Browse Source

Added bimg dependency.

Branimir Karadžić 8 years ago
parent
commit
8ce85d2462
100 changed files with 0 additions and 33466 deletions
  1. 0 34
      3rdparty/edtaa3/LICENSE.md
  2. 0 580
      3rdparty/edtaa3/edtaa3func.cpp
  3. 0 7
      3rdparty/edtaa3/edtaa3func.h
  4. 0 161
      3rdparty/etc1/LICENSE
  5. 0 686
      3rdparty/etc1/etc1.cpp
  6. 0 114
      3rdparty/etc1/etc1.h
  7. 0 24
      3rdparty/etc2/LICENSE.txt
  8. 0 90
      3rdparty/etc2/Math.hpp
  9. 0 51
      3rdparty/etc2/ProcessCommon.hpp
  10. 0 719
      3rdparty/etc2/ProcessRGB.cpp
  11. 0 9
      3rdparty/etc2/ProcessRGB.hpp
  12. 0 109
      3rdparty/etc2/Tables.cpp
  13. 0 25
      3rdparty/etc2/Tables.hpp
  14. 0 17
      3rdparty/etc2/Types.hpp
  15. 0 222
      3rdparty/etc2/Vector.hpp
  16. 0 32
      3rdparty/iqa/LICENSE
  17. 0 36
      3rdparty/iqa/README.txt
  18. 0 111
      3rdparty/iqa/include/convolve.h
  19. 0 55
      3rdparty/iqa/include/decimate.h
  20. 0 134
      3rdparty/iqa/include/iqa.h
  21. 0 68
      3rdparty/iqa/include/iqa_os.h
  22. 0 64
      3rdparty/iqa/include/math_utils.h
  23. 0 117
      3rdparty/iqa/include/ssim.h
  24. 0 195
      3rdparty/iqa/source/convolve.c
  25. 0 59
      3rdparty/iqa/source/decimate.c
  26. 0 82
      3rdparty/iqa/source/math_utils.c
  27. 0 277
      3rdparty/iqa/source/ms_ssim.c
  28. 0 50
      3rdparty/iqa/source/mse.c
  29. 0 42
      3rdparty/iqa/source/psnr.c
  30. 0 322
      3rdparty/iqa/source/ssim.c
  31. 0 20
      3rdparty/libsquish/LICENSE
  32. 0 35
      3rdparty/libsquish/README
  33. 0 350
      3rdparty/libsquish/alpha.cpp
  34. 0 41
      3rdparty/libsquish/alpha.h
  35. 0 392
      3rdparty/libsquish/clusterfit.cpp
  36. 0 61
      3rdparty/libsquish/clusterfit.h
  37. 0 214
      3rdparty/libsquish/colourblock.cpp
  38. 0 41
      3rdparty/libsquish/colourblock.h
  39. 0 54
      3rdparty/libsquish/colourfit.cpp
  40. 0 56
      3rdparty/libsquish/colourfit.h
  41. 0 121
      3rdparty/libsquish/colourset.cpp
  42. 0 58
      3rdparty/libsquish/colourset.h
  43. 0 49
      3rdparty/libsquish/config.h
  44. 0 259
      3rdparty/libsquish/maths.cpp
  45. 0 233
      3rdparty/libsquish/maths.h
  46. 0 201
      3rdparty/libsquish/rangefit.cpp
  47. 0 54
      3rdparty/libsquish/rangefit.h
  48. 0 32
      3rdparty/libsquish/simd.h
  49. 0 183
      3rdparty/libsquish/simd_float.h
  50. 0 172
      3rdparty/libsquish/singlecolourfit.cpp
  51. 0 58
      3rdparty/libsquish/singlecolourfit.h
  52. 0 1064
      3rdparty/libsquish/singlecolourlookup.inl
  53. 0 260
      3rdparty/libsquish/squish.cpp
  54. 0 269
      3rdparty/libsquish/squish.h
  55. 0 10
      3rdparty/lodepng/README.md
  56. 0 6224
      3rdparty/lodepng/lodepng.cpp
  57. 0 1759
      3rdparty/lodepng/lodepng.h
  58. 0 24
      3rdparty/nvtt/NVIDIA_Texture_Tools_LICENSE.txt
  59. 0 75
      3rdparty/nvtt/bc6h/bits.h
  60. 0 133
      3rdparty/nvtt/bc6h/shapes_two.h
  61. 0 82
      3rdparty/nvtt/bc6h/tile.h
  62. 0 197
      3rdparty/nvtt/bc6h/zoh.cpp
  63. 0 65
      3rdparty/nvtt/bc6h/zoh.h
  64. 0 324
      3rdparty/nvtt/bc6h/zoh_utils.cpp
  65. 0 72
      3rdparty/nvtt/bc6h/zoh_utils.h
  66. 0 799
      3rdparty/nvtt/bc6h/zohone.cpp
  67. 0 883
      3rdparty/nvtt/bc6h/zohtwo.cpp
  68. 0 264
      3rdparty/nvtt/bc7/avpcl.cpp
  69. 0 99
      3rdparty/nvtt/bc7/avpcl.h
  70. 0 1066
      3rdparty/nvtt/bc7/avpcl_mode0.cpp
  71. 0 1047
      3rdparty/nvtt/bc7/avpcl_mode1.cpp
  72. 0 1004
      3rdparty/nvtt/bc7/avpcl_mode2.cpp
  73. 0 1059
      3rdparty/nvtt/bc7/avpcl_mode3.cpp
  74. 0 1214
      3rdparty/nvtt/bc7/avpcl_mode4.cpp
  75. 0 1216
      3rdparty/nvtt/bc7/avpcl_mode5.cpp
  76. 0 1055
      3rdparty/nvtt/bc7/avpcl_mode6.cpp
  77. 0 1094
      3rdparty/nvtt/bc7/avpcl_mode7.cpp
  78. 0 389
      3rdparty/nvtt/bc7/avpcl_utils.cpp
  79. 0 61
      3rdparty/nvtt/bc7/avpcl_utils.h
  80. 0 76
      3rdparty/nvtt/bc7/bits.h
  81. 0 81
      3rdparty/nvtt/bc7/endpts.h
  82. 0 132
      3rdparty/nvtt/bc7/shapes_three.h
  83. 0 133
      3rdparty/nvtt/bc7/shapes_two.h
  84. 0 41
      3rdparty/nvtt/bc7/tile.h
  85. 0 181
      3rdparty/nvtt/nvcore/array.h
  86. 0 437
      3rdparty/nvtt/nvcore/array.inl
  87. 0 216
      3rdparty/nvtt/nvcore/debug.h
  88. 0 57
      3rdparty/nvtt/nvcore/defsgnucdarwin.h
  89. 0 63
      3rdparty/nvtt/nvcore/defsgnuclinux.h
  90. 0 65
      3rdparty/nvtt/nvcore/defsgnucwin32.h
  91. 0 94
      3rdparty/nvtt/nvcore/defsvcwin32.h
  92. 0 68
      3rdparty/nvtt/nvcore/foreach.h
  93. 0 83
      3rdparty/nvtt/nvcore/hash.h
  94. 0 30
      3rdparty/nvtt/nvcore/memory.h
  95. 0 363
      3rdparty/nvtt/nvcore/nvcore.h
  96. 0 1030
      3rdparty/nvtt/nvcore/posh.h
  97. 0 459
      3rdparty/nvtt/nvcore/stdstream.h
  98. 0 163
      3rdparty/nvtt/nvcore/stream.h
  99. 0 429
      3rdparty/nvtt/nvcore/strlib.h
  100. 0 281
      3rdparty/nvtt/nvcore/utils.h

+ 0 - 34
3rdparty/edtaa3/LICENSE.md

@@ -1,34 +0,0 @@
-https://github.com/OpenGLInsights/OpenGLInsightsCode/blob/master/Chapter%2012%202D%20Shape%20Rendering%20by%20Distance%20Fields/LICENSE.txt
-
-The C code and the GLSL code for the OpenGL demo is public
-domain code. The distance transform code in the console
-application to create distance field textures, located in
-the file "edtaa3func.c", is MIT licensed, and free to use
-under the following conditions.
-
-https://github.com/OpenGLInsights/OpenGLInsightsCode/issues/6#issuecomment-67829157
-
-----
-
-Copyright (C) 2011 by Stefan Gustavson
-([email protected])
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-
-----

+ 0 - 580
3rdparty/edtaa3/edtaa3func.cpp

@@ -1,580 +0,0 @@
-/*
- * edtaa3()
- *
- * Sweep-and-update Euclidean distance transform of an
- * image. Positive pixels are treated as object pixels,
- * zero or negative pixels are treated as background.
- * An attempt is made to treat antialiased edges correctly.
- * The input image must have pixels in the range [0,1],
- * and the antialiased image should be a box-filter
- * sampling of the ideal, crisp edge.
- * If the antialias region is more than 1 pixel wide,
- * the result from this transform will be inaccurate.
- *
- * By Stefan Gustavson ([email protected]).
- *
- * Originally written in 1994, based on a verbal
- * description of Per-Erik Danielsson's SSED8 algorithm
- * as presented in the PhD dissertation of Ingemar
- * Ragnemalm. This is Per-Erik Danielsson's scanline
- * scheme from 1979 - I only implemented it in C.
- *
- * Updated in 2004 to treat border pixels correctly,
- * and cleaned up the code to improve readability.
- *
- * Updated in 2009 to handle anti-aliased edges,
- * as published in the article "Anti-aliased Euclidean
- * distance transform" by Stefan Gustavson and Robin Strand,
- * Pattern Recognition Letters 32 (2011) 252–257.
- *
- * Updated in 2011 to avoid a corner case causing an
- * infinite loop for some input data.
- *
-*/
-
-/*
-
-Copyright (C) 2011 by Stefan Gustavson
-
-([email protected])
-
-This code is distributed under the permissive "MIT license":
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-
-*/
-
-#include <math.h>
-
-/*
- * Compute the local gradient at edge pixels using convolution filters.
- * The gradient is computed only at edge pixels. At other places in the
- * image, it is never used, and it's mostly zero anyway.
- *
- * img    : input image, w*h values addressed as row*w + column.
- * w, h   : image width and height in pixels.
- * gx, gy : output gradient components, addressed like img. Only interior
- *          pixels with 0 < img < 1 are written; each written (gx, gy) pair
- *          is scaled to unit length unless the raw gradient is exactly zero.
- *          Every other entry is left untouched, so the caller presumably
- *          zero-fills both arrays first -- TODO confirm against the caller.
- */
-void computegradient(double *img, int w, int h, double *gx, double *gy)
-{
-    int i,j,k;
-    double glength;
-#define SQRT2 1.4142136 // Kernel weight of the axis-aligned neighbours; never #undef'd in this file
-    for(i = 1; i < h-1; i++) { // Avoid edges where the kernels would spill over
-        for(j = 1; j < w-1; j++) {
-            k = i*w + j;
-            if((img[k]>0.0) && (img[k]<1.0)) { // Compute gradient for edge pixels only
-                gx[k] = -img[k-w-1] - SQRT2*img[k-1] - img[k+w-1] + img[k-w+1] + SQRT2*img[k+1] + img[k+w+1];
-                gy[k] = -img[k-w-1] - SQRT2*img[k-w] - img[k-w+1] + img[k+w-1] + SQRT2*img[k+w] + img[k+w+1];
-                glength = gx[k]*gx[k] + gy[k]*gy[k];
-                if(glength > 0.0) { // Avoid division by zero
-                    glength = sqrt(glength);
-                    gx[k]=gx[k]/glength;
-                    gy[k]=gy[k]/glength;
-                }
-            }
-        }
-    }
-    // TODO: Compute reasonable values for gx, gy also around the image edges.
-    // (These are zero now, which reduces the accuracy for a 1-pixel wide region
-	// around the image edge.) 2x2 kernels would be suitable for this.
-}
-
-/*
- * A somewhat tricky function to approximate the distance to an edge in a
- * certain pixel, with consideration to either the local gradient (gx,gy)
- * or the direction to the pixel (dx,dy) and the pixel greyscale value a.
- * The latter alternative, using (dx,dy), is the metric used by edtaa2().
- * Using a local estimate of the edge gradient (gx,gy) yields much better
- * accuracy at and near edges, and reduces the error even at distant pixels
- * provided that the gradient direction is accurately estimated.
- *
- * gx, gy : direction vector; it need not be unit length, it is normalized
- *          below. Callers in this file pass either a stored gradient or an
- *          integer offset (dx, dy).
- * a      : greyscale value of the pixel. The formulas assume 0 <= a <= 1;
- *          values outside that range put a negative value under a sqrt().
- *
- * Returns the distance estimate df. When either component is zero this is
- * simply 0.5 - a, i.e. positive for a < 0.5 and negative for a > 0.5.
- */
-double edgedf(double gx, double gy, double a)
-{
-    double df, glength, temp, a1;
-
-    if ((gx == 0) || (gy == 0)) { // Either A) gx or gy is zero, or B) both
-        df = 0.5-a;  // Linear approximation is A) correct or B) a fair guess
-    } else {
-        glength = sqrt(gx*gx + gy*gy);
-        if(glength>0) {
-            gx = gx/glength;
-            gy = gy/glength;
-        }
-        /* Everything is symmetric wrt sign and transposition,
-         * so move to first octant (gx>=0, gy>=0, gx>=gy) to
-         * avoid handling all possible edge directions.
-         */
-        gx = fabs(gx);
-        gy = fabs(gy);
-        if(gx<gy) {
-            temp = gx;
-            gx = gy;
-            gy = temp;
-        }
-        a1 = 0.5*gy/gx;
-        if (a < a1) { // 0 <= a < a1
-            df = 0.5*(gx + gy) - sqrt(2.0*gx*gy*a);
-        } else if (a < (1.0-a1)) { // a1 <= a <= 1-a1
-            df = (0.5-a)*gx;
-        } else { // 1-a1 < a <= 1
-            df = -0.5*(gx + gy) + sqrt(2.0*gx*gy*(1.0-a));
-        }
-    }    
-    return df;
-}
-/*
- * Distance from a pixel to the edge pixel that a candidate neighbour
- * currently points at.
- *
- * img, gximg, gyimg : image and gradient arrays, all with row width w.
- * c      : linear index of the candidate neighbour.
- * xc, yc : offset stored for c; c - xc - yc*w is the index of the edge
- *          pixel that c refers to. No bounds check is made on that index.
- * xi, yi : integer offset between the pixel being updated and that same
- *          edge pixel.
- *
- * Returns 1000000.0 ("don't know yet") when the edge pixel's greyscale
- * value is <= 0. Otherwise returns the length of (xi, yi) plus the
- * sub-pixel correction computed by edgedf().
- */
-double distaa3(double *img, double *gximg, double *gyimg, int w, int c, int xc, int yc, int xi, int yi)
-{
-  double di, df, dx, dy, gx, gy, a;
-  int closest;
-  
-  closest = c-xc-yc*w; // Index to the edge pixel pointed to from c
-  a = img[closest];    // Grayscale value at the edge pixel
-  gx = gximg[closest]; // X gradient component at the edge pixel
-  gy = gyimg[closest]; // Y gradient component at the edge pixel
-  
-  if(a > 1.0) a = 1.0;
-  if(a < 0.0) a = 0.0; // Clip grayscale values outside the range [0,1]
-  if(a == 0.0) return 1000000.0; // Not an object pixel, return "very far" ("don't know yet")
-
-  dx = (double)xi;
-  dy = (double)yi;
-  di = sqrt(dx*dx + dy*dy); // Length of integer vector, like a traditional EDT
-  if(di==0) { // Use local gradient only at edges
-      // Estimate based on local gradient only
-      df = edgedf(gx, gy, a);
-  } else {
-      // Estimate gradient based on direction to edge (accurate for large di)
-      df = edgedf(dx, dy, a);
-  }
-  return di + df; // Same metric as edtaa2, except at edges (where di=0)
-}
-
-// Shorthand macro: add ubiquitous parameters img, gx, gy and w and call distaa3()
-#define DISTAA(c,xc,yc,xi,yi) (distaa3(img, gx, gy, w, c, xc, yc, xi, yi))
-/*
- * Anti-aliased Euclidean distance transform of a w*h image.
- *
- * img          : input greyscale image, addressed as row*w + column.
- * gx, gy       : per-pixel gradient components, addressed like img
- *                (presumably produced by computegradient() -- TODO confirm).
- * w, h         : image width and height in pixels. The row handling treats
- *                the leftmost and rightmost pixel as distinct, so this
- *                presumably expects w >= 2 -- TODO confirm.
- * distx, disty : output; per-pixel integer offset to the closest edge pixel
- *                found so far. Values are computed as int and stored as
- *                short.
- * dist         : output; per-pixel distance estimate.
- *
- * Initialization: pixels with img <= 0 start at 1000000.0 ("not set yet"),
- * pixels with img >= 1 start at 0, and pixels in between start at the
- * edgedf() estimate. The image is then swept top-down and bottom-up,
- * repeatedly, until one complete double sweep changes nothing. A candidate
- * replaces the stored distance only if it is smaller by more than epsilon.
- */
-void edtaa3(double *img, double *gx, double *gy, int w, int h, short *distx, short *disty, double *dist)
-{
-  int x, y, i, c;
-  int offset_u, offset_ur, offset_r, offset_rd,
-  offset_d, offset_dl, offset_l, offset_lu;
-  double olddist, newdist;
-  int cdistx, cdisty, newdistx, newdisty;
-  int changed;
-  double epsilon = 1e-3; // Safeguard against errors due to limited precision
-
-  /* Initialize index offsets for the current image width */
-  offset_u = -w;
-  offset_ur = -w+1;
-  offset_r = 1;
-  offset_rd = w+1;
-  offset_d = w;
-  offset_dl = w-1;
-  offset_l = -1;
-  offset_lu = -w-1;
-
-  /* Initialize the distance images */
-  for(i=0; i<w*h; i++) {
-    distx[i] = 0; // At first, all pixels point to
-    disty[i] = 0; // themselves as the closest known.
-    if(img[i] <= 0.0)
-      {
-	dist[i]= 1000000.0; // Big value, means "not set yet"
-      }
-    else if (img[i]<1.0) {
-      dist[i] = edgedf(gx[i], gy[i], img[i]); // Gradient-assisted estimate
-    }
-    else {
-      dist[i]= 0.0; // Inside the object
-    }
-  }
-
-  /* Perform the transformation */
-  do
-    {
-      changed = 0;
-
-      /* Scan rows, except first row */
-      for(y=1; y<h; y++)
-        {
-
-          /* move index to leftmost pixel of current row */
-          i = y*w;
-
-          /* scan right, propagate distances from above & left */
-
-          /* Leftmost pixel is special, has no left neighbors */
-          olddist = dist[i];
-          if(olddist > 0) // If non-zero distance or not set yet
-            {
-	      c = i + offset_u; // Index of candidate for testing
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_ur;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-          i++;
-
-          /* Middle pixels have all neighbors */
-          for(x=1; x<w-1; x++, i++)
-            {
-              olddist = dist[i];
-              if(olddist <= 0) continue; // No need to update further
-
-	      c = i+offset_l;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_lu;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_u;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_ur;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-
-          /* Rightmost pixel of row is special, has no right neighbors */
-          olddist = dist[i];
-          if(olddist > 0) // If not already zero distance
-            {
-	      c = i+offset_l;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_lu;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_u;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty+1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-
-          /* Move index to second rightmost pixel of current row. */
-          /* Rightmost pixel is skipped, it has no right neighbor. */
-          i = y*w + w-2;
-
-          /* scan left, propagate distance from right */
-          for(x=w-2; x>=0; x--, i--)
-            {
-              olddist = dist[i];
-              if(olddist <= 0) continue; // Already zero distance
-
-	      c = i+offset_r;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-        }
-      
-      /* Scan rows in reverse order, except last row */
-      for(y=h-2; y>=0; y--)
-        {
-          /* move index to rightmost pixel of current row */
-          i = y*w + w-1;
-
-          /* Scan left, propagate distances from below & right */
-
-          /* Rightmost pixel is special, has no right neighbors */
-          olddist = dist[i];
-          if(olddist > 0) // If not already zero distance
-            {
-	      c = i+offset_d;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_dl;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-          i--;
-
-          /* Middle pixels have all neighbors */
-          for(x=w-2; x>0; x--, i--)
-            {
-              olddist = dist[i];
-              if(olddist <= 0) continue; // Already zero distance
-
-	      c = i+offset_r;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_rd;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_d;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_dl;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-          /* Leftmost pixel is special, has no left neighbors */
-          olddist = dist[i];
-          if(olddist > 0) // If not already zero distance
-            {
-	      c = i+offset_r;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_rd;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx-1;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  olddist=newdist;
-                  changed = 1;
-                }
-
-	      c = i+offset_d;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx;
-              newdisty = cdisty-1;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-
-          /* Move index to second leftmost pixel of current row. */
-          /* Leftmost pixel is skipped, it has no left neighbor. */
-          i = y*w + 1;
-          for(x=1; x<w; x++, i++)
-            {
-              /* scan right, propagate distance from left */
-              olddist = dist[i];
-              if(olddist <= 0) continue; // Already zero distance
-
-	      c = i+offset_l;
-	      cdistx = distx[c];
-	      cdisty = disty[c];
-              newdistx = cdistx+1;
-              newdisty = cdisty;
-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
-              if(newdist < olddist-epsilon)
-                {
-                  distx[i]=newdistx;
-                  disty[i]=newdisty;
-                  dist[i]=newdist;
-                  changed = 1;
-                }
-            }
-        }
-    }
-  while(changed); // Sweep until no more updates are made
-
-  /* The transformation is completed. */
-
-}

+ 0 - 7
3rdparty/edtaa3/edtaa3func.h

@@ -1,7 +0,0 @@
-#ifndef EDTAA3_H_HEADER_GUARD
-#define EDTAA3_H_HEADER_GUARD
-
-extern void computegradient(double *img, int w, int h, double *gx, double *gy); // Writes unit-length gradients (gx, gy) for interior pixels with 0 < img < 1
-extern void edtaa3(double *img, double *gx, double *gy, int w, int h, short *distx, short *disty, double *dist); // Anti-aliased Euclidean distance transform; fills distx, disty and dist
-
-#endif // EDTAA3_H_HEADER_GUARD

+ 0 - 161
3rdparty/etc1/LICENSE

@@ -1,161 +0,0 @@
-Apache License
-
-Version 2.0, January 2004
-
-http://www.apache.org/licenses/
-
-TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-1. Definitions.
-
-"License" shall mean the terms and conditions for use, reproduction, and
-distribution as defined by Sections 1 through 9 of this document.
-
-"Licensor" shall mean the copyright owner or entity authorized by the
-copyright owner that is granting the License.
-
-"Legal Entity" shall mean the union of the acting entity and all other
-entities that control, are controlled by, or are under common control with
-that entity. For the purposes of this definition, "control" means (i) the
-power, direct or indirect, to cause the direction or management of such 
-entity, whether by contract or otherwise, or (ii) ownership of fifty 
-percent (50%) or more of the outstanding shares, or (iii) beneficial 
-ownership of such entity.
-
-"You" (or "Your") shall mean an individual or Legal Entity exercising 
-permissions granted by this License.
-
-"Source" form shall mean the preferred form for making modifications, 
-including but not limited to software source code, documentation 
-source, and configuration files.
-
-"Object" form shall mean any form resulting from mechanical transformation 
-or translation of a Source form, including but not limited to compiled 
-object code, generated documentation, and conversions to other media types.
-
-"Work" shall mean the work of authorship, whether in Source or Object 
-form, made available under the License, as indicated by a copyright 
-notice that is included in or attached to the work (an example is 
-provided in the Appendix below).
-
-"Derivative Works" shall mean any work, whether in Source or Object 
-form, that is based on (or derived from) the Work and for which the 
-editorial revisions, annotations, elaborations, or other modifications 
-represent, as a whole, an original work of authorship. For the purposes 
-of this License, Derivative Works shall not include works that remain 
-separable from, or merely link (or bind by name) to the interfaces of, 
-the Work and Derivative Works thereof.
-
-"Contribution" shall mean any work of authorship, including the original 
-version of the Work and any modifications or additions to that Work or 
-Derivative Works thereof, that is intentionally submitted to Licensor 
-for inclusion in the Work by the copyright owner or by an individual or 
-Legal Entity authorized to submit on behalf of the copyright owner. For 
-the purposes of this definition, "submitted" means any form of electronic, 
-verbal, or written communication sent to the Licensor or its 
-representatives, including but not limited to communication on electronic 
-mailing lists, source code control systems, and issue tracking systems that 
-are managed by, or on behalf of, the Licensor for the purpose of discussing 
-and improving the Work, but excluding communication that is conspicuously 
-marked or otherwise designated in writing by the copyright owner as "Not 
-a Contribution."
-
-"Contributor" shall mean Licensor and any individual or Legal Entity on 
-behalf of whom a Contribution has been received by Licensor and subsequently 
-incorporated within the Work.
-
-2. Grant of Copyright License. Subject to the terms and conditions of this 
-License, each Contributor hereby grants to You a perpetual, worldwide, 
-non-exclusive, no-charge, royalty-free, irrevocable copyright license to 
-reproduce, prepare Derivative Works of, publicly display, publicly perform, 
-sublicense, and distribute the Work and such Derivative Works in Source or 
-Object form.
-
-3. Grant of Patent License. Subject to the terms and conditions of this 
-License, each Contributor hereby grants to You a perpetual, worldwide, 
-non-exclusive, no-charge, royalty-free, irrevocable (except as stated in 
-this section) patent license to make, have made, use, offer to sell, sell, 
-import, and otherwise transfer the Work, where such license applies only to 
-those patent claims licensable by such Contributor that are necessarily 
-infringed by their Contribution(s) alone or by combination of their 
-Contribution(s) with the Work to which such Contribution(s) was submitted. 
-If You institute patent litigation against any entity (including a cross-claim
-or counterclaim in a lawsuit) alleging that the Work or a Contribution 
-incorporated within the Work constitutes direct or contributory patent 
-infringement, then any patent licenses granted to You under this License 
-for that Work shall terminate as of the date such litigation is filed.
-
-4. Redistribution. You may reproduce and distribute copies of the Work or 
-Derivative Works thereof in any medium, with or without modifications, and 
-in Source or Object form, provided that You meet the following conditions:
-
-You must give any other recipients of the Work or Derivative Works a copy of 
-this License; and
-You must cause any modified files to carry prominent notices stating that 
-You changed the files; and
-You must retain, in the Source form of any Derivative Works that You 
-distribute, all copyright, patent, trademark, and attribution notices 
-from the Source form of the Work, excluding those notices that do not 
-pertain to any part of the Derivative Works; and
-If the Work includes a "NOTICE" text file as part of its distribution, 
-then any Derivative Works that You distribute must include a readable 
-copy of the attribution notices contained within such NOTICE file, excluding
-those notices that do not pertain to any part of the Derivative Works, in
-at least one of the following places: within a NOTICE text file distributed 
-as part of the Derivative Works; within the Source form or documentation, if 
-provided along with the Derivative Works; or, within a display generated by 
-the Derivative Works, if and wherever such third-party notices normally 
-appear. The contents of the NOTICE file are for informational purposes 
-only and do not modify the License. You may add Your own attribution 
-notices within Derivative Works that You distribute, alongside or as 
-an addendum to the NOTICE text from the Work, provided that such additional 
-attribution notices cannot be construed as modifying the License. 
-
-You may add Your own copyright statement to Your modifications and may provide
-additional or different license terms and conditions for use, reproduction, or
-distribution of Your modifications, or for any such Derivative Works as a 
-whole, provided Your use, reproduction, and distribution of the Work otherwise 
-complies with the conditions stated in this License.
-5. Submission of Contributions. Unless You explicitly state otherwise, any 
-Contribution intentionally submitted for inclusion in the Work by You to the 
-Licensor shall be under the terms and conditions of this License, without any 
-additional terms or conditions. Notwithstanding the above, nothing herein 
-shall supersede or modify the terms of any separate license agreement you 
-may have executed with Licensor regarding such Contributions.
-
-6. Trademarks. This License does not grant permission to use the trade names, 
-trademarks, service marks, or product names of the Licensor, except as 
-required for reasonable and customary use in describing the origin of the 
-Work and reproducing the content of the NOTICE file.
-
-7. Disclaimer of Warranty. Unless required by applicable law or agreed to 
-in writing, Licensor provides the Work (and each Contributor provides its 
-Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 
-ANY KIND, either express or implied, including, without limitation, any 
-warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or 
-FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining 
-the appropriateness of using or redistributing the Work and assume any risks 
-associated with Your exercise of permissions under this License.
-
-8. Limitation of Liability. In no event and under no legal theory, whether in
-tort (including negligence), contract, or otherwise, unless required by 
-applicable law (such as deliberate and grossly negligent acts) or agreed to 
-in writing, shall any Contributor be liable to You for damages, including 
-any direct, indirect, special, incidental, or consequential damages of any 
-character arising as a result of this License or out of the use or inability 
-to use the Work (including but not limited to damages for loss of goodwill, 
-work stoppage, computer failure or malfunction, or any and all other 
-commercial damages or losses), even if such Contributor has been advised 
-of the possibility of such damages.
-
-9. Accepting Warranty or Additional Liability. While redistributing the 
-Work or Derivative Works thereof, You may choose to offer, and charge a 
-fee for, acceptance of support, warranty, indemnity, or other liability 
-obligations and/or rights consistent with this License. However, in accepting
-such obligations, You may act only on Your own behalf and on Your sole 
-responsibility, not on behalf of any other Contributor, and only if You
-agree to indemnify, defend, and hold each Contributor harmless for any 
-liability incurred by, or claims asserted against, such Contributor by 
-reason of your accepting any such warranty or additional liability.
-
-END OF TERMS AND CONDITIONS

+ 0 - 686
3rdparty/etc1/etc1.cpp

@@ -1,686 +0,0 @@
-// Copyright 2009 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//////////////////////////////////////////////////////////////////////////////////////////
-
-// This is a fork of the AOSP project ETC1 codec. The original code can be found
-// at the following web site:
-// https://android.googlesource.com/platform/frameworks/native/+/master/opengl/include/ETC1/
-
-//////////////////////////////////////////////////////////////////////////////////////////
-
-#include "etc1.h"
-
-#include <cstring>
-
-/* From http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
-
- The number of bits that represent a 4x4 texel block is 64 bits if
- <internalformat> is given by ETC1_RGB8_OES.
-
- The data for a block is a number of bytes,
-
- {q0, q1, q2, q3, q4, q5, q6, q7}
-
- where byte q0 is located at the lowest memory address and q7 at
- the highest. The 64 bits specifying the block is then represented
- by the following 64 bit integer:
-
- int64bit = 256*(256*(256*(256*(256*(256*(256*q0+q1)+q2)+q3)+q4)+q5)+q6)+q7;
-
- ETC1_RGB8_OES:
-
- a) bit layout in bits 63 through 32 if diffbit = 0
-
- 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
- -----------------------------------------------
- | base col1 | base col2 | base col1 | base col2 |
- | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)|
- -----------------------------------------------
-
- 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32
- ---------------------------------------------------
- | base col1 | base col2 | table  | table  |diff|flip|
- | B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
- ---------------------------------------------------
-
-
- b) bit layout in bits 63 through 32 if diffbit = 1
-
- 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
- -----------------------------------------------
- | base col1    | dcol 2 | base col1    | dcol 2 |
- | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    |
- -----------------------------------------------
-
- 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32
- ---------------------------------------------------
- | base col 1   | dcol 2 | table  | table  |diff|flip|
- | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
- ---------------------------------------------------
-
-
- c) bit layout in bits 31 through 0 (in both cases)
-
- 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
- -----------------------------------------------
- |       most significant pixel index bits       |
- | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a|
- -----------------------------------------------
-
- 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
- --------------------------------------------------
- |         least significant pixel index bits       |
- | p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
- --------------------------------------------------
-
-
- Add table 3.17.2: Intensity modifier sets for ETC1 compressed textures:
-
- table codeword                modifier table
- ------------------        ----------------------
- 0                     -8  -2  2   8
- 1                    -17  -5  5  17
- 2                    -29  -9  9  29
- 3                    -42 -13 13  42
- 4                    -60 -18 18  60
- 5                    -80 -24 24  80
- 6                   -106 -33 33 106
- 7                   -183 -47 47 183
-
-
- Add table 3.17.3 Mapping from pixel index values to modifier values for
- ETC1 compressed textures:
-
- pixel index value
- ---------------
- msb     lsb           resulting modifier value
- -----   -----          -------------------------
- 1       1            -b (large negative value)
- 1       0            -a (small negative value)
- 0       0             a (small positive value)
- 0       1             b (large positive value)
-
-
- */
-
-static const int kModifierTable[] = {
-/* 0 */2, 8, -2, -8,
-/* 1 */5, 17, -5, -17,
-/* 2 */9, 29, -9, -29,
-/* 3 */13, 42, -13, -42,
-/* 4 */18, 60, -18, -60,
-/* 5 */24, 80, -24, -80,
-/* 6 */33, 106, -33, -106,
-/* 7 */47, 183, -47, -183 };
-
-static const int kLookup[8] = { 0, 1, 2, 3, -4, -3, -2, -1 };
-
-static inline etc1_byte clamp(int x) {
-    return (etc1_byte) (x >= 0 ? (x < 255 ? x : 255) : 0);
-}
-
-static
-inline int convert4To8(int b) {
-    int c = b & 0xf;
-    return (c << 4) | c;
-}
-
-static
-inline int convert5To8(int b) {
-    int c = b & 0x1f;
-    return (c << 3) | (c >> 2);
-}
-
-static
-inline int convert6To8(int b) {
-    int c = b & 0x3f;
-    return (c << 2) | (c >> 4);
-}
-
-static
-inline int divideBy255(int d) {
-    return (d + 128 + (d >> 8)) >> 8;
-}
-
-static
-inline int convert8To4(int b) {
-    int c = b & 0xff;
-    return divideBy255(c * 15);
-}
-
-static
-inline int convert8To5(int b) {
-    int c = b & 0xff;
-    return divideBy255(c * 31);
-}
-
-static
-inline int convertDiff(int base, int diff) {
-    return convert5To8((0x1f & base) + kLookup[0x7 & diff]);
-}
-
-static
-void decode_subblock(etc1_byte* pOut, int r, int g, int b, const int* table,
-        etc1_uint32 low, bool second, bool flipped) {
-    int baseX = 0;
-    int baseY = 0;
-    if (second) {
-        if (flipped) {
-            baseY = 2;
-        } else {
-            baseX = 2;
-        }
-    }
-    for (int i = 0; i < 8; i++) {
-        int x, y;
-        if (flipped) {
-            x = baseX + (i >> 1);
-            y = baseY + (i & 1);
-        } else {
-            x = baseX + (i >> 2);
-            y = baseY + (i & 3);
-        }
-        int k = y + (x * 4);
-        int offset = ((low >> k) & 1) | ((low >> (k + 15)) & 2);
-        int delta = table[offset];
-        etc1_byte* q = pOut + 3 * (x + 4 * y);
-        *q++ = clamp(r + delta);
-        *q++ = clamp(g + delta);
-        *q++ = clamp(b + delta);
-    }
-}
-
-// Input is an ETC1 compressed version of the data.
-// Output is a 4 x 4 square of 3-byte pixels in form R, G, B
-
-void etc1_decode_block(const etc1_byte* pIn, etc1_byte* pOut) {
-    etc1_uint32 high = (pIn[0] << 24) | (pIn[1] << 16) | (pIn[2] << 8) | pIn[3];
-    etc1_uint32 low = (pIn[4] << 24) | (pIn[5] << 16) | (pIn[6] << 8) | pIn[7];
-    int r1, r2, g1, g2, b1, b2;
-    if (high & 2) {
-        // differential
-        int rBase = high >> 27;
-        int gBase = high >> 19;
-        int bBase = high >> 11;
-        r1 = convert5To8(rBase);
-        r2 = convertDiff(rBase, high >> 24);
-        g1 = convert5To8(gBase);
-        g2 = convertDiff(gBase, high >> 16);
-        b1 = convert5To8(bBase);
-        b2 = convertDiff(bBase, high >> 8);
-    } else {
-        // not differential
-        r1 = convert4To8(high >> 28);
-        r2 = convert4To8(high >> 24);
-        g1 = convert4To8(high >> 20);
-        g2 = convert4To8(high >> 16);
-        b1 = convert4To8(high >> 12);
-        b2 = convert4To8(high >> 8);
-    }
-    int tableIndexA = 7 & (high >> 5);
-    int tableIndexB = 7 & (high >> 2);
-    const int* tableA = kModifierTable + tableIndexA * 4;
-    const int* tableB = kModifierTable + tableIndexB * 4;
-    bool flipped = (high & 1) != 0;
-    decode_subblock(pOut, r1, g1, b1, tableA, low, false, flipped);
-    decode_subblock(pOut, r2, g2, b2, tableB, low, true, flipped);
-}
-
-typedef struct {
-    etc1_uint32 high;
-    etc1_uint32 low;
-    etc1_uint32 score; // Lower is more accurate
-} etc_compressed;
-
-static
-inline void take_best(etc_compressed* a, const etc_compressed* b) {
-    if (a->score > b->score) {
-        *a = *b;
-    }
-}
-
-static
-void etc_average_colors_subblock(const etc1_byte* pIn, etc1_uint32 inMask,
-        etc1_byte* pColors, bool flipped, bool second) {
-    int r = 0;
-    int g = 0;
-    int b = 0;
-
-    if (flipped) {
-        int by = 0;
-        if (second) {
-            by = 2;
-        }
-        for (int y = 0; y < 2; y++) {
-            int yy = by + y;
-            for (int x = 0; x < 4; x++) {
-                int i = x + 4 * yy;
-                if (inMask & (1 << i)) {
-                    const etc1_byte* p = pIn + i * 3;
-                    r += *(p++);
-                    g += *(p++);
-                    b += *(p++);
-                }
-            }
-        }
-    } else {
-        int bx = 0;
-        if (second) {
-            bx = 2;
-        }
-        for (int y = 0; y < 4; y++) {
-            for (int x = 0; x < 2; x++) {
-                int xx = bx + x;
-                int i = xx + 4 * y;
-                if (inMask & (1 << i)) {
-                    const etc1_byte* p = pIn + i * 3;
-                    r += *(p++);
-                    g += *(p++);
-                    b += *(p++);
-                }
-            }
-        }
-    }
-    pColors[0] = (etc1_byte)((r + 4) >> 3);
-    pColors[1] = (etc1_byte)((g + 4) >> 3);
-    pColors[2] = (etc1_byte)((b + 4) >> 3);
-}
-
-static
-inline int square(int x) {
-    return x * x;
-}
-
-static etc1_uint32 chooseModifier(const etc1_byte* pBaseColors,
-        const etc1_byte* pIn, etc1_uint32 *pLow, int bitIndex,
-        const int* pModifierTable) {
-    etc1_uint32 bestScore = ~0;
-    int bestIndex = 0;
-    int pixelR = pIn[0];
-    int pixelG = pIn[1];
-    int pixelB = pIn[2];
-    int r = pBaseColors[0];
-    int g = pBaseColors[1];
-    int b = pBaseColors[2];
-    for (int i = 0; i < 4; i++) {
-        int modifier = pModifierTable[i];
-        int decodedG = clamp(g + modifier);
-        etc1_uint32 score = (etc1_uint32) (6 * square(decodedG - pixelG));
-        if (score >= bestScore) {
-            continue;
-        }
-        int decodedR = clamp(r + modifier);
-        score += (etc1_uint32) (3 * square(decodedR - pixelR));
-        if (score >= bestScore) {
-            continue;
-        }
-        int decodedB = clamp(b + modifier);
-        score += (etc1_uint32) square(decodedB - pixelB);
-        if (score < bestScore) {
-            bestScore = score;
-            bestIndex = i;
-        }
-    }
-    etc1_uint32 lowMask = (((bestIndex >> 1) << 16) | (bestIndex & 1))
-            << bitIndex;
-    *pLow |= lowMask;
-    return bestScore;
-}
-
-static
-void etc_encode_subblock_helper(const etc1_byte* pIn, etc1_uint32 inMask,
-        etc_compressed* pCompressed, bool flipped, bool second,
-        const etc1_byte* pBaseColors, const int* pModifierTable) {
-    int score = pCompressed->score;
-    if (flipped) {
-        int by = 0;
-        if (second) {
-            by = 2;
-        }
-        for (int y = 0; y < 2; y++) {
-            int yy = by + y;
-            for (int x = 0; x < 4; x++) {
-                int i = x + 4 * yy;
-                if (inMask & (1 << i)) {
-                    score += chooseModifier(pBaseColors, pIn + i * 3,
-                            &pCompressed->low, yy + x * 4, pModifierTable);
-                }
-            }
-        }
-    } else {
-        int bx = 0;
-        if (second) {
-            bx = 2;
-        }
-        for (int y = 0; y < 4; y++) {
-            for (int x = 0; x < 2; x++) {
-                int xx = bx + x;
-                int i = xx + 4 * y;
-                if (inMask & (1 << i)) {
-                    score += chooseModifier(pBaseColors, pIn + i * 3,
-                            &pCompressed->low, y + xx * 4, pModifierTable);
-                }
-            }
-        }
-    }
-    pCompressed->score = score;
-}
-
-static bool inRange4bitSigned(int color) {
-    return color >= -4 && color <= 3;
-}
-
-static void etc_encodeBaseColors(etc1_byte* pBaseColors,
-        const etc1_byte* pColors, etc_compressed* pCompressed) {
-    int r1, g1, b1, r2, g2, b2; // 8 bit base colors for sub-blocks
-    bool differential;
-    {
-        int r51 = convert8To5(pColors[0]);
-        int g51 = convert8To5(pColors[1]);
-        int b51 = convert8To5(pColors[2]);
-        int r52 = convert8To5(pColors[3]);
-        int g52 = convert8To5(pColors[4]);
-        int b52 = convert8To5(pColors[5]);
-
-        r1 = convert5To8(r51);
-        g1 = convert5To8(g51);
-        b1 = convert5To8(b51);
-
-        int dr = r52 - r51;
-        int dg = g52 - g51;
-        int db = b52 - b51;
-
-        differential = inRange4bitSigned(dr) && inRange4bitSigned(dg)
-                && inRange4bitSigned(db);
-        if (differential) {
-            r2 = convert5To8(r51 + dr);
-            g2 = convert5To8(g51 + dg);
-            b2 = convert5To8(b51 + db);
-            pCompressed->high |= (r51 << 27) | ((7 & dr) << 24) | (g51 << 19)
-                    | ((7 & dg) << 16) | (b51 << 11) | ((7 & db) << 8) | 2;
-        }
-    }
-
-    if (!differential) {
-        int r41 = convert8To4(pColors[0]);
-        int g41 = convert8To4(pColors[1]);
-        int b41 = convert8To4(pColors[2]);
-        int r42 = convert8To4(pColors[3]);
-        int g42 = convert8To4(pColors[4]);
-        int b42 = convert8To4(pColors[5]);
-        r1 = convert4To8(r41);
-        g1 = convert4To8(g41);
-        b1 = convert4To8(b41);
-        r2 = convert4To8(r42);
-        g2 = convert4To8(g42);
-        b2 = convert4To8(b42);
-        pCompressed->high |= (r41 << 28) | (r42 << 24) | (g41 << 20) | (g42
-                << 16) | (b41 << 12) | (b42 << 8);
-    }
-    pBaseColors[0] = r1;
-    pBaseColors[1] = g1;
-    pBaseColors[2] = b1;
-    pBaseColors[3] = r2;
-    pBaseColors[4] = g2;
-    pBaseColors[5] = b2;
-}
-
-static
-void etc_encode_block_helper(const etc1_byte* pIn, etc1_uint32 inMask,
-        const etc1_byte* pColors, etc_compressed* pCompressed, bool flipped) {
-    pCompressed->score = ~0;
-    pCompressed->high = (flipped ? 1 : 0);
-    pCompressed->low = 0;
-
-    etc1_byte pBaseColors[6];
-
-    etc_encodeBaseColors(pBaseColors, pColors, pCompressed);
-
-    int originalHigh = pCompressed->high;
-
-    const int* pModifierTable = kModifierTable;
-    for (int i = 0; i < 8; i++, pModifierTable += 4) {
-        etc_compressed temp;
-        temp.score = 0;
-        temp.high = originalHigh | (i << 5);
-        temp.low = 0;
-        etc_encode_subblock_helper(pIn, inMask, &temp, flipped, false,
-                pBaseColors, pModifierTable);
-        take_best(pCompressed, &temp);
-    }
-    pModifierTable = kModifierTable;
-    etc_compressed firstHalf = *pCompressed;
-    for (int i = 0; i < 8; i++, pModifierTable += 4) {
-        etc_compressed temp;
-        temp.score = firstHalf.score;
-        temp.high = firstHalf.high | (i << 2);
-        temp.low = firstHalf.low;
-        etc_encode_subblock_helper(pIn, inMask, &temp, flipped, true,
-                pBaseColors + 3, pModifierTable);
-        if (i == 0) {
-            *pCompressed = temp;
-        } else {
-            take_best(pCompressed, &temp);
-        }
-    }
-}
-
-static void writeBigEndian(etc1_byte* pOut, etc1_uint32 d) {
-    pOut[0] = (etc1_byte)(d >> 24);
-    pOut[1] = (etc1_byte)(d >> 16);
-    pOut[2] = (etc1_byte)(d >> 8);
-    pOut[3] = (etc1_byte) d;
-}
-
-// Input is a 4 x 4 square of 3-byte pixels in form R, G, B
-// inMask is a 16-bit mask where bit (1 << (x + y * 4)) tells whether the corresponding (x,y)
-// pixel is valid or not. Invalid pixel color values are ignored when compressing.
-// Output is an ETC1 compressed version of the data.
-
-void etc1_encode_block(const etc1_byte* pIn, etc1_uint32 inMask,
-        etc1_byte* pOut) {
-    etc1_byte colors[6];
-    etc1_byte flippedColors[6];
-    etc_average_colors_subblock(pIn, inMask, colors, false, false);
-    etc_average_colors_subblock(pIn, inMask, colors + 3, false, true);
-    etc_average_colors_subblock(pIn, inMask, flippedColors, true, false);
-    etc_average_colors_subblock(pIn, inMask, flippedColors + 3, true, true);
-
-    etc_compressed a, b;
-    etc_encode_block_helper(pIn, inMask, colors, &a, false);
-    etc_encode_block_helper(pIn, inMask, flippedColors, &b, true);
-    take_best(&a, &b);
-    writeBigEndian(pOut, a.high);
-    writeBigEndian(pOut + 4, a.low);
-}
-
-// Return the size of the encoded image data (does not include size of PKM header).
-
-etc1_uint32 etc1_get_encoded_data_size(etc1_uint32 width, etc1_uint32 height) {
-    return (((width + 3) & ~3) * ((height + 3) & ~3)) >> 1;
-}
-
-// Encode an entire image.
-// pIn - pointer to the image data. Formatted such that the Red component of
-//       pixel (x,y) is at pIn + pixelSize * x + stride * y + redOffset;
-// pOut - pointer to encoded data. Must be large enough to store entire encoded image.
-
-int etc1_encode_image(const etc1_byte* pIn, etc1_uint32 width, etc1_uint32 height,
-        etc1_uint32 pixelSize, etc1_uint32 stride, etc1_byte* pOut) {
-    if (pixelSize < 2 || pixelSize > 4) {
-        return -1;
-    }
-    static const unsigned short kYMask[] = { 0x0, 0xf, 0xff, 0xfff, 0xffff };
-    static const unsigned short kXMask[] = { 0x0, 0x1111, 0x3333, 0x7777,
-            0xffff };
-    etc1_byte block[ETC1_DECODED_BLOCK_SIZE];
-    etc1_byte encoded[ETC1_ENCODED_BLOCK_SIZE];
-
-    etc1_uint32 encodedWidth = (width + 3) & ~3;
-    etc1_uint32 encodedHeight = (height + 3) & ~3;
-
-    for (etc1_uint32 y = 0; y < encodedHeight; y += 4) {
-        etc1_uint32 yEnd = height - y;
-        if (yEnd > 4) {
-            yEnd = 4;
-        }
-        int ymask = kYMask[yEnd];
-        for (etc1_uint32 x = 0; x < encodedWidth; x += 4) {
-            etc1_uint32 xEnd = width - x;
-            if (xEnd > 4) {
-                xEnd = 4;
-            }
-            int mask = ymask & kXMask[xEnd];
-            for (etc1_uint32 cy = 0; cy < yEnd; cy++) {
-                etc1_byte* q = block + (cy * 4) * 3;
-                const etc1_byte* p = pIn + pixelSize * x + stride * (y + cy);
-                if (pixelSize >= 3) {
-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
-                        memcpy(q, p, 3);
-                        q += 3;
-                        p += pixelSize;
-                    }
-                } else {
-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
-                        int pixel = (p[1] << 8) | p[0];
-                        *q++ = convert5To8(pixel >> 11);
-                        *q++ = convert6To8(pixel >> 5);
-                        *q++ = convert5To8(pixel);
-                        p += pixelSize;
-                    }
-                }
-            }
-            etc1_encode_block(block, mask, encoded);
-            memcpy(pOut, encoded, sizeof(encoded));
-            pOut += sizeof(encoded);
-        }
-    }
-    return 0;
-}
-
-// Decode an entire image.
-// pIn - pointer to encoded data.
-// pOut - pointer to the image data. Will be written such that the Red component of
-//       pixel (x,y) is at pOut + pixelSize * x + stride * y + redOffset. Must be
-//       large enough to store entire image.
-
-
-int etc1_decode_image(const etc1_byte* pIn, etc1_byte* pOut,
-        etc1_uint32 width, etc1_uint32 height,
-        etc1_uint32 pixelSize, etc1_uint32 stride) {
-    if (pixelSize < 2 || pixelSize > 4) {
-        return -1;
-    }
-    etc1_byte block[ETC1_DECODED_BLOCK_SIZE];
-
-    etc1_uint32 encodedWidth = (width + 3) & ~3;
-    etc1_uint32 encodedHeight = (height + 3) & ~3;
-
-    for (etc1_uint32 y = 0; y < encodedHeight; y += 4) {
-        etc1_uint32 yEnd = height - y;
-        if (yEnd > 4) {
-            yEnd = 4;
-        }
-        for (etc1_uint32 x = 0; x < encodedWidth; x += 4) {
-            etc1_uint32 xEnd = width - x;
-            if (xEnd > 4) {
-                xEnd = 4;
-            }
-            etc1_decode_block(pIn, block);
-            pIn += ETC1_ENCODED_BLOCK_SIZE;
-            for (etc1_uint32 cy = 0; cy < yEnd; cy++) {
-                const etc1_byte* q = block + (cy * 4) * 3;
-                etc1_byte* p = pOut + pixelSize * x + stride * (y + cy);
-                if (pixelSize >= 3) {
-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
-                        memcpy(p, q, 3);
-                        q += 3;
-                        p += pixelSize;
-                    }
-                } else {
-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
-                        etc1_byte r = *q++;
-                        etc1_byte g = *q++;
-                        etc1_byte b = *q++;
-                        etc1_uint32 pixel = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
-                        *p++ = (etc1_byte) pixel;
-                        *p++ = (etc1_byte) (pixel >> 8);
-                    }
-                }
-            }
-        }
-    }
-    return 0;
-}
-
-static const char kMagic[] = { 'P', 'K', 'M', ' ', '1', '0' };
-
-static const etc1_uint32 ETC1_PKM_FORMAT_OFFSET = 6;
-static const etc1_uint32 ETC1_PKM_ENCODED_WIDTH_OFFSET = 8;
-static const etc1_uint32 ETC1_PKM_ENCODED_HEIGHT_OFFSET = 10;
-static const etc1_uint32 ETC1_PKM_WIDTH_OFFSET = 12;
-static const etc1_uint32 ETC1_PKM_HEIGHT_OFFSET = 14;
-
-static const etc1_uint32 ETC1_RGB_NO_MIPMAPS = 0;
-
-static void writeBEUint16(etc1_byte* pOut, etc1_uint32 data) {
-    pOut[0] = (etc1_byte) (data >> 8);
-    pOut[1] = (etc1_byte) data;
-}
-
-static etc1_uint32 readBEUint16(const etc1_byte* pIn) {
-    return (pIn[0] << 8) | pIn[1];
-}
-
-// Format a PKM header
-
-void etc1_pkm_format_header(etc1_byte* pHeader, etc1_uint32 width, etc1_uint32 height) {
-    memcpy(pHeader, kMagic, sizeof(kMagic));
-    etc1_uint32 encodedWidth = (width + 3) & ~3;
-    etc1_uint32 encodedHeight = (height + 3) & ~3;
-    writeBEUint16(pHeader + ETC1_PKM_FORMAT_OFFSET, ETC1_RGB_NO_MIPMAPS);
-    writeBEUint16(pHeader + ETC1_PKM_ENCODED_WIDTH_OFFSET, encodedWidth);
-    writeBEUint16(pHeader + ETC1_PKM_ENCODED_HEIGHT_OFFSET, encodedHeight);
-    writeBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET, width);
-    writeBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET, height);
-}
-
-// Check if a PKM header is correctly formatted.
-
-etc1_bool etc1_pkm_is_valid(const etc1_byte* pHeader) {
-    if (memcmp(pHeader, kMagic, sizeof(kMagic))) {
-        return false;
-    }
-    etc1_uint32 format = readBEUint16(pHeader + ETC1_PKM_FORMAT_OFFSET);
-    etc1_uint32 encodedWidth = readBEUint16(pHeader + ETC1_PKM_ENCODED_WIDTH_OFFSET);
-    etc1_uint32 encodedHeight = readBEUint16(pHeader + ETC1_PKM_ENCODED_HEIGHT_OFFSET);
-    etc1_uint32 width = readBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET);
-    etc1_uint32 height = readBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET);
-    return format == ETC1_RGB_NO_MIPMAPS &&
-            encodedWidth >= width && encodedWidth - width < 4 &&
-            encodedHeight >= height && encodedHeight - height < 4;
-}
-
-// Read the image width from a PKM header
-
-etc1_uint32 etc1_pkm_get_width(const etc1_byte* pHeader) {
-    return readBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET);
-}
-
-// Read the image height from a PKM header
-
-etc1_uint32 etc1_pkm_get_height(const etc1_byte* pHeader){
-    return readBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET);
-}

+ 0 - 114
3rdparty/etc1/etc1.h

@@ -1,114 +0,0 @@
-// Copyright 2009 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//////////////////////////////////////////////////////////////////////////////////////////
-
-// This is a fork of the AOSP project ETC1 codec. The original code can be found
-// at the following web site:
-// https://android.googlesource.com/platform/frameworks/native/+/master/opengl/libs/ETC1/
-
-//////////////////////////////////////////////////////////////////////////////////////////
-
-#ifndef __etc1_h__
-#define __etc1_h__
-
-#define ETC1_ENCODED_BLOCK_SIZE 8
-#define ETC1_DECODED_BLOCK_SIZE 48
-
-#ifndef ETC1_RGB8_OES
-#define ETC1_RGB8_OES 0x8D64
-#endif
-
-typedef unsigned char etc1_byte;
-typedef int etc1_bool;
-typedef unsigned int etc1_uint32;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Encode a block of pixels.
-//
-// pIn is a pointer to an ETC1_DECODED_BLOCK_SIZE array of bytes that represent a
-// 4 x 4 square of 3-byte pixels in form R, G, B. Byte (3 * (x + 4 * y)) is the R
-// value of pixel (x, y).
-//
-// validPixelMask is a 16-bit mask where bit (1 << (x + y * 4)) indicates whether
-// the corresponding (x,y) pixel is valid. Invalid pixel color values are ignored when compressing.
-//
-// pOut is an ETC1 compressed version of the data.
-
-void etc1_encode_block(const etc1_byte* pIn, etc1_uint32 validPixelMask, etc1_byte* pOut);
-
-// Decode a block of pixels.
-//
-// pIn is an ETC1 compressed version of the data.
-//
-// pOut is a pointer to an ETC1_DECODED_BLOCK_SIZE array of bytes that represent a
-// 4 x 4 square of 3-byte pixels in form R, G, B. Byte (3 * (x + 4 * y)) is the R
-// value of pixel (x, y).
-
-void etc1_decode_block(const etc1_byte* pIn, etc1_byte* pOut);
-
-// Return the size of the encoded image data (does not include size of PKM header).
-
-etc1_uint32 etc1_get_encoded_data_size(etc1_uint32 width, etc1_uint32 height);
-
-// Encode an entire image.
-// pIn - pointer to the image data. Formatted such that
-//       pixel (x,y) is at pIn + pixelSize * x + stride * y;
-// pOut - pointer to encoded data. Must be large enough to store entire encoded image.
-// pixelSize can be 2, 3 or 4. 2 is a GL_UNSIGNED_SHORT_5_6_5 image, 3 is a GL_BYTE RGB image; with 4, only the first three bytes of each pixel are read.
-// returns non-zero if there is an error.
-
-int etc1_encode_image(const etc1_byte* pIn, etc1_uint32 width, etc1_uint32 height,
-        etc1_uint32 pixelSize, etc1_uint32 stride, etc1_byte* pOut);
-
-// Decode an entire image.
-// pIn - pointer to encoded data.
-// pOut - pointer to the image data. Will be written such that
-//        pixel (x,y) is at pOut + pixelSize * x + stride * y. Must be
-//        large enough to store entire image.
-// pixelSize can be 2, 3 or 4. 2 is a GL_UNSIGNED_SHORT_5_6_5 image, 3 is a GL_BYTE RGB image; with 4, only the first three bytes of each pixel are written.
-// returns non-zero if there is an error.
-
-int etc1_decode_image(const etc1_byte* pIn, etc1_byte* pOut,
-        etc1_uint32 width, etc1_uint32 height,
-        etc1_uint32 pixelSize, etc1_uint32 stride);
-
-// Size of a PKM header, in bytes.
-
-#define ETC_PKM_HEADER_SIZE 16
-
-// Format a PKM header
-
-void etc1_pkm_format_header(etc1_byte* pHeader, etc1_uint32 width, etc1_uint32 height);
-
-// Check if a PKM header is correctly formatted.
-
-etc1_bool etc1_pkm_is_valid(const etc1_byte* pHeader);
-
-// Read the image width from a PKM header
-
-etc1_uint32 etc1_pkm_get_width(const etc1_byte* pHeader);
-
-// Read the image height from a PKM header
-
-etc1_uint32 etc1_pkm_get_height(const etc1_byte* pHeader);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif

+ 0 - 24
3rdparty/etc2/LICENSE.txt

@@ -1,24 +0,0 @@
-Copyright (c) 2013, Bartosz Taudul <[email protected]>
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the <organization> nor the
-      names of its contributors may be used to endorse or promote products
-      derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 0 - 90
3rdparty/etc2/Math.hpp

@@ -1,90 +0,0 @@
-#ifndef __DARKRL__MATH_HPP__
-#define __DARKRL__MATH_HPP__
-
-#include <algorithm>
-#include <math.h>
-
-#include "Types.hpp"
-
-// Rounds val up to the next power of two that is >= val. Zero maps to 1.
-template<typename T>
-inline T AlignPOT( T val )
-{
-    if( val == 0 )
-    {
-        return 1;
-    }
-
-    // Smear the highest set bit of (val - 1) into every lower bit position,
-    // doubling the shift distance each round, then add one.
-    --val;
-    unsigned int shift = 1;
-    while( shift < sizeof( T ) * 8 )
-    {
-        val |= val >> shift;
-        shift <<= 1;
-    }
-    return val + 1;
-}
-
-// Returns the number of 1 bits in val.
-inline int CountSetBits( uint32 val )
-{
-    // Per-2-bit counts, then per-4-bit counts, then per-byte counts.
-    const uint32 pairs = val - ( ( val >> 1 ) & 0x55555555 );
-    const uint32 nibbles = ( ( pairs >> 2 ) & 0x33333333 ) + ( pairs & 0x33333333 );
-    uint32 bytes = ( ( nibbles >> 4 ) + nibbles ) & 0x0f0f0f0f;
-
-    // Fold the four byte counts into the lowest byte.
-    bytes += bytes >> 8;
-    bytes += bytes >> 16;
-    return bytes & 0x0000003f;
-}
-
-// Returns the number of zero bits above the highest set bit of val (32 for val == 0).
-inline int CountLeadingZeros( uint32 val )
-{
-    // Propagate the highest set bit into all lower positions (shifts 1, 2, 4, 8, 16);
-    // the population count of the result is then 32 minus the leading-zero count.
-    for( int shift = 1; shift <= 16; shift *= 2 )
-    {
-        val |= val >> shift;
-    }
-    return 32 - CountSetBits( val );
-}
-
-// Maps an sRGB-encoded value to linear: a straight division below the 0.04045
-// threshold, a 2.4 power curve above it.
-inline float sRGB2linear( float v )
-{
-    const float a = 0.055f;
-    return ( v <= 0.04045f )
-        ? v / 12.92f
-        : powf( ( v + a ) / ( 1 + a ), 2.4f );
-}
-
-// Maps a linear value to its sRGB encoding (the counterpart of sRGB2linear):
-// a straight multiplication up to the 0.0031308 threshold, a 1/2.4 power curve above it.
-inline float linear2sRGB( float v )
-{
-    const float a = 0.055f;
-    if( v <= 0.0031308f )
-    {
-        return 12.92f * v;
-    }
-    else
-    {
-        // powf (not pow) keeps the whole expression in single precision, matching
-        // sRGB2linear, regardless of which pow overloads <math.h> exposes.
-        return ( 1 + a ) * powf( v, 1/2.4f ) - a;
-    }
-}
-// Evaluates the cubic x*x*(3 - 2*x). The input is not clamped by this function.
-template<class T>
-inline T SmoothStep( T x )
-{
-    return x*x*(3-2*x);
-}
-
-// Clamps val to the range [0, 255] and returns it as a byte.
-inline uint8 clampu8( int32 val )
-{
-    if( val < 0 )
-    {
-        return 0;
-    }
-    if( val > 255 )
-    {
-        return 255;
-    }
-    return val;
-}
-// Returns val squared. The result is converted back to T, so it is truncated if the product does not fit in T.
-template<class T>
-inline T sq( T val )
-{
-    return val * val;
-}
-
-// Shift-only approximation of a * b / 255 with rounding (the +128 bias is the rounding term).
-static inline int mul8bit( int a, int b )
-{
-    const int biased = a*b + 128;
-    const int folded = biased + ( biased >> 8 );
-    return folded >> 8;
-}
-
-#endif

+ 0 - 51
3rdparty/etc2/ProcessCommon.hpp

@@ -1,51 +0,0 @@
-#ifndef __PROCESSCOMMON_HPP__
-#define __PROCESSCOMMON_HPP__
-
-#include <assert.h>
-#include <stddef.h>
-
-#include "Types.hpp"
-
-// Returns the index of the smallest of the first num entries of err.
-// Ties resolve to the lowest index; num == 0 yields 0 without reading err.
-template<class T>
-static size_t GetLeastError( const T* err, size_t num )
-{
-    size_t best = 0;
-    size_t candidate = 1;
-    while( candidate < num )
-    {
-        if( err[candidate] < err[best] ) best = candidate;
-        ++candidate;
-    }
-    return best;
-}
-
-// Reverses the order of the four bytes in the upper 32 bits of d.
-// The lower 32 bits are passed through unchanged.
-static uint64 FixByteOrder( uint64 d )
-{
-    const uint64 low   = d & 0x00000000FFFFFFFF;
-    const uint64 byte7 = ( d & 0xFF00000000000000 ) >> 24;
-    const uint64 byte4 = ( d & 0x000000FF00000000 ) << 24;
-    const uint64 byte6 = ( d & 0x00FF000000000000 ) >> 8;
-    const uint64 byte5 = ( d & 0x0000FF0000000000 ) << 8;
-    return low | byte7 | byte4 | byte6 | byte5;
-}
-
-template<class T, class S>
-static uint64 EncodeSelectors( uint64 d, const T terr[2][8], const S tsel[16][8], const uint32* id )
-{
-    size_t tidx[2];
-    tidx[0] = GetLeastError( terr[0], 8 );
-    tidx[1] = GetLeastError( terr[1], 8 );
-
-    d |= tidx[0] << 26;
-    d |= tidx[1] << 29;
-    for( int i=0; i<16; i++ )
-    {
-        uint64 t = tsel[i][tidx[id[i]%2]];
-        d |= ( t & 0x1 ) << ( i + 32 );
-        d |= ( t & 0x2 ) << ( i + 47 );
-    }
-
-    return d;
-}
-
-#endif

+ 0 - 719
3rdparty/etc2/ProcessRGB.cpp

@@ -1,719 +0,0 @@
-#include <string.h>
-
-#include "Math.hpp"
-#include "ProcessCommon.hpp"
-#include "ProcessRGB.hpp"
-#include "Tables.hpp"
-#include "Types.hpp"
-#include "Vector.hpp"
-
-#include <bx/endian.h>
-
-#ifdef __SSE4_1__
-#  ifdef _MSC_VER
-#    include <intrin.h>
-#    include <Windows.h>
-#  else
-#    include <x86intrin.h>
-#  endif
-#endif
-
-namespace
-{
-
-typedef uint16 v4i[4];
-/* Averages the 16 four-byte pixels at data over four halves of the block (two ways of
- * splitting it in two). With quadrant q = (j & 2) + (i >> 1) for pixel i of group j
- * (see the scalar path):
- *   a[0] = quadrants 2+3, a[1] = quadrants 0+1, a[2] = quadrants 1+3, a[3] = quadrants 0+2.
- * Each a[k] is { r, g, b, x } where b, g, r are bytes 0, 1, 2 of a pixel. The scalar path
- * sets x to 0; the SSE4.1 path does not mask byte 3, so x is the average of byte 3 there.
- */
-void Average( const uint8* data, v4i* a )
-{
-#ifdef __SSE4_1__
-    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
-    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
-    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
-    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
-
-    __m128i d0l = _mm_unpacklo_epi8(d0, _mm_setzero_si128()); // widen bytes to 16-bit lanes
-    __m128i d0h = _mm_unpackhi_epi8(d0, _mm_setzero_si128());
-    __m128i d1l = _mm_unpacklo_epi8(d1, _mm_setzero_si128());
-    __m128i d1h = _mm_unpackhi_epi8(d1, _mm_setzero_si128());
-    __m128i d2l = _mm_unpacklo_epi8(d2, _mm_setzero_si128());
-    __m128i d2h = _mm_unpackhi_epi8(d2, _mm_setzero_si128());
-    __m128i d3l = _mm_unpacklo_epi8(d3, _mm_setzero_si128());
-    __m128i d3h = _mm_unpackhi_epi8(d3, _mm_setzero_si128());
-
-    __m128i sum0 = _mm_add_epi16(d0l, d1l);
-    __m128i sum1 = _mm_add_epi16(d0h, d1h);
-    __m128i sum2 = _mm_add_epi16(d2l, d3l);
-    __m128i sum3 = _mm_add_epi16(d2h, d3h);
-
-    __m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128()); // widen 16-bit sums to 32-bit lanes
-    __m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
-    __m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
-    __m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
-    __m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
-    __m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
-    __m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
-    __m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
-
-    __m128i b0 = _mm_add_epi32(sum0l, sum0h);
-    __m128i b1 = _mm_add_epi32(sum1l, sum1h);
-    __m128i b2 = _mm_add_epi32(sum2l, sum2h);
-    __m128i b3 = _mm_add_epi32(sum3l, sum3h);
-
-    __m128i a0 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b2, b3), _mm_set1_epi32(4)), 3); // (sum + 4) >> 3: rounded mean of 8 pixels
-    __m128i a1 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b1), _mm_set1_epi32(4)), 3);
-    __m128i a2 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b1, b3), _mm_set1_epi32(4)), 3);
-    __m128i a3 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b2), _mm_set1_epi32(4)), 3);
-
-    _mm_storeu_si128((__m128i*)&a[0], _mm_packus_epi32(_mm_shuffle_epi32(a0, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a1, _MM_SHUFFLE(3, 0, 1, 2)))); // shuffle swaps lanes 0 and 2 (b <-> r); writes a[0], a[1]
-    _mm_storeu_si128((__m128i*)&a[2], _mm_packus_epi32(_mm_shuffle_epi32(a2, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a3, _MM_SHUFFLE(3, 0, 1, 2)))); // writes a[2], a[3]
-#else
-    uint32 r[4];
-    uint32 g[4];
-    uint32 b[4];
-
-    memset(r, 0, sizeof(r));
-    memset(g, 0, sizeof(g));
-    memset(b, 0, sizeof(b));
-
-    for( int j=0; j<4; j++ )
-    {
-        for( int i=0; i<4; i++ )
-        {
-            int index = (j & 2) + (i >> 1); // quadrant 0..3 this pixel belongs to
-            b[index] += *data++;
-            g[index] += *data++;
-            r[index] += *data++;
-            data++; // skip the 4th byte of the pixel
-        }
-    }
-
-    a[0][0] = uint16( (r[2] + r[3] + 4) / 8 ); // +4 rounds the mean of 8 pixels to nearest
-    a[0][1] = uint16( (g[2] + g[3] + 4) / 8 );
-    a[0][2] = uint16( (b[2] + b[3] + 4) / 8 );
-    a[0][3] = 0;
-    a[1][0] = uint16( (r[0] + r[1] + 4) / 8 );
-    a[1][1] = uint16( (g[0] + g[1] + 4) / 8 );
-    a[1][2] = uint16( (b[0] + b[1] + 4) / 8 );
-    a[1][3] = 0;
-    a[2][0] = uint16( (r[1] + r[3] + 4) / 8 );
-    a[2][1] = uint16( (g[1] + g[3] + 4) / 8 );
-    a[2][2] = uint16( (b[1] + b[3] + 4) / 8 );
-    a[2][3] = 0;
-    a[3][0] = uint16( (r[0] + r[2] + 4) / 8 );
-    a[3][1] = uint16( (g[0] + g[2] + 4) / 8 );
-    a[3][2] = uint16( (b[0] + b[2] + 4) / 8 );
-    a[3][3] = 0;
-#endif
-}
-/* Sums the first three bytes of the 16 four-byte pixels at data over the same four
- * halves that Average uses: err[0] = quadrants 2+3, err[1] = quadrants 0+1,
- * err[2] = quadrants 1+3, err[3] = quadrants 0+2, with quadrant = (j & 2) + (i >> 1).
- * Each err[k] is { sum of byte 0, sum of byte 1, sum of byte 2, 0 }.
- */
-void CalcErrorBlock( const uint8* data, uint err[4][4] )
-{
-#ifdef __SSE4_1__
-    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
-    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
-    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
-    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
-
-    __m128i dm0 = _mm_and_si128(d0, _mm_set1_epi32(0x00FFFFFF)); // zero the 4th byte of every pixel
-    __m128i dm1 = _mm_and_si128(d1, _mm_set1_epi32(0x00FFFFFF));
-    __m128i dm2 = _mm_and_si128(d2, _mm_set1_epi32(0x00FFFFFF));
-    __m128i dm3 = _mm_and_si128(d3, _mm_set1_epi32(0x00FFFFFF));
-
-    __m128i d0l = _mm_unpacklo_epi8(dm0, _mm_setzero_si128());
-    __m128i d0h = _mm_unpackhi_epi8(dm0, _mm_setzero_si128());
-    __m128i d1l = _mm_unpacklo_epi8(dm1, _mm_setzero_si128());
-    __m128i d1h = _mm_unpackhi_epi8(dm1, _mm_setzero_si128());
-    __m128i d2l = _mm_unpacklo_epi8(dm2, _mm_setzero_si128());
-    __m128i d2h = _mm_unpackhi_epi8(dm2, _mm_setzero_si128());
-    __m128i d3l = _mm_unpacklo_epi8(dm3, _mm_setzero_si128());
-    __m128i d3h = _mm_unpackhi_epi8(dm3, _mm_setzero_si128());
-
-    __m128i sum0 = _mm_add_epi16(d0l, d1l);
-    __m128i sum1 = _mm_add_epi16(d0h, d1h);
-    __m128i sum2 = _mm_add_epi16(d2l, d3l);
-    __m128i sum3 = _mm_add_epi16(d2h, d3h);
-
-    __m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128());
-    __m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
-    __m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
-    __m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
-    __m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
-    __m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
-    __m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
-    __m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
-
-    __m128i b0 = _mm_add_epi32(sum0l, sum0h);
-    __m128i b1 = _mm_add_epi32(sum1l, sum1h);
-    __m128i b2 = _mm_add_epi32(sum2l, sum2h);
-    __m128i b3 = _mm_add_epi32(sum3l, sum3h);
-
-    __m128i a0 = _mm_add_epi32(b2, b3);
-    __m128i a1 = _mm_add_epi32(b0, b1);
-    __m128i a2 = _mm_add_epi32(b1, b3);
-    __m128i a3 = _mm_add_epi32(b0, b2);
-
-    _mm_storeu_si128((__m128i*)&err[0], a0);
-    _mm_storeu_si128((__m128i*)&err[1], a1);
-    _mm_storeu_si128((__m128i*)&err[2], a2);
-    _mm_storeu_si128((__m128i*)&err[3], a3);
-#else
-    uint terr[4][4];
-
-    memset(terr, 0, 16 * sizeof(uint));
-
-    for( int j=0; j<4; j++ )
-    {
-        for( int i=0; i<4; i++ )
-        {
-            int index = (j & 2) + (i >> 1); // quadrant 0..3 this pixel belongs to
-            uint d = *data++;
-            terr[index][0] += d;
-            d = *data++;
-            terr[index][1] += d;
-            d = *data++;
-            terr[index][2] += d;
-            data++; // skip the 4th byte of the pixel
-        }
-    }
-
-    for( int i=0; i<3; i++ )
-    {
-        err[0][i] = terr[2][i] + terr[3][i];
-        err[1][i] = terr[0][i] + terr[1][i];
-        err[2][i] = terr[1][i] + terr[3][i];
-        err[3][i] = terr[0][i] + terr[2][i];
-    }
-    for( int i=0; i<4; i++ )
-    {
-        err[i][3] = 0;
-    }
-#endif
-}
-
-// Scores how well the colour `average` represents the 8 pixels whose per-byte sums are in
-// `block`: computes 0x3FFFFFFF - 2 * dot(block, average) + 8 * |average|^2.
-// block[] holds byte sums in b, g, r order while average[] is r, g, b, hence the
-// reversed index on average.
-uint CalcError( const uint block[4], const v4i& average )
-{
-    // Large bias keeps the running value from going negative while staying clear of overflow.
-    uint score = 0x3FFFFFFF;
-    for( int ch=0; ch<3; ch++ )
-    {
-        score -= block[ch] * 2 * average[2 - ch];
-    }
-    const int squares = sq( average[0] ) + sq( average[1] ) + sq( average[2] );
-    score += 8 * squares;
-    return score;
-}
-/* Quantizes the four averages in a[0..3] in place and derives a[4..7] from them, so the
- * caller must pass an array of at least 8 entries. In the scalar path:
- *   - for each pair (a[0],a[1]) and (a[2],a[3]): both colours are scaled to 0..31 with
- *     mul8bit(x, 31); the difference first-minus-second is clamped to [-4, 3];
- *     a[5+i*2] gets the second colour and a[4+i*2] the second colour plus the clamped
- *     difference, each expanded back to 8 bits as (c << 3) | (c >> 2);
- *   - a[0..3] are then replaced by g_avg2[mul8bit(x, 15)].
- * Only components 0..2 are written by the scalar path.
- */
-void ProcessAverages( v4i* a )
-{
-#ifdef __SSE4_1__
-    for( int i=0; i<2; i++ )
-    {
-        __m128i d = _mm_loadu_si128((__m128i*)a[i*2]); // loads a[i*2] and a[i*2+1] together
-
-        __m128i t = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(31)), _mm_set1_epi16(128));
-
-        __m128i c = _mm_srli_epi16(_mm_add_epi16(t, _mm_srli_epi16(t, 8)), 8); // same arithmetic as mul8bit(x, 31)
-
-        __m128i c1 = _mm_shuffle_epi32(c, _MM_SHUFFLE(3, 2, 3, 2));
-        __m128i diff = _mm_sub_epi16(c, c1);
-        diff = _mm_max_epi16(diff, _mm_set1_epi16(-4));
-        diff = _mm_min_epi16(diff, _mm_set1_epi16(3));
-
-        __m128i co = _mm_add_epi16(c1, diff);
-
-        c = _mm_blend_epi16(co, c, 0xF0);
-
-        __m128i a0 = _mm_or_si128(_mm_slli_epi16(c, 3), _mm_srli_epi16(c, 2));
-
-        _mm_storeu_si128((__m128i*)a[4+i*2], a0); // writes a[4+i*2] and a[5+i*2]
-    }
-
-    for( int i=0; i<2; i++ )
-    {
-        __m128i d = _mm_loadu_si128((__m128i*)a[i*2]);
-
-        __m128i t0 = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(15)), _mm_set1_epi16(128));
-        __m128i t1 = _mm_srli_epi16(_mm_add_epi16(t0, _mm_srli_epi16(t0, 8)), 8);
-
-        __m128i t2 = _mm_or_si128(t1, _mm_slli_epi16(t1, 4)); // duplicate the 4-bit value into both nibbles
-
-        _mm_storeu_si128((__m128i*)a[i*2], t2);
-    }
-#else
-    for( int i=0; i<2; i++ )
-    {
-        for( int j=0; j<3; j++ )
-        {
-            int32 c1 = mul8bit( a[i*2+1][j], 31 );
-            int32 c2 = mul8bit( a[i*2][j], 31 );
-
-            int32 diff = c2 - c1;
-            if( diff > 3 ) diff = 3;
-            else if( diff < -4 ) diff = -4;
-
-            int32 co = c1 + diff;
-
-            a[5+i*2][j] = ( c1 << 3 ) | ( c1 >> 2 ); // expand 5 bits to 8 by repeating the top bits
-            a[4+i*2][j] = ( co << 3 ) | ( co >> 2 );
-        }
-    }
-
-    for( int i=0; i<4; i++ )
-    {
-        a[i][0] = g_avg2[mul8bit( a[i][0], 15 )];
-        a[i][1] = g_avg2[mul8bit( a[i][1], 15 )];
-        a[i][2] = g_avg2[mul8bit( a[i][2], 15 )];
-    }
-#endif
-}
-
-// Writes the mode index and the two base colours a[2*idx], a[2*idx + 1] into _d.
-// idx goes to bits 24 and up. When bit 1 of idx is clear, each channel stores the top
-// 4 bits of both colours (first colour in the low nibble of byte i, second in the high
-// nibble). Otherwise each channel byte stores the top 5 bits of the second colour plus
-// the low 3 bits of the difference (first - second) measured in 5-bit units.
-void EncodeAverages( uint64& _d, const v4i* a, size_t idx )
-{
-    const size_t base = idx << 1;
-    const v4i& first = a[base+0];
-    const v4i& second = a[base+1];
-    const bool useDelta = ( idx & 0x2 ) != 0;
-
-    uint64 bits = _d;
-    bits |= ( idx << 24 );
-
-    for( int ch=0; ch<3; ch++ )
-    {
-        const int shift = ch*8;
-        if( !useDelta )
-        {
-            bits |= uint64( first[ch] >> 4 ) << shift;
-            bits |= uint64( second[ch] >> 4 ) << ( shift + 4 );
-        }
-        else
-        {
-            bits |= uint64( second[ch] & 0xF8 ) << shift;
-            int32 delta = ( ( first[ch] & 0xF8 ) - ( second[ch] & 0xF8 ) ) >> 3;
-            delta &= ~0xFFFFFFF8; // keep only the low 3 bits of the difference
-            bits |= ((uint64)delta) << shift;
-        }
-    }
-    _d = bits;
-}
-/* Returns 0 unless all 16 four-byte pixels at src are byte-identical to the first one.
- * For such a uniform block it returns 0x02000000 combined with (src[0] & 0xF8) << 16,
- * (src[1] & 0xF8) << 8 and (src[2] & 0xF8). All four bytes of each pixel take part in
- * the comparison in both code paths.
- */
-uint64 CheckSolid( const uint8* src )
-{
-#ifdef __SSE4_1__
-    __m128i d0 = _mm_loadu_si128(((__m128i*)src) + 0);
-    __m128i d1 = _mm_loadu_si128(((__m128i*)src) + 1);
-    __m128i d2 = _mm_loadu_si128(((__m128i*)src) + 2);
-    __m128i d3 = _mm_loadu_si128(((__m128i*)src) + 3);
-
-    __m128i c = _mm_shuffle_epi32(d0, _MM_SHUFFLE(0, 0, 0, 0)); // broadcast the first pixel to all four lanes
-
-    __m128i c0 = _mm_cmpeq_epi8(d0, c);
-    __m128i c1 = _mm_cmpeq_epi8(d1, c);
-    __m128i c2 = _mm_cmpeq_epi8(d2, c);
-    __m128i c3 = _mm_cmpeq_epi8(d3, c);
-
-    __m128i m0 = _mm_and_si128(c0, c1);
-    __m128i m1 = _mm_and_si128(c2, c3);
-    __m128i m = _mm_and_si128(m0, m1);
-
-    if (!_mm_testc_si128(m, _mm_set1_epi32(-1))) // some byte differed from the first pixel
-    {
-        return 0;
-    }
-#else
-    const uint8* ptr = src + 4;
-    for( int i=1; i<16; i++ )
-    {
-        if( memcmp( src, ptr, 4 ) != 0 )
-        {
-            return 0;
-        }
-        ptr += 4;
-    }
-#endif
-    return 0x02000000 |
-        ( uint( src[0] & 0xF8 ) << 16 ) |
-        ( uint( src[1] & 0xF8 ) << 8 ) |
-        ( uint( src[2] & 0xF8 ) );
-}
-
-// Fills a[0..7] with the candidate base colours for src (Average, then ProcessAverages)
-// and adds a CalcError score for each of them into err: a[0], a[1] go to err[0];
-// a[2], a[3] to err[1]; a[4], a[5] to err[2]; a[6], a[7] to err[3].
-// err is accumulated into, not reset, so the caller must initialise it.
-void PrepareAverages( v4i a[8], const uint8* src, uint err[4] )
-{
-    Average( src, a );
-    ProcessAverages( a );
-
-    uint sums[4][4];
-    CalcErrorBlock( src, sums );
-
-    for( int half=0; half<4; half++ )
-    {
-        const int slot = half/2;
-        err[slot] += CalcError( sums[half], a[half] );
-        err[2+slot] += CalcError( sums[half], a[half+4] );
-    }
-}
-/* For each of the 16 four-byte pixels at data: takes the base colour a[id[i]], forms the
- * weighted difference pix = dr*77 + dg*151 + db*28, and for each of the 8 modifier tables
- * picks the entry (0..3) whose squared distance to pix is smallest. The chosen entry index
- * is stored in tsel[i][table] and the squared distance is added to terr[id[i] % 2][table].
- * terr is accumulated into, so the caller must zero it first.
- */
-void FindBestFit( uint64 terr[2][8], uint16 tsel[16][8], v4i a[8], const uint32* id, const uint8* data )
-{
-    for( size_t i=0; i<16; i++ )
-    {
-        uint16* sel = tsel[i];
-        uint bid = id[i];
-        uint64* ter = terr[bid%2];
-
-        uint8 b = *data++;
-        uint8 g = *data++;
-        uint8 r = *data++;
-        data++; // skip the 4th byte of the pixel
-
-        int dr = a[bid][0] - r;
-        int dg = a[bid][1] - g;
-        int db = a[bid][2] - b;
-
-#ifdef __SSE4_1__
-        // Reference implementation
-
-        __m128i pix = _mm_set1_epi32(dr * 77 + dg * 151 + db * 28);
-        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
-        __m128i error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[0]));
-        __m128i error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[1]));
-        __m128i error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[0]));
-        __m128i error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[1]));
-
-        __m128i index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1));
-        __m128i minError0 = _mm_min_epi32(error0, error1);
-
-        __m128i index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2)); // compare mask is -1 when true, so this yields 2 or 3
-        __m128i minError1 = _mm_min_epi32(error2, error3);
-
-        __m128i minIndex0 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0));
-        __m128i minError = _mm_min_epi32(minError0, minError1);
-
-        // Squaring the minimum error to produce correct values when adding
-        __m128i minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0));
-        __m128i squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow);
-        squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0));
-        _mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
-        __m128i minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2));
-        __m128i squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
-        squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1));
-        _mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
-
-        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
-        error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[2]));
-        error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[3]));
-        error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[2]));
-        error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[3]));
-
-        index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1));
-        minError0 = _mm_min_epi32(error0, error1);
-
-        index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2));
-        minError1 = _mm_min_epi32(error2, error3);
-
-        __m128i minIndex1 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0));
-        minError = _mm_min_epi32(minError0, minError1);
-
-        // Squaring the minimum error to produce correct values when adding
-        minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0));
-        squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow);
-        squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 2));
-        _mm_storeu_si128(((__m128i*)ter) + 2, squareErrorLow);
-        minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2));
-        squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
-        squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 3));
-        _mm_storeu_si128(((__m128i*)ter) + 3, squareErrorHigh);
-        __m128i minIndex = _mm_packs_epi32(minIndex0, minIndex1);
-        _mm_storeu_si128((__m128i*)sel, minIndex);
-#else
-        int pix = dr * 77 + dg * 151 + db * 28;
-
-        for( int t=0; t<8; t++ )
-        {
-            const int64* tab = g_table256[t];
-            uint idx = 0;
-            uint64 err = sq( tab[0] + pix );
-            for( int j=1; j<4; j++ )
-            {
-                uint64 local = sq( tab[j] + pix );
-                if( local < err ) // strict comparison: ties keep the earlier entry
-                {
-                    err = local;
-                    idx = j;
-                }
-            }
-            *sel++ = idx;
-            *ter++ += err;
-        }
-#endif
-    }
-}
-/* SSE4.1-only overload of FindBestFit that accumulates into 32-bit error sums. */
-#ifdef __SSE4_1__
-// Non-reference implementation, but faster. Produces same results as the AVX2 version
-void FindBestFit( uint32 terr[2][8], uint16 tsel[16][8], v4i a[8], const uint32* id, const uint8* data )
-{
-    for( size_t i=0; i<16; i++ )
-    {
-        uint16* sel = tsel[i];
-        uint bid = id[i];
-        uint32* ter = terr[bid%2];
-
-        uint8 b = *data++;
-        uint8 g = *data++;
-        uint8 r = *data++;
-        data++; // skip the 4th byte of the pixel
-
-        int dr = a[bid][0] - r;
-        int dg = a[bid][1] - g;
-        int db = a[bid][2] - b;
-
-        // The scaling values are divided by two and rounded, to allow the differences to be in the range of signed int16
-        // This produces slightly different results, but is significantly faster
-        __m128i pixel = _mm_set1_epi16(dr * 38 + dg * 76 + db * 14);
-        __m128i pix = _mm_abs_epi16(pixel);
-
-        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
-        // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
-        __m128i error0 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[0]));
-        __m128i error1 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[1]));
-
-        __m128i index = _mm_and_si128(_mm_cmplt_epi16(error1, error0), _mm_set1_epi16(1));
-        __m128i minError = _mm_min_epi16(error0, error1);
-
-        // Exploiting symmetry of the selector table and use the sign bit
-        // This produces slightly different results, but is needed to produce same results as AVX2 implementation
-        __m128i indexBit = _mm_andnot_si128(_mm_srli_epi16(pixel, 15), _mm_set1_epi8(-1));
-        __m128i minIndex = _mm_or_si128(index, _mm_add_epi16(indexBit, indexBit));
-
-        // Squaring the minimum error to produce correct values when adding
-        __m128i squareErrorLo = _mm_mullo_epi16(minError, minError);
-        __m128i squareErrorHi = _mm_mulhi_epi16(minError, minError);
-
-        __m128i squareErrorLow = _mm_unpacklo_epi16(squareErrorLo, squareErrorHi); // interleave low/high halves into 32-bit products
-        __m128i squareErrorHigh = _mm_unpackhi_epi16(squareErrorLo, squareErrorHi);
-
-        squareErrorLow = _mm_add_epi32(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0));
-        _mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
-        squareErrorHigh = _mm_add_epi32(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1));
-        _mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
-
-        _mm_storeu_si128((__m128i*)sel, minIndex);
-    }
-}
-#endif
-
-// Truncates f to an integer, clamps it to [0, 1023] and reduces it to a value in
-// 0..63 using the integer rounding formula below.
-uint8_t convert6(float f)
-{
-    int clamped = static_cast<int>(f);
-    if (clamped < 0) clamped = 0;
-    if (clamped > 1023) clamped = 1023;
-
-    const int i = (clamped - 15) >> 1;
-    const int biased = i + 11;
-    return (biased - (biased >> 7) - ((i + 4) >> 7)) >> 3;
-}
-
-// Truncates f to an integer, clamps it to [0, 1023] and reduces it to a value in
-// 0..127 using the integer rounding formula below.
-uint8_t convert7(float f)
-{
-    int clamped = static_cast<int>(f);
-    if (clamped < 0) clamped = 0;
-    if (clamped > 1023) clamped = 1023;
-
-    const int i = (clamped - 15) >> 1;
-    const int biased = i + 9;
-    return (biased - (biased >> 8) - ((i + 6) >> 8)) >> 2;
-}
-/* Derives three colours (named O, H and V in the comments below) from the 16 four-byte
- * pixels at src, quantizes them to 6/7/6 bits (r/g/b), measures how well the colours
- * interpolated from them reproduce the block, and packs them into a 64-bit value.
- * Returns (packed block, accumulated squared error); the error is weighted 38/76/14
- * over r/g/b. Pixel bytes are read in b, g, r order; the 4th byte is ignored.
- * NOTE(review): presumably this is the ETC2 planar mode - confirm against the format spec.
- */
-std::pair<uint64, uint64> Planar(const uint8* src)
-{
-    int32 r = 0;
-    int32 g = 0;
-    int32 b = 0;
-
-    for (int i = 0; i < 16; ++i) // channel sums over the whole block
-    {
-        b += src[i * 4 + 0];
-        g += src[i * 4 + 1];
-        r += src[i * 4 + 2];
-    }
-
-    int32 difRyz = 0;
-    int32 difGyz = 0;
-    int32 difByz = 0;
-    int32 difRxz = 0;
-    int32 difGxz = 0;
-    int32 difBxz = 0;
-
-    const int32 scaling[] = { -255, -85, 85, 255 };
-
-    for (int i = 0; i < 16; ++i)
-    {
-        int32 difB = (static_cast<int>(src[i * 4 + 0]) << 4) - b; // 16 * pixel - sum, i.e. 16 * (pixel - mean)
-        int32 difG = (static_cast<int>(src[i * 4 + 1]) << 4) - g;
-        int32 difR = (static_cast<int>(src[i * 4 + 2]) << 4) - r;
-
-        difRyz += difR * scaling[i % 4];
-        difGyz += difG * scaling[i % 4];
-        difByz += difB * scaling[i % 4];
-
-        difRxz += difR * scaling[i / 4];
-        difGxz += difG * scaling[i / 4];
-        difBxz += difB * scaling[i / 4];
-    }
-
-    const float scale = -4.0f / ((255 * 255 * 8.0f + 85 * 85 * 8.0f) * 16.0f);
-
-    float aR = difRxz * scale;
-    float aG = difGxz * scale;
-    float aB = difBxz * scale;
-
-    float bR = difRyz * scale;
-    float bG = difGyz * scale;
-    float bB = difByz * scale;
-
-    float dR = r * (4.0f / 16.0f);
-    float dG = g * (4.0f / 16.0f);
-    float dB = b * (4.0f / 16.0f);
-
-    // calculating the three colors RGBO, RGBH, and RGBV.  RGB = df - af * x - bf * y;
-    float cofR = (aR *  255.0f + (bR *  255.0f + dR));
-    float cofG = (aG *  255.0f + (bG *  255.0f + dG));
-    float cofB = (aB *  255.0f + (bB *  255.0f + dB));
-    float chfR = (aR * -425.0f + (bR *  255.0f + dR));
-    float chfG = (aG * -425.0f + (bG *  255.0f + dG));
-    float chfB = (aB * -425.0f + (bB *  255.0f + dB));
-    float cvfR = (aR *  255.0f + (bR * -425.0f + dR));
-    float cvfG = (aG *  255.0f + (bG * -425.0f + dG));
-    float cvfB = (aB *  255.0f + (bB * -425.0f + dB));
-
-    // convert to r6g7b6
-    int32 coR = convert6(cofR);
-    int32 coG = convert7(cofG);
-    int32 coB = convert6(cofB);
-    int32 chR = convert6(chfR);
-    int32 chG = convert7(chfG);
-    int32 chB = convert6(chfB);
-    int32 cvR = convert6(cvfR);
-    int32 cvG = convert7(cvfG);
-    int32 cvB = convert6(cvfB);
-
-    // Error calculation
-    int32 ro0 = coR;
-    int32 go0 = coG;
-    int32 bo0 = coB;
-    int32 ro1 = (ro0 >> 4) | (ro0 << 2); // expand 6 bits to 8 by repeating the top bits
-    int32 go1 = (go0 >> 6) | (go0 << 1); // expand 7 bits to 8 by repeating the top bit
-    int32 bo1 = (bo0 >> 4) | (bo0 << 2);
-    int32 ro2 = (ro1 << 2) + 2; // pre-scaled by 4 with +2 so the later >> 2 rounds
-    int32 go2 = (go1 << 2) + 2;
-    int32 bo2 = (bo1 << 2) + 2;
-
-    int32 rh0 = chR;
-    int32 gh0 = chG;
-    int32 bh0 = chB;
-    int32 rh1 = (rh0 >> 4) | (rh0 << 2);
-    int32 gh1 = (gh0 >> 6) | (gh0 << 1);
-    int32 bh1 = (bh0 >> 4) | (bh0 << 2);
-
-    int32 rh2 = rh1 - ro1;
-    int32 gh2 = gh1 - go1;
-    int32 bh2 = bh1 - bo1;
-
-    int32 rv0 = cvR;
-    int32 gv0 = cvG;
-    int32 bv0 = cvB;
-    int32 rv1 = (rv0 >> 4) | (rv0 << 2);
-    int32 gv1 = (gv0 >> 6) | (gv0 << 1);
-    int32 bv1 = (bv0 >> 4) | (bv0 << 2);
-
-    int32 rv2 = rv1 - ro1;
-    int32 gv2 = gv1 - go1;
-    int32 bv2 = bv1 - bo1;
-
-    uint64 error = 0;
-
-    for (int i = 0; i < 16; ++i)
-    {
-        int32 cR = clampu8((rh2 * (i / 4) + rv2 * (i % 4) + ro2) >> 2); // colour reconstructed for pixel i
-        int32 cG = clampu8((gh2 * (i / 4) + gv2 * (i % 4) + go2) >> 2);
-        int32 cB = clampu8((bh2 * (i / 4) + bv2 * (i % 4) + bo2) >> 2);
-
-        int32 difB = static_cast<int>(src[i * 4 + 0]) - cB;
-        int32 difG = static_cast<int>(src[i * 4 + 1]) - cG;
-        int32 difR = static_cast<int>(src[i * 4 + 2]) - cR;
-
-        int32 dif = difR * 38 + difG * 76 + difB * 14;
-
-        error += dif * dif;
-    }
-
-    /* Pack the quantized colours into the two 32-bit halves of the block. */
-    uint32 rgbv = cvB | (cvG << 6) | (cvR << 13);
-    uint32 rgbh = chB | (chG << 6) | (chR << 13);
-    uint32 hi = rgbv | ((rgbh & 0x1FFF) << 19);
-    uint32 lo = (chR & 0x1) | 0x2 | ((chR << 1) & 0x7C);
-    lo |= ((coB & 0x07) <<  7) | ((coB & 0x18) <<  8) | ((coB & 0x20) << 11);
-    lo |= ((coG & 0x3F) << 17) | ((coG & 0x40) << 18);
-    lo |= coR << 25;
-
-    const int32 idx = (coR & 0x20) | ((coG & 0x20) >> 1) | ((coB & 0x1E) >> 1); // 6-bit index into g_flags
-
-    lo |= g_flags[idx];
-
-    uint64 result = static_cast<uint32>(bx::endianSwap(lo)); // byte-swapped lo fills the low 32 bits
-    result |= static_cast<uint64>(static_cast<uint32>(bx::endianSwap(hi))) << 32; // byte-swapped hi fills the high 32 bits
-
-    return std::make_pair(result, error);
-}
-
-// Variant of EncodeSelectors that competes against a pre-encoded alternative.
-// If the sum of the two least table errors is not smaller than `error`, `value` is
-// returned untouched. Otherwise the table indices and per-pixel selectors are written
-// into d (same bit layout as the four-argument overload) and the result is passed
-// through FixByteOrder.
-template<class T, class S>
-uint64 EncodeSelectors( uint64 d, const T terr[2][8], const S tsel[16][8], const uint32* id, const uint64 value, const uint64 error)
-{
-    const size_t best0 = GetLeastError( terr[0], 8 );
-    const size_t best1 = GetLeastError( terr[1], 8 );
-
-    if( !( ( terr[0][best0] + terr[1][best1] ) < error ) )
-    {
-        return value;
-    }
-
-    d |= best0 << 26;
-    d |= best1 << 29;
-    for( int px=0; px<16; px++ )
-    {
-        const size_t column = ( id[px]%2 ) ? best1 : best0;
-        const uint64 sel = tsel[px][column];
-        d |= ( sel & 0x1 ) << ( px + 32 );
-        d |= ( sel & 0x2 ) << ( px + 47 );
-    }
-
-    return FixByteOrder(d);
-}
-}
-
-// Encodes the 16 four-byte pixels at src into one 64-bit block.
-// Uniform blocks take the CheckSolid shortcut; otherwise the best of the four
-// base-colour configurations is chosen, selectors are fitted, and the result is
-// passed through FixByteOrder.
-uint64 ProcessRGB( const uint8* src )
-{
-    uint64 block = CheckSolid( src );
-    if( block != 0 )
-    {
-        return block;
-    }
-
-    v4i avg[8];
-    uint modeErr[4] = {};
-    PrepareAverages( avg, src, modeErr );
-    const size_t mode = GetLeastError( modeErr, 4 );
-    EncodeAverages( block, avg, mode );
-
-    // The fast SSE4.1 fit accumulates 32-bit errors; every other build uses 64-bit sums.
-#if defined __SSE4_1__ && !defined REFERENCE_IMPLEMENTATION
-    uint32 tableErr[2][8] = {};
-#else
-    uint64 tableErr[2][8] = {};
-#endif
-    uint16 selectors[16][8];
-    const uint32* pixelIds = g_id[mode];
-    FindBestFit( tableErr, selectors, avg, pixelIds, src );
-
-    const uint64 encoded = EncodeSelectors( block, tableErr, selectors, pixelIds );
-    return FixByteOrder( encoded );
-}
-
-// Encodes the 16 four-byte pixels at src into one 64-bit block, choosing between the
-// Planar() encoding and the base-colour + selector encoding: the six-argument
-// EncodeSelectors returns the Planar block unless the selector fit has lower error.
-uint64 ProcessRGB_ETC2( const uint8* src )
-{
-    const std::pair<uint64, uint64> planar = Planar( src );
-
-    uint64 block = 0;
-
-    v4i avg[8];
-    uint modeErr[4] = {};
-    PrepareAverages( avg, src, modeErr );
-    const size_t mode = GetLeastError( modeErr, 4 );
-    EncodeAverages( block, avg, mode );
-
-    uint64 tableErr[2][8] = {};
-    uint16 selectors[16][8];
-    const uint32* pixelIds = g_id[mode];
-    FindBestFit( tableErr, selectors, avg, pixelIds, src );
-
-    return EncodeSelectors( block, tableErr, selectors, pixelIds, planar.first, planar.second );
-}

+ 0 - 9
3rdparty/etc2/ProcessRGB.hpp

@@ -1,9 +0,0 @@
-#ifndef __PROCESSRGB_HPP__
-#define __PROCESSRGB_HPP__
-
-#include "Types.hpp"
-// Block encoders: each reads 16 four-byte pixels from src and returns one packed 64-bit block.
-uint64 ProcessRGB( const uint8* src );
-uint64 ProcessRGB_ETC2( const uint8* src );
-
-#endif

+ 0 - 109
3rdparty/etc2/Tables.cpp

@@ -1,109 +0,0 @@
-#include "Tables.hpp"
-// Eight modifier rows, each laid out as { small, large, -small, -large }.
-const int32 g_table[8][4] = {
-    {  2,  8,   -2,   -8 },
-    {  5, 17,   -5,  -17 },
-    {  9, 29,   -9,  -29 },
-    { 13, 42,  -13,  -42 },
-    { 18, 60,  -18,  -60 },
-    { 24, 80,  -24,  -80 },
-    { 33, 106, -33, -106 },
-    { 47, 183, -47, -183 }
-};
-// g_table with every entry multiplied by 256.
-const int64 g_table256[8][4] = {
-    {  2*256,  8*256,   -2*256,   -8*256 },
-    {  5*256, 17*256,   -5*256,  -17*256 },
-    {  9*256, 29*256,   -9*256,  -29*256 },
-    { 13*256, 42*256,  -13*256,  -42*256 },
-    { 18*256, 60*256,  -18*256,  -60*256 },
-    { 24*256, 80*256,  -24*256,  -80*256 },
-    { 33*256, 106*256, -33*256, -106*256 },
-    { 47*256, 183*256, -47*256, -183*256 }
-};
-// Per-pixel ids for each of the four configurations: row = configuration, column = pixel 0..15. Row k only uses the ids 2k and 2k+1.
-const uint32 g_id[4][16] = {
-    { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-    { 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2 },
-    { 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 },
-    { 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6 }
-};
-// g_avg2[i] == i * 0x11: a 4-bit value duplicated into both nibbles of a byte.
-const uint32 g_avg2[16] = {
-    0x00,
-    0x11,
-    0x22,
-    0x33,
-    0x44,
-    0x55,
-    0x66,
-    0x77,
-    0x88,
-    0x99,
-    0xAA,
-    0xBB,
-    0xCC,
-    0xDD,
-    0xEE,
-    0xFF
-};
-// 64 bit patterns selected by a 6-bit index. NOTE(review): the meaning of the individual bits is not evident from this table - confirm against the block format.
-const uint32 g_flags[64] = {
-    0x80800402, 0x80800402, 0x80800402, 0x80800402,
-    0x80800402, 0x80800402, 0x80800402, 0x8080E002,
-    0x80800402, 0x80800402, 0x8080E002, 0x8080E002,
-    0x80800402, 0x8080E002, 0x8080E002, 0x8080E002,
-    0x80000402, 0x80000402, 0x80000402, 0x80000402,
-    0x80000402, 0x80000402, 0x80000402, 0x8000E002,
-    0x80000402, 0x80000402, 0x8000E002, 0x8000E002,
-    0x80000402, 0x8000E002, 0x8000E002, 0x8000E002,
-    0x00800402, 0x00800402, 0x00800402, 0x00800402,
-    0x00800402, 0x00800402, 0x00800402, 0x0080E002,
-    0x00800402, 0x00800402, 0x0080E002, 0x0080E002,
-    0x00800402, 0x0080E002, 0x0080E002, 0x0080E002,
-    0x00000402, 0x00000402, 0x00000402, 0x00000402,
-    0x00000402, 0x00000402, 0x00000402, 0x0000E002,
-    0x00000402, 0x00000402, 0x0000E002, 0x0000E002,
-    0x00000402, 0x0000E002, 0x0000E002, 0x0000E002
-};
-/* Vector forms of the tables above, compiled only when __SSE4_1__ is defined.
- * The *_SIMD tables hold only the positive entries of g_table (columns 0 and 1).
- */
-#ifdef __SSE4_1__
-const uint8 g_flags_AVX2[64] =
-{
-    0x63, 0x63, 0x63, 0x63,
-    0x63, 0x63, 0x63, 0x7D,
-    0x63, 0x63, 0x7D, 0x7D,
-    0x63, 0x7D, 0x7D, 0x7D,
-    0x43, 0x43, 0x43, 0x43,
-    0x43, 0x43, 0x43, 0x5D,
-    0x43, 0x43, 0x5D, 0x5D,
-    0x43, 0x5D, 0x5D, 0x5D,
-    0x23, 0x23, 0x23, 0x23,
-    0x23, 0x23, 0x23, 0x3D,
-    0x23, 0x23, 0x3D, 0x3D,
-    0x23, 0x3D, 0x3D, 0x3D,
-    0x03, 0x03, 0x03, 0x03,
-    0x03, 0x03, 0x03, 0x1D,
-    0x03, 0x03, 0x1D, 0x1D,
-    0x03, 0x1D, 0x1D, 0x1D,
-};
-
-const __m128i g_table_SIMD[2] = // [0] = column 0 of g_table for rows 0..7, [1] = column 1
-{
-    _mm_setr_epi16(   2,   5,   9,  13,  18,  24,  33,  47),
-    _mm_setr_epi16(   8,  17,  29,  42,  60,  80, 106, 183)
-};
-const __m128i g_table128_SIMD[2] = // same layout as g_table_SIMD, entries multiplied by 128
-{
-    _mm_setr_epi16(   2*128,   5*128,   9*128,  13*128,  18*128,  24*128,  33*128,  47*128),
-    _mm_setr_epi16(   8*128,  17*128,  29*128,  42*128,  60*128,  80*128, 106*128, 183*128)
-};
-const __m128i g_table256_SIMD[4] = // entries multiplied by 256, four rows per vector
-{
-    _mm_setr_epi32(  2*256,   5*256,   9*256,  13*256), // column 0, rows 0..3
-    _mm_setr_epi32(  8*256,  17*256,  29*256,  42*256), // column 1, rows 0..3
-    _mm_setr_epi32( 18*256,  24*256,  33*256,  47*256), // column 0, rows 4..7
-    _mm_setr_epi32( 60*256,  80*256, 106*256, 183*256) // column 1, rows 4..7
-};
-#endif
-

+ 0 - 25
3rdparty/etc2/Tables.hpp

@@ -1,25 +0,0 @@
-#ifndef __TABLES_HPP__  // NOTE(review): identifiers containing a double underscore are reserved for the implementation in C++
-#define __TABLES_HPP__
-
-#include "Types.hpp"
-#ifdef __SSE4_1__
-#include <smmintrin.h>  // SSE4.1 intrinsics header; supplies __m128i for the SIMD declarations below
-#endif
-
-extern const int32 g_table[8][4];     // declarations only -- NOTE(review): definitions presumably live in Tables.cpp, confirm
-extern const int64 g_table256[8][4];  // same 8x4 shape as g_table, 64-bit elements
-
-extern const uint32 g_id[4][16];
-
-extern const uint32 g_avg2[16];
-
-extern const uint32 g_flags[64];
-
-#ifdef __SSE4_1__  // the tables below exist only in SSE4.1-enabled builds
-extern const uint8 g_flags_AVX2[64];
-extern const __m128i g_table_SIMD[2];
-extern const __m128i g_table128_SIMD[2];
-extern const __m128i g_table256_SIMD[4];
-#endif
-
-#endif

+ 0 - 17
3rdparty/etc2/Types.hpp

@@ -1,17 +0,0 @@
-#ifndef __DARKRL__TYPES_HPP__  // NOTE(review): identifiers containing a double underscore are reserved for the implementation in C++
-#define __DARKRL__TYPES_HPP__
-
-#include <stdint.h>  // source of the fixed-width integer types aliased below
-
-typedef int8_t      int8;    // short aliases for the <stdint.h> fixed-width types
-typedef uint8_t     uint8;
-typedef int16_t     int16;
-typedef uint16_t    uint16;
-typedef int32_t     int32;
-typedef uint32_t    uint32;
-typedef int64_t     int64;
-typedef uint64_t    uint64;
-
-typedef unsigned int uint;   // plain unsigned int, not a fixed-width type; declared at global scope
-
-#endif

+ 0 - 222
3rdparty/etc2/Vector.hpp

@@ -1,222 +0,0 @@
-#ifndef __DARKRL__VECTOR_HPP__
-#define __DARKRL__VECTOR_HPP__
-
-#include <assert.h>
-#include <algorithm>
-#include <math.h>
-
-#include "Math.hpp"
-#include "Types.hpp"
-
-template<class T>
-struct Vector2  // two-component value type with public members x and y
-{
-    Vector2() : x( 0 ), y( 0 ) {}  // both components start at zero
-    Vector2( T v ) : x( v ), y( v ) {}  // splat constructor; not explicit, so a bare T converts implicitly
-    Vector2( T _x, T _y ) : x( _x ), y( _y ) {}
-
-    bool operator==( const Vector2<T>& rhs ) const { return x == rhs.x && y == rhs.y; }  // exact component-wise comparison
-    bool operator!=( const Vector2<T>& rhs ) const { return !( *this == rhs ); }
-
-    Vector2<T>& operator+=( const Vector2<T>& rhs )  // component-wise add in place; returns *this by reference
-    {
-        x += rhs.x;
-        y += rhs.y;
-        return *this;
-    }
-    Vector2<T>& operator-=( const Vector2<T>& rhs )  // component-wise subtract in place
-    {
-        x -= rhs.x;
-        y -= rhs.y;
-        return *this;
-    }
-    Vector2<T>& operator*=( const Vector2<T>& rhs )  // component-wise (not dot) product in place
-    {
-        x *= rhs.x;
-        y *= rhs.y;
-        return *this;
-    }
-
-    T x, y;
-};
-
-template<class T>
-Vector2<T> operator+( const Vector2<T>& lhs, const Vector2<T>& rhs )  // component-wise sum, returned by value
-{
-    return Vector2<T>( lhs.x + rhs.x, lhs.y + rhs.y );
-}
-
-template<class T>
-Vector2<T> operator-( const Vector2<T>& lhs, const Vector2<T>& rhs )  // component-wise difference
-{
-    return Vector2<T>( lhs.x - rhs.x, lhs.y - rhs.y );
-}
-
-template<class T>
-Vector2<T> operator*( const Vector2<T>& lhs, const float& rhs )  // scale by a float; products are converted back to T by the constructor
-{
-    return Vector2<T>( lhs.x * rhs, lhs.y * rhs );
-}
-
-template<class T>
-Vector2<T> operator/( const Vector2<T>& lhs, const T& rhs )  // divide by a scalar of type T (so integer division when T is integral); rhs is not checked for zero
-{
-    return Vector2<T>( lhs.x / rhs, lhs.y / rhs );
-}
-
-
-typedef Vector2<int32> v2i;  // 32-bit integer pair
-typedef Vector2<float> v2f;  // float pair
-
-
-// Three-component vector (x, y, z) followed by a `padding` member of the same
-// type, so the struct is four elements wide. `padding` is never initialised
-// or read by any member below.
-// NOTE(review): presumably present for alignment / 4-wide loads -- confirm.
-template<class T>
-struct Vector3
-{
-    Vector3() : x( 0 ), y( 0 ), z( 0 ) {}
-    // Splat constructor; not explicit, so a bare T converts implicitly.
-    Vector3( T v ) : x( v ), y( v ), z( v ) {}
-    Vector3( T _x, T _y, T _z ) : x( _x ), y( _y ), z( _z ) {}
-    // Converting constructor: casts each component of a Vector3<Y> to T.
-    template<class Y>
-    Vector3( const Vector3<Y>& v ) : x( T( v.x ) ), y( T( v.y ) ), z( T( v.z ) ) {}
-
-    // Weighted sum of the components (float weights 0.3 / 0.59 / 0.11), cast back to T.
-    T Luminance() const { return T( x * 0.3f + y * 0.59f + z * 0.11f ); }
-    // Clamps every component to the range [T(0), T(1)] in place.
-    void Clamp()
-    {
-        x = std::min( T(1), std::max( T(0), x ) );
-        y = std::min( T(1), std::max( T(0), y ) );
-        z = std::min( T(1), std::max( T(0), z ) );
-    }
-
-    bool operator==( const Vector3<T>& rhs ) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
-    // Fixed: the parameter used to be Vector2<T>, which made `a != b` on two
-    // Vector3 values fail to compile (and the Vector2 form could never
-    // instantiate, because operator== only accepts a Vector3).
-    bool operator!=( const Vector3<T>& rhs ) const { return !( *this == rhs ); }
-
-    // Indexed access (0 = x, 1 = y, 2 = z). Treats the object as an array of T,
-    // i.e. relies on x, y, z being laid out contiguously from offset 0.
-    // idx is validated by assert only. The const overload no longer casts
-    // constness away.
-    T& operator[]( uint idx ) { assert( idx < 3 ); return reinterpret_cast<T*>( this )[idx]; }
-    const T& operator[]( uint idx ) const { assert( idx < 3 ); return reinterpret_cast<const T*>( this )[idx]; }
-
-    // The compound operators below modify *this but return a copy by value
-    // (unlike Vector2, which returns a reference); kept as-is so the
-    // signatures callers see do not change.
-    Vector3<T> operator+=( const Vector3<T>& rhs )
-    {
-        x += rhs.x;
-        y += rhs.y;
-        z += rhs.z;
-        return *this;
-    }
-
-    // Component-wise product in place.
-    Vector3<T> operator*=( const Vector3<T>& rhs )
-    {
-        x *= rhs.x;
-        y *= rhs.y;
-        z *= rhs.z;
-        return *this;
-    }
-
-    // Uniform scale by a float in place.
-    Vector3<T> operator*=( const float& rhs )
-    {
-        x *= rhs;
-        y *= rhs;
-        z *= rhs;
-        return *this;
-    }
-
-    T x, y, z;
-    T padding;
-};
-
-template<class T>
-Vector3<T> operator+( const Vector3<T>& lhs, const Vector3<T>& rhs )  // component-wise sum, returned by value
-{
-    return Vector3<T>( lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z );
-}
-
-template<class T>
-Vector3<T> operator-( const Vector3<T>& lhs, const Vector3<T>& rhs )  // component-wise difference
-{
-    return Vector3<T>( lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z );
-}
-
-template<class T>
-Vector3<T> operator*( const Vector3<T>& lhs, const Vector3<T>& rhs )  // component-wise (not dot or cross) product
-{
-    return Vector3<T>( lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z );
-}
-
-template<class T>
-Vector3<T> operator*( const Vector3<T>& lhs, const float& rhs )  // scale by a float; each product is explicitly cast back to T
-{
-    return Vector3<T>( T( lhs.x * rhs ), T( lhs.y * rhs ), T( lhs.z * rhs ) );
-}
-
-template<class T>
-Vector3<T> operator/( const Vector3<T>& lhs, const T& rhs )  // divide by a scalar of type T; rhs is not checked for zero
-{
-    return Vector3<T>( lhs.x / rhs, lhs.y / rhs, lhs.z / rhs );
-}
-
-template<class T>
-bool operator<( const Vector3<T>& lhs, const Vector3<T>& rhs )  // orders by Luminance() only, so distinct vectors with equal luminance compare as neither less
-{
-    return lhs.Luminance() < rhs.Luminance();
-}
-
-typedef Vector3<int32> v3i;  // 32-bit integer triple
-typedef Vector3<float> v3f;  // float triple
-typedef Vector3<uint8> v3b;  // byte triple
-
-
-// Converts a float vector to bytes: each component is clamped to [0, 1],
-// multiplied by 255 and truncated to uint8.
-// The lower clamp is a fix: previously only the upper bound was applied, so a
-// negative component was converted straight to uint8, which is undefined
-// behaviour for a negative floating-point value. Results for inputs already
-// in [0, 1] (or above 1) are unchanged.
-static inline v3b v3f_to_v3b( const v3f& v )
-{
-    return v3b(
-        uint8( std::max( 0.f, std::min( 1.f, v.x ) ) * 255 ),
-        uint8( std::max( 0.f, std::min( 1.f, v.y ) ) * 255 ),
-        uint8( std::max( 0.f, std::min( 1.f, v.z ) ) * 255 ) );
-}
-
-template<class T>
-Vector3<T> Mix( const Vector3<T>& v1, const Vector3<T>& v2, float amount )  // linear blend: amount 0 gives v1, amount 1 gives v2; amount is not clamped
-{
-    return v1 + ( v2 - v1 ) * amount;
-}
-
-template<>
-inline v3b Mix( const v3b& v1, const v3b& v2, float amount )  // byte specialisation: blends in float (v3f) and converts the result back to bytes
-{
-    return v3b( v3f( v1 ) + ( v3f( v2 ) - v3f( v1 ) ) * amount );  // avoids doing the v2 - v1 subtraction in uint8
-}
-
-template<class T>
-Vector3<T> Desaturate( const Vector3<T>& v )  // grey vector whose three components all equal v.Luminance()
-{
-    T l = v.Luminance();
-    return Vector3<T>( l, l, l );
-}
-
-template<class T>
-Vector3<T> Desaturate( const Vector3<T>& v, float mul )  // same as above, with the luminance scaled by mul before being cast to T
-{
-    T l = T( v.Luminance() * mul );
-    return Vector3<T>( l, l, l );
-}
-
-template<class T>
-Vector3<T> pow( const Vector3<T>& base, float exponent )  // raises each component to `exponent` via the scalar pow overload found for T
-{
-    return Vector3<T>(
-        pow( base.x, exponent ),
-        pow( base.y, exponent ),
-        pow( base.z, exponent ) );
-}
-
-template<class T>
-Vector3<T> sRGB2linear( const Vector3<T>& v )  // applies the scalar sRGB2linear() to each component
-{
-    return Vector3<T>(
-        sRGB2linear( v.x ),  // NOTE(review): scalar overload is not defined in this header; presumably from Math.hpp -- confirm
-        sRGB2linear( v.y ),
-        sRGB2linear( v.z ) );
-}
-
-template<class T>
-Vector3<T> linear2sRGB( const Vector3<T>& v )  // applies the scalar linear2sRGB() to each component
-{
-    return Vector3<T>(
-        linear2sRGB( v.x ),  // NOTE(review): scalar overload presumably from Math.hpp -- confirm
-        linear2sRGB( v.y ),
-        linear2sRGB( v.z ) );
-}
-
-#endif

+ 0 - 32
3rdparty/iqa/LICENSE

@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */

+ 0 - 36
3rdparty/iqa/README.txt

@@ -1,36 +0,0 @@
-Doxygen documentation can be found at: http://tdistler.com/iqa
-
-BUILD:
-
-  All build artifacts end up in build/<configuration>, where <configuration> is
-  'debug' or 'release'.
-
-  Windows:
-    - Open iqa.sln, select 'Debug' or 'Release', and build. The output is a 
-      static library 'iqa.lib'.
-    - To run the tests under the debugger, first right-click the 'test' project,
-      select Properties -> Configuration Properties -> Debugging and set
-      'Working Directory' to '$(OutDir)'. Then start the application.
-
-  Linux:
-    - Change directories into the root of the IQA branch you want to build.
-    - Type `make` for a debug build, or `make RELEASE=1` for a release build.
-      The output is a static library 'libiqa.a'.
-    - Type `make test` (or `make test RELEASE=1`) to build the unit tests.
-    - Type `make clean` (or `make clean RELEASE=1`) to delete all build
-      artifacts.
-    - To run the tests, `cd` to the build/<configuration> directory and type
-      `./test`.
-
-
-USE:
-
-  - Include 'iqa.h' in your source file.
-  - Call iqa_* methods.
-  - Link against the IQA library.
-
-
-HELP & SUPPORT:
-
-  Further help can be found at: https://sourceforge.net/projects/iqa/support
-

+ 0 - 111
3rdparty/iqa/include/convolve.h

@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _CONVOLVE_H_
-#define _CONVOLVE_H_
-
-typedef float (*_iqa_get_pixel)(const float *img, int w, int h, int x, int y, float bnd_const); /* boundary-handler signature shared by the three KBND_* functions below */
-
-/** Out-of-bounds array values are a mirrored reflection of the border values */
-float KBND_SYMMETRIC(const float *img, int w, int h, int x, int y, float bnd_const);
-/** Out-of-bounds array values are set to the nearest border value */
-float KBND_REPLICATE(const float *img, int w, int h, int x, int y, float bnd_const);
-/** Out-of-bounds array values are set to 'bnd_const' */
-float KBND_CONSTANT(const float *img, int w, int h, int x, int y, float bnd_const);
-
-
-/** Defines a convolution kernel. The struct stores a raw pointer to the
- *  values; nothing in this header states who owns that storage. */
-struct _kernel {
-    float *kernel;          /**< Pointer to the kernel values (presumably w*h floats, row-major -- TODO confirm) */
-    int w;                  /**< The kernel width */
-    int h;                  /**< The kernel height */
-    int normalized;         /**< 1 if the kernel values add up to 1. 0 otherwise */
-    _iqa_get_pixel bnd_opt; /**< Defines how out-of-bounds image values are handled (e.g. one of the KBND_* functions) */
-    float bnd_const;        /**< If 'bnd_opt' is KBND_CONSTANT, this specifies the out-of-bounds value */
-};
-
-/**
- * @brief Applies the specified kernel to the image.
- * The kernel will be applied to all areas where it fits completely within
- * the image. The resulting image will be smaller by half the kernel width
- * and height (w - kw/2 and h - kh/2).
- *
- * NOTE(review): the output size stated above (w - kw/2 by h - kh/2) does not
- * match the buffer size given for 'result' below ((w-kw)*(h-kh)). Confirm the
- * real dimensions against the implementation, or read them back through
- * 'rw' / 'rh', before sizing a buffer.
- *
- * @param img Image to modify
- * @param w Image width
- * @param h Image height
- * @param k The kernel to apply
- * @param result Buffer to hold the resulting image ((w-kw)*(h-kh), where kw
- *               and kh are the kernel width and height). If 0, the result
- *               will be written to the original image buffer.
- * @param rw Optional. The width of the resulting image will be stored here.
- * @param rh Optional. The height of the resulting image will be stored here.
- */
-void _iqa_convolve(float *img, int w, int h, const struct _kernel *k, float *result, int *rw, int *rh);
-
-/**
- * The same as _iqa_convolve() except the kernel is applied to the entire image.
- * In other words, the kernel is applied to all areas where the top-left corner
- * of the kernel is in the image. Out-of-bound pixel values (off the right and
- * bottom edges) are chosen based on the 'bnd_opt' and 'bnd_const' members of
- * the kernel structure. The resulting array is the same size as the input
- * image.
- *
- * NOTE(review): the size quoted for 'result' below ((w-kw)*(h-kh)) contradicts
- * the "same size as the input image" statement above and looks copied from
- * _iqa_convolve(); presumably the buffer must hold w*h floats -- confirm.
- *
- * @param img Image to modify
- * @param w Image width
- * @param h Image height
- * @param k The kernel to apply
- * @param result Buffer to hold the resulting image ((w-kw)*(h-kh), where kw
- *               and kh are the kernel width and height). If 0, the result
- *               will be written to the original image buffer.
- * @return 0 if successful. Non-zero otherwise.
- */
-int _iqa_img_filter(float *img, int w, int h, const struct _kernel *k, float *result);
-
-/**
- * Returns the filtered version of the specified pixel. If no kernel is given,
- * the raw pixel value is returned.
- *
- * @param img Source image (read-only; taken as const float *)
- * @param w Image width
- * @param h Image height
- * @param x The x location of the pixel to filter
- * @param y The y location of the pixel to filter
- * @param k Optional. The convolution kernel to apply to the pixel.
- * @param kscale The scale of the kernel (for normalization). 1 for normalized
- *               kernels. Required if 'k' is not null.
- * @return The filtered pixel value.
- */
-float _iqa_filter_pixel(const float *img, int w, int h, int x, int y, const struct _kernel *k, const float kscale);
-
-
-#endif /*_CONVOLVE_H_*/

+ 0 - 55
3rdparty/iqa/include/decimate.h

@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _DECIMATE_H_
-#define _DECIMATE_H_
-
-#include "convolve.h"
-
-/**
- * @brief Downsamples (decimates) an image.
- *
- * @param img Image to modify
- * @param w Image width
- * @param h Image height
- * @param factor Decimation factor (the behaviour for factor <= 0 is not
- *               documented here -- NOTE(review): confirm before relying on it)
- * @param k The kernel to apply (e.g. low-pass filter). Can be 0.
- * @param result Buffer to hold the resulting image (w/factor*h/factor). If 0,
- *               the result will be written to the original image buffer.
- * @param rw Optional. The width of the resulting image will be stored here.
- * @param rh Optional. The height of the resulting image will be stored here.
- * @return 0 on success.
- */
-int _iqa_decimate(float *img, int w, int h, int factor, const struct _kernel *k, float *result, int *rw, int *rh);
-
-#endif /*_DECIMATE_H_*/

+ 0 - 134
3rdparty/iqa/include/iqa.h

@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _IQA_H_
-#define _IQA_H_
-
-#include "iqa_os.h"
-
-/**
- * Allows fine-grain control of the SSIM algorithm.
- * Passed by pointer to iqa_ssim(); a null pointer selects the defaults
- * documented there (a=b=g=1.0, L=255, K1=0.01, K2=0.03).
- */
-struct iqa_ssim_args {
-    float alpha;    /**< luminance exponent */
-    float beta;     /**< contrast exponent */
-    float gamma;    /**< structure exponent */
-    int L;          /**< dynamic range (e.g. 255 = 2^8 - 1 for 8-bit images) */
-    float K1;       /**< stabilization constant 1 */
-    float K2;       /**< stabilization constant 2 */
-    int f;          /**< scale factor. 0=default scaling, 1=no scaling */
-};
-
-/**
- * Allows fine-grain control of the MS-SSIM algorithm.
- * Passed by pointer to iqa_ms_ssim(); a null pointer selects the defaults
- * documented there (wang=0, scales=5, gaussian=1).
- * NOTE(review): the three arrays presumably hold 'scales' elements each --
- * confirm against the implementation.
- */
-struct iqa_ms_ssim_args {
-    int wang;             /**< 1=original algorithm by Wang, et al. 0=MS-SSIM* by Rouse/Hemami (default). */
-    int gaussian;         /**< 1=11x11 Gaussian window (default). 0=8x8 linear window. */
-    int scales;           /**< Number of scaled images to use. Default is 5. */
-    const float *alphas;  /**< Pointer to array of alpha values for each scale. Required if 'scales' isn't 5. */
-    const float *betas;   /**< Pointer to array of beta values for each scale. Required if 'scales' isn't 5. */
-    const float *gammas;  /**< Pointer to array of gamma values for each scale. Required if 'scales' isn't 5. */
-};
-
-/**
- * Calculates the Mean Squared Error between 2 equal-sized 8-bit images.
- * @note The images must have the same width, height, and stride.
- * @note Unlike iqa_ssim(), no error return value is documented for this
- *       function.
- * @param ref Original reference image
- * @param cmp Distorted image
- * @param w Width of the images
- * @param h Height of the images
- * @param stride The length (in bytes) of each horizontal line in the image.
- *               This may be different from the image width.
- * @return The MSE.
- */
-float iqa_mse(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride);
-
-/**
- * Calculates the Peak Signal-to-Noise-Ratio between 2 equal-sized 8-bit
- * images.
- * @note The images must have the same width, height, and stride.
- * @note Unlike iqa_ssim(), no error return value is documented for this
- *       function.
- * @param ref Original reference image
- * @param cmp Distorted image
- * @param w Width of the images
- * @param h Height of the images
- * @param stride The length (in bytes) of each horizontal line in the image.
- *               This may be different from the image width.
- * @return The PSNR.
- */
-float iqa_psnr(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride);
-
-/**
- * Calculates the Structural SIMilarity between 2 equal-sized 8-bit images.
- *
- * See https://ece.uwaterloo.ca/~z70wang/publications/ssim.html
- * @note The images must have the same width, height, and stride.
- * @note Errors are reported in-band: check the result against INFINITY
- *       before using it as a similarity score.
- * @param ref Original reference image
- * @param cmp Distorted image
- * @param w Width of the images
- * @param h Height of the images
- * @param stride The length (in bytes) of each horizontal line in the image.
- *               This may be different from the image width.
- * @param gaussian 0 = 8x8 square window, 1 = 11x11 circular-symmetric Gaussian
- * weighting.
- * @param args Optional SSIM arguments for fine control of the algorithm. 0 for
- * defaults. Defaults are a=b=g=1.0, L=255, K1=0.01, K2=0.03
- * @return The mean SSIM over the entire image (MSSIM), or INFINITY if error.
- */
-float iqa_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride, 
-    int gaussian, const struct iqa_ssim_args *args);
-
-/**
- * Calculates the Multi-Scale Structural SIMilarity between 2 equal-sized 8-bit
- * images. The default algorithm is MS-SSIM* proposed by Rouse/Hemami 2008.
- *
- * See https://ece.uwaterloo.ca/~z70wang/publications/msssim.pdf and
- * http://foulard.ece.cornell.edu/publications/dmr_hvei2008_paper.pdf
- *
- * @note 1. The images must have the same width, height, and stride.
- * @note 2. The minimum image width or height is 2^(scales-1) * filter, where 'filter' is 11
- * if a Gaussian window is being used, or 9 otherwise.
- * NOTE(review): the non-Gaussian window is described elsewhere in this header
- * as 8x8, yet 'filter' is given as 9 here -- confirm which value the
- * implementation uses for the size check.
- * @param ref Original reference image
- * @param cmp Distorted image
- * @param w Width of the images.
- * @param h Height of the images.
- * @param stride The length (in bytes) of each horizontal line in the image.
- *               This may be different from the image width.
- * @param args Optional MS-SSIM arguments for fine control of the algorithm. 0
- * for defaults. Defaults are wang=0, scales=5, gaussian=1.
- * @return The mean MS-SSIM over the entire image, or INFINITY if error.
- */
-float iqa_ms_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride, 
-    const struct iqa_ms_ssim_args *args);
-
-#endif /*_IQA_H_*/

+ 0 - 68
3rdparty/iqa/include/iqa_os.h

@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _OS_H_
-#define _OS_H_
-
-/* Microsoft tends to implement features early, but they have a high legacy
- * cost because they won't break existing implementations. As such, certain
- * features we take for granted on other platforms (like C99) aren't fully
- * implemented. This file is meant to rectify that.
- */
-
-#ifdef WIN32 /* NOTE(review): tests WIN32, not the compiler-predefined _WIN32; presumably supplied by the build settings -- confirm */
-
-#include <windows.h>
-#include <math.h>
-
-#define IQA_INLINE __inline
-
-#ifndef INFINITY
-    #define INFINITY (float)HUGE_VAL /**< Defined in C99 (Windows is C89) */
-#endif /*INFINITY*/
-
-#ifndef NAN
-    static const unsigned long __nan[2] = {0xffffffff, 0x7fffffff}; /* raw bit pattern reinterpreted as a float by the macro below; being static in a header, every including file gets its own copy */
-    #define NAN (*(const float *) __nan) /**< Defined in C99 (Windows is C89) */
-#endif
-
-#define IQA_EXPORT __declspec(dllexport) /* always dllexport; this header has no dllimport variant */
-
-#else /* !Windows */
-
-#define IQA_INLINE inline
-#define IQA_EXPORT /* expands to nothing outside Windows */
-
-#endif
-
-#endif /* _OS_H_ */

+ 0 - 64
3rdparty/iqa/include/math_utils.h

@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _MATH_UTILS_H_
-#define _MATH_UTILS_H_
-
-#include "iqa_os.h"
-#include <math.h>
-
-/**
- * Rounds a float to the nearest integer.
- * NOTE(review): file-scope identifiers that begin with an underscore (_round,
- * _max, _min, ...) are reserved for the implementation in C, so these names
- * can clash with compiler or libc symbols -- confirm on each toolchain.
- */
-IQA_EXPORT int _round(float a);
-
-IQA_EXPORT int _max(int x, int y); /* presumably the larger of x and y -- undocumented here, confirm */
-
-IQA_EXPORT int _min(int x, int y); /* presumably the smaller of x and y -- undocumented here, confirm */
-
-
-/** 
- * Compares 2 floats to the specified digit of precision.
- * @return 0 if equal, 1 otherwise.
- */
-IQA_EXPORT int _cmp_float(float a, float b, int digits);
-
-
-/** 
- * Compares 2 matrices with the specified precision. 'b' is assumed to be the
- * same size as 'a' or smaller.
- * @return 0 if equal, 1 otherwise
- */
-IQA_EXPORT int _matrix_cmp(const float *a, const float *b, int w, int h, int digits);
-
-#endif /*_MATH_UTILS_H_*/

+ 0 - 117
3rdparty/iqa/include/ssim.h

@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SSIM_H_
-#define _SSIM_H_
-
-#include "convolve.h"
-
-/*
- * Circular-symmetric Gaussian weighting.
- * h(x,y) = hg(x,y)/SUM(SUM(hg)) , for normalization to 1.0
- * hg(x,y) = e^( -0.5*( (x^2+y^2)/sigma^2 ) ) , where sigma was 1.5
- *
- * The table is symmetric about its centre entry [5][5]. It is declared
- * 'static const' in a header, so every file that includes this header gets
- * its own private copy.
- */
-#define GAUSSIAN_LEN 11
-static const float g_gaussian_window[GAUSSIAN_LEN][GAUSSIAN_LEN] = {
-    {0.000001f, 0.000008f, 0.000037f, 0.000112f, 0.000219f, 0.000274f, 0.000219f, 0.000112f, 0.000037f, 0.000008f, 0.000001f},
-    {0.000008f, 0.000058f, 0.000274f, 0.000831f, 0.001619f, 0.002021f, 0.001619f, 0.000831f, 0.000274f, 0.000058f, 0.000008f},
-    {0.000037f, 0.000274f, 0.001296f, 0.003937f, 0.007668f, 0.009577f, 0.007668f, 0.003937f, 0.001296f, 0.000274f, 0.000037f},
-    {0.000112f, 0.000831f, 0.003937f, 0.011960f, 0.023294f, 0.029091f, 0.023294f, 0.011960f, 0.003937f, 0.000831f, 0.000112f},
-    {0.000219f, 0.001619f, 0.007668f, 0.023294f, 0.045371f, 0.056662f, 0.045371f, 0.023294f, 0.007668f, 0.001619f, 0.000219f},
-    {0.000274f, 0.002021f, 0.009577f, 0.029091f, 0.056662f, 0.070762f, 0.056662f, 0.029091f, 0.009577f, 0.002021f, 0.000274f},
-    {0.000219f, 0.001619f, 0.007668f, 0.023294f, 0.045371f, 0.056662f, 0.045371f, 0.023294f, 0.007668f, 0.001619f, 0.000219f},
-    {0.000112f, 0.000831f, 0.003937f, 0.011960f, 0.023294f, 0.029091f, 0.023294f, 0.011960f, 0.003937f, 0.000831f, 0.000112f},
-    {0.000037f, 0.000274f, 0.001296f, 0.003937f, 0.007668f, 0.009577f, 0.007668f, 0.003937f, 0.001296f, 0.000274f, 0.000037f},
-    {0.000008f, 0.000058f, 0.000274f, 0.000831f, 0.001619f, 0.002021f, 0.001619f, 0.000831f, 0.000274f, 0.000058f, 0.000008f},
-    {0.000001f, 0.000008f, 0.000037f, 0.000112f, 0.000219f, 0.000274f, 0.000219f, 0.000112f, 0.000037f, 0.000008f, 0.000001f},
-};
-
-/*
- * Equal weight square window.
- * Each pixel is equally weighted (1/64) so that SUM(x) = 1.0
- * (0.015625 is exactly 1/64, and the window has 8*8 = 64 entries.)
- */
-#define SQUARE_LEN 8
-static const float g_square_window[SQUARE_LEN][SQUARE_LEN] = {
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
-};
-
-/* Holds intermediate SSIM values for map-reduce operation.
- * NOTE(review): l / c / s presumably are the luminance, contrast and
- * structure terms (matching alpha / beta / gamma in iqa_ssim_args) --
- * confirm against ssim.c. */
-struct _ssim_int {
-    double l;
-    double c;
-    double s;
-};
-
-/* Defines the pointers to the map-reduce functions.
- * _map receives one _ssim_int plus the caller's context pointer; _reduce
- * receives two ints plus the context and yields the final float.
- * NOTE(review): the two ints are presumably the image width and height --
- * confirm against the caller. */
-typedef int (*_map)(const struct _ssim_int *, void *);
-typedef float (*_reduce)(int, int, void *);
-
-/* Arguments for map-reduce. The 'context' is user-defined. */
-struct _map_reduce {
-    _map map;
-    _reduce reduce;
-    void *context;
-};
-
-/**
- * Private method that calculates the SSIM value on a pre-processed image.
- *
- * The input images must have stride==width. This method does not scale.
- *
- * @note Image buffers are modified (both 'ref' and 'cmp' are non-const
- *       float pointers).
- *
- * Map-reduce is used for doing the final SSIM calculation. The map function is
- * called for every pixel, and the reduce is called at the end. The context is
- * caller-defined and *not* modified by this method.
- *
- * @param ref Original reference image
- * @param cmp Distorted image
- * @param w Width of the images
- * @param h Height of the images
- * @param k The kernel used as the window function
- * @param mr Optional map-reduce functions to use to calculate SSIM. Required
- *           if 'args' is not null. Ignored if 'args' is null.
- * @param args Optional SSIM arguments for fine control of the algorithm. 0 for defaults.
- *             Defaults are a=b=g=1.0, L=255, K1=0.01, K2=0.03
- * @return The mean SSIM over the entire image (MSSIM), or INFINITY if error.
- */
-float _iqa_ssim(float *ref, float *cmp, int w, int h, const struct _kernel *k, const struct _map_reduce *mr, const struct iqa_ssim_args *args);
-
-#endif /* _SSIM_H_ */

+ 0 - 195
3rdparty/iqa/source/convolve.c

@@ -1,195 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "convolve.h"
-#include <stdlib.h>
-
-/*
- * Boundary handler: reflects an out-of-range coordinate back into the image
- * so that the edge pixel is repeated (x=-1 -> 0, x=w -> w-1, same for y).
- * 'bnd_const' is accepted only to match the handler signature.
- */
-float KBND_SYMMETRIC(const float *img, int w, int h, int x, int y, float bnd_const)
-{
-    int col = x;
-    int row = y;
-
-    (void)bnd_const;
-
-    /* Reflect the column. */
-    if (col >= w)
-        col = (w - (col - w)) - 1;
-    else if (col < 0)
-        col = -1 - col;
-
-    /* Reflect the row. */
-    if (row >= h)
-        row = (h - (row - h)) - 1;
-    else if (row < 0)
-        row = -1 - row;
-
-    return img[row*w + col];
-}
-
-/*
- * Boundary handler: clamps the coordinate to the nearest pixel inside the
- * image, so the border pixels are repeated outwards. 'bnd_const' is unused.
- */
-float KBND_REPLICATE(const float *img, int w, int h, int x, int y, float bnd_const)
-{
-    int col = (x < 0) ? 0 : x;
-    int row = (y < 0) ? 0 : y;
-
-    (void)bnd_const;
-
-    if (col >= w)
-        col = w - 1;
-    if (row >= h)
-        row = h - 1;
-
-    return img[row*w + col];
-}
-
-/*
- * Boundary handler: every coordinate outside the image yields 'bnd_const';
- * coordinates inside the image yield the pixel itself.
- *
- * img        Image data, w*h floats with stride == w.
- * x, y       Requested coordinate (may lie outside the image).
- * bnd_const  Value reported for out-of-image coordinates.
- */
-float KBND_CONSTANT(const float *img, int w, int h, int x, int y, float bnd_const)
-{
-    /* Negative coordinates used to be clamped to 0, which made the top and
-     * left borders replicate the edge pixel while the bottom and right
-     * borders returned the constant. All four sides are now treated alike. */
-    if (x < 0 || y < 0 || x >= w || y >= h)
-        return bnd_const;
-    return img[y*w + x];
-}
-
-/*
- * Returns the factor that normalizes kernel 'k': 1 when the kernel is flagged
- * as already normalized or when its coefficients sum to zero, otherwise the
- * reciprocal of the coefficient sum.
- */
-static float _calc_scale(const struct _kernel *k)
-{
-    double total = 0.0;
-    int idx;
-    int count;
-
-    if (k->normalized)
-        return 1.0f;
-
-    count = k->w * k->h;
-    for (idx = 0; idx < count; ++idx)
-        total += k->kernel[idx];
-
-    return (total != 0.0) ? (float)(1.0 / total) : 1.0f;
-}
-
-/*
- * Applies kernel 'k' at every position where it fits entirely inside the
- * w x h image, so the output is (w - k->w + 1) x (h - k->h + 1) and is stored
- * with that reduced width as its stride.
- *
- * result  Destination buffer, or null to overwrite 'img' in place.
- * rw, rh  Optional; receive the output width and height.
- */
-void _iqa_convolve(float *img, int w, int h, const struct _kernel *k, float *result, int *rw, int *rh)
-{
-    const int half_w = k->w/2;
-    const int half_h = k->h/2;
-    /* Even-sized kernels have one tap fewer on the positive side. */
-    const int last_u = half_w - ((k->w & 1) ? 0 : 1);
-    const int last_v = half_h - ((k->h & 1) ? 0 : 1);
-    const int out_w = w - k->w + 1;
-    const int out_h = h - k->h + 1;
-    const float scale = _calc_scale(k);
-    float *out = result ? result : img; /* null result => convolve in place */
-    int x, y, u, v;
-
-    for (y = 0; y < out_h; ++y) {
-        for (x = 0; x < out_w; ++x) {
-            const float *coef = k->kernel;
-            double acc = 0.0;
-
-            for (v = -half_h; v <= last_v; ++v) {
-                /* Points at the kernel centre column of the current row. */
-                const float *row = img + (y + half_h + v)*w + (x + half_w);
-                for (u = -half_w; u <= last_u; ++u)
-                    acc += row[u] * *coef++;
-            }
-            out[y*out_w + x] = (float)(acc * scale);
-        }
-    }
-
-    if (rw)
-        *rw = out_w;
-    if (rh)
-        *rh = out_h;
-}
-
-/*
- * Filters the whole w x h image with kernel 'k', producing an output of the
- * same size; pixels near the border go through the kernel's boundary handler.
- *
- * result  Destination buffer, or null to write the output back into 'img'
- *         (a temporary buffer is used in that case).
- * Returns 0 on success, 1 if the kernel or its boundary handler is missing,
- * 2 if the temporary buffer cannot be allocated.
- */
-int _iqa_img_filter(float *img, int w, int h, const struct _kernel *k, float *result)
-{
-    float *out = result;
-    float kscale;
-    int row, col, idx;
-
-    if (!k || !k->bnd_opt)
-        return 1;
-
-    if (!out) {
-        out = (float*)malloc(w*h*sizeof(float));
-        if (!out)
-            return 2;
-    }
-
-    kscale = _calc_scale(k);
-
-    /* One filtered sample per input pixel, written in raster order. */
-    idx = 0;
-    for (row = 0; row < h; ++row) {
-        for (col = 0; col < w; ++col, ++idx)
-            out[idx] = _iqa_filter_pixel(img, w, h, col, row, k, kscale);
-    }
-
-    /* No caller buffer: move the output into the image and drop the scratch. */
-    if (!result) {
-        idx = 0;
-        for (row = 0; row < h; ++row) {
-            for (col = 0; col < w; ++col, ++idx)
-                img[idx] = out[idx];
-        }
-        free(out);
-    }
-    return 0;
-}
-
-/*
- * Computes the kernel-weighted sum centred on pixel (x,y), multiplied by
- * 'kscale'. When the kernel would reach outside the image, every sample is
- * fetched through the kernel's boundary handler instead of read directly.
- * With a null kernel the pixel at (x,y) is returned unchanged.
- */
-float _iqa_filter_pixel(const float *img, int w, int h, int x, int y, const struct _kernel *k, const float kscale)
-{
-    int half_w, half_h, last_u, last_v;
-    int u, v, on_edge;
-    int tap = 0;
-    double acc = 0.0;
-
-    if (!k)
-        return img[y*w + x];
-
-    half_w = k->w/2;
-    half_h = k->h/2;
-    /* Even-sized kernels have one tap fewer on the positive side. */
-    last_u = half_w - ((k->w & 1) ? 0 : 1);
-    last_v = half_h - ((k->h & 1) ? 0 : 1);
-
-    /* Inside the safe region no sample can fall outside the image. */
-    on_edge = !(x >= half_w && y >= half_h && x < w - half_w && y < h - half_h);
-
-    for (v = -half_h; v <= last_v; ++v) {
-        for (u = -half_w; u <= last_u; ++u, ++tap) {
-            float sample;
-            if (on_edge)
-                sample = k->bnd_opt(img, w, h, x+u, y+v, k->bnd_const);
-            else
-                sample = img[(y+v)*w + x + u];
-            acc += sample * k->kernel[tap];
-        }
-    }
-    return (float)(acc * kscale);
-}

+ 0 - 59
3rdparty/iqa/source/decimate.c

@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "decimate.h"
-#include <stdlib.h>
-
-/*
- * Down-samples the w x h image by keeping every 'factor'-th pixel in both
- * directions; each kept pixel is first passed through _iqa_filter_pixel()
- * with kernel 'k' and a scale of 1.
- *
- * result  Destination buffer, or null to write the output over 'img'.
- * rw, rh  Optional; receive the output width and height.
- * Returns 0 on success, 1 if 'img' is null or 'factor' is less than 1.
- */
-int _iqa_decimate(float *img, int w, int h, int factor, const struct _kernel *k, float *result, int *rw, int *rh)
-{
-    int x,y;
-    int sw,sh;
-    int dst_offset;
-    float *dst = result ? result : img;
-
-    /* A factor below 1 would divide by zero or step backwards. */
-    if (!img || factor < 1)
-        return 1;
-
-    /* ceil(w/factor) and ceil(h/factor): the last sampled coordinate is then
-     * always inside the image. The former 'w/factor + (w&1)' only matched
-     * this for factor == 2; e.g. w=9, factor=3 sampled x=9, one past the end. */
-    sw = (w + factor - 1) / factor;
-    sh = (h + factor - 1) / factor;
-
-    /* Downsample */
-    for (y=0; y<sh; ++y) {
-        dst_offset = y*sw;
-        for (x=0; x<sw; ++x,++dst_offset) {
-            dst[dst_offset] = _iqa_filter_pixel(img, w, h, x*factor, y*factor, k, 1.0f);
-        }
-    }
-
-    if (rw) *rw = sw;
-    if (rh) *rh = sh;
-    return 0;
-}

+ 0 - 82
3rdparty/iqa/source/math_utils.c

@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "math_utils.h"
-#include <math.h>
-
-/*
- * Rounds 'a' to the nearest integer; exact halves move away from zero
- * (2.5 -> 3, -2.5 -> -3).
- */
-int _round(float a)
-{
-    int whole = (int)a;             /* truncates toward zero */
-    float frac = a - (float)whole;  /* carries the sign of 'a' */
-
-    /* The fraction is negative for negative input, so both signs have to be
-     * tested; the former single 'frac >= 0.5' check left e.g. -1.7 at -1. */
-    if (frac >= 0.5f)
-        return whole + 1;
-    if (frac <= -0.5f)
-        return whole - 1;
-    return whole;
-}
-
-/* Returns the larger of the two integers. */
-int _max(int x, int y)
-{
-    if (x < y)
-        return y;
-    return x;
-}
-
-/* Returns the smaller of the two integers. */
-int _min(int x, int y)
-{
-    if (x > y)
-        return y;
-    return x;
-}
-
-/*
- * Compares two floats after rounding each to 'digits' decimal places.
- * Returns 0 when the rounded values are equal, 1 otherwise.
- */
-int _cmp_float(float a, float b, int digits)
-{
-    double scale = pow(10.0, (double)digits);
-    double scaled[2];
-    int rounded[2];
-    int i;
-
-    scaled[0] = a * scale;
-    scaled[1] = b * scale;
-
-    /* Round half away from zero. The fraction is negative for negative
-     * values, so both signs are tested; the former 'frac >= 0.5' check never
-     * rounded negative numbers and reported e.g. -1.26 and -1.2 as equal at
-     * one digit. */
-    for (i = 0; i < 2; ++i) {
-        int whole = (int)scaled[i];
-        double frac = scaled[i] - whole;
-        if (frac >= 0.5)
-            ++whole;
-        else if (frac <= -0.5)
-            --whole;
-        rounded[i] = whole;
-    }
-
-    /* Compare */
-    return rounded[0] == rounded[1] ? 0 : 1;
-}
-
-/*
- * Compares two w x h float matrices element by element with _cmp_float() at
- * the given number of digits. Returns 0 if all elements match, 1 as soon as
- * one pair differs.
- */
-int _matrix_cmp(const float *a, const float *b, int w, int h, int digits)
-{
-    const int count = w*h;
-    int i;
-
-    for (i = 0; i < count; ++i) {
-        if (_cmp_float(a[i], b[i], digits))
-            return 1;
-    }
-    return 0;
-}
-

+ 0 - 277
3rdparty/iqa/source/ms_ssim.c

@@ -1,277 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "iqa.h"
-#include "ssim.h"
-#include "decimate.h"
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-/* Default number of scales */
-#define SCALES  5
-
-/* Low-pass filter for down-sampling (9/7 biorthogonal wavelet filter).
- * Stored as an LPF_LEN x LPF_LEN table; iqa_ms_ssim() installs it as the
- * kernel (flagged as normalized) used when decimating each scale by 2. */
-#define LPF_LEN 9
-static const float g_lpf[LPF_LEN][LPF_LEN] = {
-   { 0.000714f,-0.000450f,-0.002090f, 0.007132f, 0.016114f, 0.007132f,-0.002090f,-0.000450f, 0.000714f},
-   {-0.000450f, 0.000283f, 0.001316f,-0.004490f,-0.010146f,-0.004490f, 0.001316f, 0.000283f,-0.000450f},
-   {-0.002090f, 0.001316f, 0.006115f,-0.020867f,-0.047149f,-0.020867f, 0.006115f, 0.001316f,-0.002090f},
-   { 0.007132f,-0.004490f,-0.020867f, 0.071207f, 0.160885f, 0.071207f,-0.020867f,-0.004490f, 0.007132f},
-   { 0.016114f,-0.010146f,-0.047149f, 0.160885f, 0.363505f, 0.160885f,-0.047149f,-0.010146f, 0.016114f},
-   { 0.007132f,-0.004490f,-0.020867f, 0.071207f, 0.160885f, 0.071207f,-0.020867f,-0.004490f, 0.007132f},
-   {-0.002090f, 0.001316f, 0.006115f,-0.020867f,-0.047149f,-0.020867f, 0.006115f, 0.001316f,-0.002090f},
-   {-0.000450f, 0.000283f, 0.001316f,-0.004490f,-0.010146f,-0.004490f, 0.001316f, 0.000283f,-0.000450f},
-   { 0.000714f,-0.000450f,-0.002090f, 0.007132f, 0.016114f, 0.007132f,-0.002090f,-0.000450f, 0.000714f},
-};
-
-/* Alpha, beta, and gamma values for each scale: the exponents applied to the
- * mean luminance, contrast and structure terms by the reduce step. One entry
- * per default scale; used unless the caller passes its own arrays. */
-static float g_alphas[] = { 0.0000f, 0.0000f, 0.0000f, 0.0000f, 0.1333f };
-static float g_betas[]  = { 0.0448f, 0.2856f, 0.3001f, 0.2363f, 0.1333f };
-static float g_gammas[] = { 0.0448f, 0.2856f, 0.3001f, 0.2363f, 0.1333f };
-
-
-struct _context {  /* Per-scale accumulator used as the map-reduce context */
-    double l;  /* Luminance: summed by the map step, finalized by reduce */
-    double c;  /* Contrast: summed by the map step, finalized by reduce */
-    double s;  /* Structure: summed by the map step, finalized by reduce */
-    float alpha;  /* exponent applied to the mean luminance */
-    float beta;   /* exponent applied to the mean contrast */
-    float gamma;  /* exponent applied to the mean structure (absolute value) */
-};
-
-/* Map step, called for each pixel: adds the pixel's luminance, contrast and
- * structure terms to the running totals in the context. Always returns 0. */
-int _ms_ssim_map(const struct _ssim_int *si, void *ctx)
-{
-    struct _context *totals = (struct _context*)ctx;
-
-    totals->l = totals->l + si->l;
-    totals->c = totals->c + si->c;
-    totals->s = totals->s + si->s;
-
-    return 0;
-}
-
-/* Reduce step, called once per scale: turns each running total into a mean
- * over w*h, raises it to its exponent (the structure mean by absolute
- * value), stores the three results back in the context and returns their
- * product. */
-float _ms_ssim_reduce(int w, int h, void *ctx)
-{
-    struct _context *totals = (struct _context*)ctx;
-    const double count = (double)(w*h);
-    const double mean_l = totals->l / count;
-    const double mean_c = totals->c / count;
-    const double mean_s = totals->s / count;
-
-    totals->l = pow(mean_l, (double)totals->alpha);
-    totals->c = pow(mean_c, (double)totals->beta);
-    totals->s = pow(fabs(mean_s), (double)totals->gamma);
-
-    return (float)(totals->l * totals->c * totals->s);
-}
-
-/* Frees the first 'scales' image buffers in 'buf'; the pointer array itself
- * is left for the caller to release. */
-void _free_buffers(float **buf, int scales)
-{
-    int level = 0;
-
-    while (level < scales) {
-        free(buf[level]);
-        ++level;
-    }
-}
-
-/* Allocates one float image per scale into 'buf', starting at w x h and
- * halving each dimension (rounded up) from one scale to the next. Returns 0
- * on success; on failure the buffers allocated so far are freed and 1 is
- * returned. */
-int _alloc_buffers(float **buf, int w, int h, int scales)
-{
-    int level;
-    int lw = w;
-    int lh = h;
-
-    for (level = 0; level < scales; ++level) {
-        float *img = (float*)malloc(lw*lh*sizeof(float));
-
-        buf[level] = img;
-        if (!img) {
-            /* Roll back everything allocated before this level. */
-            _free_buffers(buf, level);
-            return 1;
-        }
-        lw = lw/2 + (lw&1);
-        lh = lh/2 + (lh&1);
-    }
-    return 0;
-}
-
-/*
- * MS_SSIM(X,Y) = Lm(x,y)^aM * MULT[j=1->M]( Cj(x,y)^bj  *  Sj(x,y)^gj )
- * where,
- *  L = mean
- *  C = variance
- *  S = cross-correlation
- *
- *  b1=g1=0.0448, b2=g2=0.2856, b3=g3=0.3001, b4=g4=0.2363, a5=b5=g5=0.1333
- *
- * ref, cmp  8-bit images of w x h pixels; consecutive rows are 'stride'
- *           bytes apart.
- * args      Optional overrides (algorithm variant, window type, number of
- *           scales, per-scale exponents). Null selects the defaults:
- *           Rouse/Hemami variant, Gaussian window, SCALES scales.
- *
- * Returns the product of the per-scale results, or INFINITY on error: an
- * invalid scale count, an image too small for the window at some scale, an
- * allocation failure, or a failure reported for one of the scales.
- */
-float iqa_ms_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, 
-    int stride, const struct iqa_ms_ssim_args *args)
-{
-    int wang=0;
-    int scales=SCALES;
-    int gauss=1;
-    const float *alphas=g_alphas, *betas=g_betas, *gammas=g_gammas;
-    int idx,x,y,cur_w,cur_h;
-    int offset,src_offset;
-    float **ref_imgs, **cmp_imgs; /* Array of pointers to scaled images */
-    float msssim, scale_ssim;
-    struct _kernel lpf, window;
-    struct iqa_ssim_args s_args;
-    struct _map_reduce mr;
-    struct _context ms_ctx;
-
-    if (args) {
-        wang   = args->wang;
-        gauss  = args->gaussian;
-        scales = args->scales;
-        if (args->alphas)
-            alphas = args->alphas;
-        if (args->betas)
-            betas  = args->betas;
-        if (args->gammas)
-            gammas = args->gammas;
-    }
-
-    /* At least one scale is needed, and the built-in exponent tables only
-     * hold SCALES entries: more scales require caller-supplied tables, or
-     * the per-scale lookups below would read past the end of the defaults. */
-    if (scales < 1)
-        return INFINITY;
-    if (scales > SCALES &&
-        (alphas == g_alphas || betas == g_betas || gammas == g_gammas))
-        return INFINITY;
-
-    /* Make sure we won't scale below 1x1 */
-    cur_w = w;
-    cur_h = h;
-    for (idx=0; idx<scales; ++idx) {
-        if ( gauss ? cur_w<GAUSSIAN_LEN || cur_h<GAUSSIAN_LEN : cur_w<LPF_LEN || cur_h<LPF_LEN )
-            return INFINITY;
-        cur_w /= 2;
-        cur_h /= 2;
-    }
-
-    window.kernel = (float*)g_square_window;
-    window.w = window.h = SQUARE_LEN;
-    window.normalized = 1;
-    window.bnd_opt = KBND_SYMMETRIC;
-    window.bnd_const = 0.0f; /* unused by KBND_SYMMETRIC; set so the field is defined */
-    if (gauss) {
-        window.kernel = (float*)g_gaussian_window;
-        window.w = window.h = GAUSSIAN_LEN;
-    }
-
-    mr.map     = _ms_ssim_map;
-    mr.reduce  = _ms_ssim_reduce;
-    mr.context = &ms_ctx;
-
-    /* Allocate the scaled image buffers */
-    ref_imgs = (float**)malloc(scales*sizeof(float*));
-    cmp_imgs = (float**)malloc(scales*sizeof(float*));
-    if (!ref_imgs || !cmp_imgs) {
-        if (ref_imgs) free(ref_imgs);
-        if (cmp_imgs) free(cmp_imgs);
-        return INFINITY;
-    }
-    if (_alloc_buffers(ref_imgs, w, h, scales)) {
-        free(ref_imgs);
-        free(cmp_imgs);
-        return INFINITY;
-    }
-    if (_alloc_buffers(cmp_imgs, w, h, scales)) {
-        _free_buffers(ref_imgs, scales);
-        free(ref_imgs);
-        free(cmp_imgs);
-        return INFINITY;
-    }
-
-    /* Copy original images into first scale buffer, forcing stride = width. */
-    for (y=0; y<h; ++y) {
-        src_offset = y*stride;
-        offset = y*w;
-        for (x=0; x<w; ++x, ++offset, ++src_offset) {
-            ref_imgs[0][offset] = (float)ref[src_offset];
-            cmp_imgs[0][offset] = (float)cmp[src_offset];
-        }
-    }
-
-    /* Create scaled versions of the images */
-    cur_w=w;
-    cur_h=h;
-    lpf.kernel = (float*)g_lpf;
-    lpf.w = lpf.h = LPF_LEN;
-    lpf.normalized = 1;
-    lpf.bnd_opt = KBND_SYMMETRIC;
-    lpf.bnd_const = 0.0f; /* unused by KBND_SYMMETRIC; set so the field is defined */
-    for (idx=1; idx<scales; ++idx) {
-        if (_iqa_decimate(ref_imgs[idx-1], cur_w, cur_h, 2, &lpf, ref_imgs[idx], 0, 0) ||
-            _iqa_decimate(cmp_imgs[idx-1], cur_w, cur_h, 2, &lpf, cmp_imgs[idx], &cur_w, &cur_h))
-        {
-            _free_buffers(ref_imgs, scales);
-            _free_buffers(cmp_imgs, scales);
-            free(ref_imgs);
-            free(cmp_imgs);
-            return INFINITY;
-        }
-    }
-
-    /* The per-scale SSIM settings are the same for every scale. The two
-     * variants differ only in the stabilization constants: MS-SSIM*
-     * (Rouse/Hemami) forces them to 0, MS-SSIM (Wang) keeps 0.01 / 0.03. */
-    s_args.alpha = 1.0f;
-    s_args.beta  = 1.0f;
-    s_args.gamma = 1.0f;
-    s_args.K1 = wang ? 0.01f : 0.0f;
-    s_args.K2 = wang ? 0.03f : 0.0f;
-    s_args.L  = 255;
-    s_args.f  = 1; /* Don't resize */
-
-    cur_w=w;
-    cur_h=h;
-    msssim = 1.0;
-    for (idx=0; idx<scales; ++idx) {
-
-        ms_ctx.l = 0;
-        ms_ctx.c = 0;
-        ms_ctx.s = 0;
-        ms_ctx.alpha = alphas[idx];
-        ms_ctx.beta  = betas[idx];
-        ms_ctx.gamma = gammas[idx];
-
-        scale_ssim = _iqa_ssim(ref_imgs[idx], cmp_imgs[idx], cur_w, cur_h, &window, &mr, &s_args);
-
-        /* Check the error marker before multiplying: once folded into the
-         * product it can turn into NaN (0 * INFINITY) and go unnoticed. */
-        if (scale_ssim == INFINITY) {
-            msssim = INFINITY;
-            break;
-        }
-        msssim *= scale_ssim;
-
-        cur_w = cur_w/2 + (cur_w&1);
-        cur_h = cur_h/2 + (cur_h&1);
-    }
-
-    _free_buffers(ref_imgs, scales);
-    _free_buffers(cmp_imgs, scales);
-    free(ref_imgs);
-    free(cmp_imgs);
-
-    return msssim;
-}

+ 0 - 50
3rdparty/iqa/source/mse.c

@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "iqa.h"
-
-/* MSE(a,b) = 1/N * SUM((a-b)^2)
- *
- * ref, cmp  8-bit images of w x h pixels; consecutive rows are 'stride'
- *           bytes apart.
- * Returns the mean of the squared per-pixel differences.
- */
-float iqa_mse(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride)
-{
-    int error;
-    unsigned long long sum=0;
-    int ww,hh;
-    for (hh=0; hh<h; ++hh) {
-        /* Row start computed in 64 bits: hh*stride overflows an int once the
-         * image exceeds 2 GiB. */
-        const unsigned char *ref_row = ref + (long long)hh * stride;
-        const unsigned char *cmp_row = cmp + (long long)hh * stride;
-        for (ww=0; ww<w; ++ww) {
-            error = ref_row[ww] - cmp_row[ww];
-            sum += error * error;
-        }
-    }
-    /* Pixel count formed in double for the same reason (w*h may not fit). */
-    return (float)( (double)sum / ((double)w * (double)h) );
-}

+ 0 - 42
3rdparty/iqa/source/psnr.c

@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "iqa.h"
-#include <math.h>
-
-/* PSNR(a,b) = 10*log10(L^2 / MSE(a,b)), where L=2^b - 1 (8bit = 255)
- *
- * Takes the same image arguments as iqa_mse(), which supplies the MSE term. */
-float iqa_psnr(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride)
-{
-    const int peak_sqd = 255 * 255;
-    const float mse = iqa_mse(ref,cmp,w,h,stride);
-
-    return (float)( 10.0 * log10( peak_sqd / mse ) );
-}

+ 0 - 322
3rdparty/iqa/source/ssim.c

@@ -1,322 +0,0 @@
-/*
- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
- * All rights reserved.
- *
- * The BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, 
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * - Neither the name of the tdistler.com nor the names of its contributors may
- *   be used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "iqa.h"
-#include "convolve.h"
-#include "decimate.h"
-#include "math_utils.h"
-#include "ssim.h"
-#include <stdlib.h>
-#include <math.h>
-
-
-/* Forward declarations. */
-IQA_INLINE static double _calc_luminance(float, float, float, float);
-IQA_INLINE static double _calc_contrast(double, float, float, float, float);
-IQA_INLINE static double _calc_structure(float, double, float, float, float, float);
-static int _ssim_map(const struct _ssim_int *, void *);
-static float _ssim_reduce(int, int, void *);
-
-/*
- * SSIM(x,y)=(2*ux*uy + C1)*(2sxy + C2) / (ux^2 + uy^2 + C1)*(sx^2 + sy^2 + C2)
- * where,
- *  ux = SUM(w*x)
- *  sx = (SUM(w*(x-ux)^2)^0.5
- *  sxy = SUM(w*(x-ux)*(y-uy))
- *
- * Returns mean SSIM. MSSIM(X,Y) = 1/M * SUM(SSIM(x,y))
- *
- * The 8-bit inputs (rows 'stride' bytes apart) are converted to packed float
- * images, optionally box-filtered and down-sampled, and then handed to
- * _iqa_ssim(). INFINITY is returned if an allocation or the down-sampling
- * fails.
- */
-float iqa_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride,
-    int gaussian, const struct iqa_ssim_args *args)
-{
-    struct _kernel window;
-    struct _kernel box;
-    struct _map_reduce mr;
-    double ssim_sum = 0.0;
-    float *ref_f, *cmp_f;
-    float mssim;
-    int factor, taps, i, row, col;
-    int failed;
-
-    /* Down-sampling factor: derived from the smaller image dimension unless
-     * the caller supplies a non-zero one. */
-    factor = _max( 1, _round( (float)_min(w,h) / 256.0f ) );
-    if (args) {
-        if (args->f)
-            factor = args->f;
-        mr.map     = _ssim_map;
-        mr.reduce  = _ssim_reduce;
-        mr.context = (void*)&ssim_sum;
-    }
-
-    /* Window function: Gaussian when requested, otherwise the square one. */
-    window.normalized = 1;
-    window.bnd_opt = KBND_SYMMETRIC;
-    if (gaussian) {
-        window.kernel = (float*)g_gaussian_window;
-        window.w = window.h = GAUSSIAN_LEN;
-    }
-    else {
-        window.kernel = (float*)g_square_window;
-        window.w = window.h = SQUARE_LEN;
-    }
-
-    /* Float copies of both images with stride == width. */
-    ref_f = (float*)malloc(w*h*sizeof(float));
-    cmp_f = (float*)malloc(w*h*sizeof(float));
-    if (!ref_f || !cmp_f) {
-        free(ref_f); /* free(NULL) is a no-op */
-        free(cmp_f);
-        return INFINITY;
-    }
-    for (row=0; row<h; ++row) {
-        const unsigned char *ref_row = ref + row*stride;
-        const unsigned char *cmp_row = cmp + row*stride;
-        float *ref_out = ref_f + row*w;
-        float *cmp_out = cmp_f + row*w;
-        for (col=0; col<w; ++col) {
-            ref_out[col] = (float)ref_row[col];
-            cmp_out[col] = (float)cmp_row[col];
-        }
-    }
-
-    /* Scale the images down if required */
-    if (factor > 1) {
-        /* Simple low-pass filter: a factor x factor box of equal weights. */
-        taps = factor*factor;
-        box.kernel = (float*)malloc(taps*sizeof(float));
-        if (!box.kernel) {
-            free(ref_f);
-            free(cmp_f);
-            return INFINITY;
-        }
-        box.w = box.h = factor;
-        box.normalized = 0;
-        box.bnd_opt = KBND_SYMMETRIC;
-        for (i=0; i<taps; ++i)
-            box.kernel[i] = 1.0f/taps;
-
-        /* Resample in place; the second call also updates w and h. */
-        failed = _iqa_decimate(ref_f, w, h, factor, &box, 0, 0, 0) ||
-                 _iqa_decimate(cmp_f, w, h, factor, &box, 0, &w, &h);
-        free(box.kernel);
-        if (failed) {
-            free(ref_f);
-            free(cmp_f);
-            return INFINITY;
-        }
-    }
-
-    mssim = _iqa_ssim(ref_f, cmp_f, w, h, &window, &mr, args);
-
-    free(ref_f);
-    free(cmp_f);
-
-    return mssim;
-}
-
-
-/* _iqa_ssim */
-float _iqa_ssim(float *ref, float *cmp, int w, int h, const struct _kernel *k, const struct _map_reduce *mr, const struct iqa_ssim_args *args)
-{
-    float alpha=1.0f, beta=1.0f, gamma=1.0f;
-    int L=255;
-    float K1=0.01f, K2=0.03f;
-    float C1,C2,C3;
-    int x,y,offset;
-    float *ref_mu,*cmp_mu,*ref_sigma_sqd,*cmp_sigma_sqd,*sigma_both;
-    double ssim_sum, numerator, denominator;
-    double luminance_comp, contrast_comp, structure_comp, sigma_root;
-    struct _ssim_int sint;
-
-    /* Initialize algorithm parameters */
-    if (args) {
-        if (!mr)
-            return INFINITY;
-        alpha = args->alpha;
-        beta  = args->beta;
-        gamma = args->gamma;
-        L     = args->L;
-        K1    = args->K1;
-        K2    = args->K2;
-    }
-    C1 = (K1*L)*(K1*L);
-    C2 = (K2*L)*(K2*L);
-    C3 = C2 / 2.0f;
-
-    ref_mu = (float*)malloc(w*h*sizeof(float));
-    cmp_mu = (float*)malloc(w*h*sizeof(float));
-    ref_sigma_sqd = (float*)malloc(w*h*sizeof(float));
-    cmp_sigma_sqd = (float*)malloc(w*h*sizeof(float));
-    sigma_both = (float*)malloc(w*h*sizeof(float));
-    if (!ref_mu || !cmp_mu || !ref_sigma_sqd || !cmp_sigma_sqd || !sigma_both) {
-        if (ref_mu) free(ref_mu);
-        if (cmp_mu) free(cmp_mu);
-        if (ref_sigma_sqd) free(ref_sigma_sqd);
-        if (cmp_sigma_sqd) free(cmp_sigma_sqd);
-        if (sigma_both) free(sigma_both);
-        return INFINITY;
-    }
-
-    /* Calculate mean */
-    _iqa_convolve(ref, w, h, k, ref_mu, 0, 0);
-    _iqa_convolve(cmp, w, h, k, cmp_mu, 0, 0);
-
-    for (y=0; y<h; ++y) {
-        offset = y*w;
-        for (x=0; x<w; ++x, ++offset) {
-            ref_sigma_sqd[offset] = ref[offset] * ref[offset];
-            cmp_sigma_sqd[offset] = cmp[offset] * cmp[offset];
-            sigma_both[offset] = ref[offset] * cmp[offset];
-        }
-    }
-
-    /* Calculate sigma */
-    _iqa_convolve(ref_sigma_sqd, w, h, k, 0, 0, 0);
-    _iqa_convolve(cmp_sigma_sqd, w, h, k, 0, 0, 0);
-    _iqa_convolve(sigma_both, w, h, k, 0, &w, &h); /* Update the width and height */
-
-    /* The convolution results are smaller by the kernel width and height */
-    for (y=0; y<h; ++y) {
-        offset = y*w;
-        for (x=0; x<w; ++x, ++offset) {
-            ref_sigma_sqd[offset] -= ref_mu[offset] * ref_mu[offset];
-            cmp_sigma_sqd[offset] -= cmp_mu[offset] * cmp_mu[offset];
-            sigma_both[offset] -= ref_mu[offset] * cmp_mu[offset];
-        }
-    }
-
-    ssim_sum = 0.0;
-    for (y=0; y<h; ++y) {
-        offset = y*w;
-        for (x=0; x<w; ++x, ++offset) {
-
-            if (!args) {
-                /* The default case */
-                numerator   = (2.0 * ref_mu[offset] * cmp_mu[offset] + C1) * (2.0 * sigma_both[offset] + C2);
-                denominator = (ref_mu[offset]*ref_mu[offset] + cmp_mu[offset]*cmp_mu[offset] + C1) * 
-                    (ref_sigma_sqd[offset] + cmp_sigma_sqd[offset] + C2);
-                ssim_sum += numerator / denominator;
-            }
-            else {
-                /* User tweaked alpha, beta, or gamma */
-
-                /* passing a negative number to sqrt() cause a domain error */
-                if (ref_sigma_sqd[offset] < 0.0f)
-                    ref_sigma_sqd[offset] = 0.0f;
-                if (cmp_sigma_sqd[offset] < 0.0f)
-                    cmp_sigma_sqd[offset] = 0.0f;
-                sigma_root = sqrt(ref_sigma_sqd[offset] * cmp_sigma_sqd[offset]);
-
-                luminance_comp = _calc_luminance(ref_mu[offset], cmp_mu[offset], C1, alpha);
-                contrast_comp  = _calc_contrast(sigma_root, ref_sigma_sqd[offset], cmp_sigma_sqd[offset], C2, beta);
-                structure_comp = _calc_structure(sigma_both[offset], sigma_root, ref_sigma_sqd[offset], cmp_sigma_sqd[offset], C3, gamma);
-
-                sint.l = luminance_comp;
-                sint.c = contrast_comp;
-                sint.s = structure_comp;
-
-                if (mr->map(&sint, mr->context))
-                    return INFINITY;
-            }
-        }
-    }
-
-    free(ref_mu);
-    free(cmp_mu);
-    free(ref_sigma_sqd);
-    free(cmp_sigma_sqd);
-    free(sigma_both);
-
-    if (!args)
-        return (float)(ssim_sum / (double)(w*h));
-    return mr->reduce(w, h, mr->context);
-}
-
-
-/* _ssim_map
- * Map callback: multiplies the luminance, contrast and structure terms of one
- * sample (si->l, si->c, si->s) and adds the product to the double that ctx
- * points to. Always returns 0.
- */
-int _ssim_map(const struct _ssim_int *si, void *ctx)
-{
-    double *ssim_sum = (double*)ctx;
-    *ssim_sum += si->l * si->c * si->s;
-    return 0;
-}
-
-/* _ssim_reduce
- * Reduce callback: treats ctx as a pointer to an accumulated double sum and
- * returns that sum divided by w*h, narrowed to float.
- */
-float _ssim_reduce(int w, int h, void *ctx)
-{
-    double *ssim_sum = (double*)ctx;
-    return (float)(*ssim_sum / (double)(w*h));
-}
-
-
-/* _calc_luminance
- * Evaluates (2*mu1*mu2 + C1) / (mu1^2 + mu2^2 + C1) and raises its magnitude
- * to the power alpha while keeping the sign of the ratio. Returns 1.0 up
- * front when C1 and both squared means are zero, where the ratio would be 0/0.
- */
-IQA_INLINE static double _calc_luminance(float mu1, float mu2, float C1, float alpha)
-{
-    double result;
-    float sign;
-    /* For MS-SSIM* */
-    if (C1 == 0 && mu1*mu1 == 0 && mu2*mu2 == 0)
-        return 1.0;
-    result = (2.0 * mu1 * mu2 + C1) / (mu1*mu1 + mu2*mu2 + C1);
-    if (alpha == 1.0f) /* exponent of one: skip the pow() call */
-        return result;
-    sign = result < 0.0 ? -1.0f : 1.0f;
-    return sign * pow(fabs(result),(double)alpha);
-}
-
-/* _calc_contrast
- * Evaluates (2*sigma_comb_12 + C2) / (sigma1_sqd + sigma2_sqd + C2) and raises
- * its magnitude to the power beta while keeping the sign of the ratio.
- * Returns 1.0 up front when C2 is zero and the two variances sum to zero,
- * where the denominator would be zero.
- */
-IQA_INLINE static double _calc_contrast(double sigma_comb_12, float sigma1_sqd, float sigma2_sqd, float C2, float beta)
-{
-    double result;
-    float sign;
-    /* For MS-SSIM* */
-    if (C2 == 0 && sigma1_sqd + sigma2_sqd == 0)
-        return 1.0;
-    result = (2.0 * sigma_comb_12 + C2) / (sigma1_sqd + sigma2_sqd + C2);
-    if (beta == 1.0f) /* exponent of one: skip the pow() call */
-        return result;
-    sign = result < 0.0 ? -1.0f : 1.0f;
-    return sign * pow(fabs(result),(double)beta);
-}
-
-/* _calc_structure
- * Evaluates (sigma_12 + C3) / (sigma_comb_12 + C3) and raises its magnitude to
- * the power gamma while keeping the sign of the ratio. When C3 and
- * sigma_comb_12 are both zero the denominator vanishes, so the function
- * returns 1.0 if sigma1 and sigma2 are both zero and 0.0 if exactly one is.
- */
-IQA_INLINE static double _calc_structure(float sigma_12, double sigma_comb_12, float sigma1, float sigma2, float C3, float gamma)
-{
-    double result;
-    float sign;
-    /* For MS-SSIM* */
-    if (C3 == 0 && sigma_comb_12 == 0) {
-        if (sigma1 == 0 && sigma2 == 0)
-            return 1.0;
-        else if (sigma1 == 0 || sigma2 == 0)
-            return 0.0;
-    }
-    result = (sigma_12 + C3) / (sigma_comb_12 + C3);
-    if (gamma == 1.0f) /* exponent of one: skip the pow() call */
-        return result;
-    sign = result < 0.0 ? -1.0f : 1.0f;
-    return sign * pow(fabs(result),(double)gamma);
-}

+ 0 - 20
3rdparty/libsquish/LICENSE

@@ -1,20 +0,0 @@
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+ 0 - 35
3rdparty/libsquish/README

@@ -1,35 +0,0 @@
-LICENSE
--------
-
-The squish library is distributed under the terms and conditions of the MIT
-license. This license is specified at the top of each source file and must be
-preserved in its entirety.
-
-BUILDING AND INSTALLING THE LIBRARY
------------------------------------
-
-If you are using Visual Studio 2003 or above under Windows then load the Visual
-Studio 2003 project in the vs7 folder. By default, the library is built using
-SSE2 optimisations. To change this either change or remove the SQUISH_USE_SSE=2
-from the preprocessor symbols.
-
-If you are using a Mac then load the Xcode 2.2 project in the distribution. By
-default, the library is built using Altivec optimisations. To change this
-either change or remove SQUISH_USE_ALTIVEC=1 from the preprocessor symbols. I
-guess I'll have to think about changing this for the new Intel Macs that are
-rolling out...
-
-If you are using unix then first edit the config file in the base directory of
-the distribution, enabling Altivec or SSE with the USE_ALTIVEC or USE_SSE
-variables, and editing the optimisation flags passed to the C++ compiler if
-necessary. Then make can be used to build the library, and make install (from
-the superuser account) can be used to install (into /usr/local by default).
-
-REPORTING BUGS OR FEATURE REQUESTS
-----------------------------------
-
-Feedback can be sent to Simon Brown (the developer) at [email protected]
-
-New releases are announced on the squish library homepage at
-http://sjbrown.co.uk/?code=squish
-

+ 0 - 350
3rdparty/libsquish/alpha.cpp

@@ -1,350 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "alpha.h"
-
-#include <climits>
-#include <algorithm>
-
-namespace squish {
-// Converts a to an int by adding 0.5 and truncating, then clamps the result to [0, limit].
-static int FloatToInt( float a, int limit )
-{
-	// use ANSI round-to-zero behaviour to get round-to-nearest
-	int i = ( int )( a + 0.5f );
-
-	// clamp to the limit
-	if( i < 0 )
-		i = 0;
-	else if( i > limit )
-		i = limit; 
-
-	// done
-	return i;
-}
-/* Packs the alpha bytes (offset 3 of each 4-byte pixel) of 16 pixels in rgba
- * into 8 bytes at block, 4 bits per pixel, two pixels per byte with the
- * even-numbered pixel in the low nibble. A pixel whose bit in mask is clear
- * is stored as zero.
- */
-void CompressAlphaDxt3( u8 const* rgba, int mask, void* block )
-{
-	u8* bytes = reinterpret_cast< u8* >( block );
-	
-	// quantise and pack the alpha values pairwise
-	for( int i = 0; i < 8; ++i )
-	{
-		// quantise down to 4 bits
-		float alpha1 = ( float )rgba[8*i + 3] * ( 15.0f/255.0f );
-		float alpha2 = ( float )rgba[8*i + 7] * ( 15.0f/255.0f );
-		int quant1 = FloatToInt( alpha1, 15 );
-		int quant2 = FloatToInt( alpha2, 15 );
-		
-		// set alpha to zero where masked
-		int bit1 = 1 << ( 2*i );
-		int bit2 = 1 << ( 2*i + 1 );
-		if( ( mask & bit1 ) == 0 )
-			quant1 = 0;
-		if( ( mask & bit2 ) == 0 )
-			quant2 = 0;
-
-		// pack into the byte
-		bytes[i] = ( u8 )( quant1 | ( quant2 << 4 ) );
-	}
-}
-/* Expands the 8 bytes at block into the alpha bytes (offset 3 of each 4-byte
- * pixel) of 16 pixels in rgba. Each 4-bit value is replicated into both
- * nibbles of the output byte, so 0x0 becomes 0x00 and 0xf becomes 0xff.
- */
-void DecompressAlphaDxt3( u8* rgba, void const* block )
-{
-	u8 const* bytes = reinterpret_cast< u8 const* >( block );
-	
-	// unpack the alpha values pairwise
-	for( int i = 0; i < 8; ++i )
-	{
-		// fetch the byte that holds two 4-bit alpha values
-		u8 quant = bytes[i];
-		
-		// unpack the values
-		u8 lo = quant & 0x0f;
-		u8 hi = quant & 0xf0;
-
-		// convert back up to bytes
-		rgba[8*i + 3] = lo | ( lo << 4 );
-		rgba[8*i + 7] = hi | ( hi >> 4 );
-	}
-}
-/* Widens [min, max] in place until it spans at least steps: max is raised
- * first (capped at 255), and if that is not enough min is lowered (floored
- * at 0).
- */
-static void FixRange( int& min, int& max, int steps )
-{
-	if( max - min < steps )
-		max = std::min( min + steps, 255 );
-	if( max - min < steps )
-		min = std::max( 0, max - steps );
-}
-/* For each of 16 pixels, picks the entry of the 8-entry codes table closest
- * to the pixel's alpha byte and stores its position in indices. Pixels whose
- * bit in mask is clear get index 0 and add no error. Returns the sum of the
- * squared differences for the fitted pixels.
- */
-static int FitCodes( u8 const* rgba, int mask, u8 const* codes, u8* indices )
-{
-	// fit each alpha value to the codebook
-	int err = 0;
-	for( int i = 0; i < 16; ++i )
-	{
-		// check this pixel is valid
-		int bit = 1 << i;
-		if( ( mask & bit ) == 0 )
-		{
-			// use the first code
-			indices[i] = 0;
-			continue;
-		}
-		
-		// find the least error and corresponding index
-		int value = rgba[4*i + 3];
-		int least = INT_MAX;
-		int index = 0;
-		for( int j = 0; j < 8; ++j )
-		{
-			// get the squared error from this code
-			int dist = ( int )value - ( int )codes[j];
-			dist *= dist;
-			
-			// compare with the best so far (strict <, so the earliest code wins ties)
-			if( dist < least )
-			{
-				least = dist;
-				index = j;
-			}
-		}
-		
-		// save this index and accumulate the error
-		indices[i] = ( u8 )index;
-		err += least;
-	}
-	
-	// return the total error
-	return err;
-}
-/* Writes an 8-byte alpha block: the two endpoint bytes alpha0 and alpha1,
- * then the 16 entries of indices at 3 bits each. Every group of 8 indices
- * forms a 24-bit value (first index in the lowest bits) that is stored as
- * 3 bytes, low byte first.
- */
-static void WriteAlphaBlock( int alpha0, int alpha1, u8 const* indices, void* block )
-{
-	u8* bytes = reinterpret_cast< u8* >( block );
-	
-	// write the first two bytes
-	bytes[0] = ( u8 )alpha0;
-	bytes[1] = ( u8 )alpha1;
-	
-	// pack the indices with 3 bits each
-	u8* dest = bytes + 2;
-	u8 const* src = indices;
-	for( int i = 0; i < 2; ++i )
-	{
-		// pack 8 3-bit values
-		int value = 0;
-		for( int j = 0; j < 8; ++j )
-		{
-			int index = *src++;
-			value |= ( index << 3*j );
-		}
-			
-		// store in 3 bytes
-		for( int j = 0; j < 3; ++j )
-		{
-			int byte = ( value >> 8*j ) & 0xff;
-			*dest++ = ( u8 )byte;
-		}
-	}
-}
-/* Writes a block so that the first stored endpoint is not greater than the
- * second. If alpha0 > alpha1 the endpoints are exchanged and the indices are
- * remapped to match: 0 and 1 trade places, 2..5 become 7 - index, and 6 and 7
- * are kept as they are.
- */
-static void WriteAlphaBlock5( int alpha0, int alpha1, u8 const* indices, void* block )
-{
-	// check the relative values of the endpoints
-	if( alpha0 > alpha1 )
-	{
-		// swap the indices
-		u8 swapped[16];
-		for( int i = 0; i < 16; ++i )
-		{
-			u8 index = indices[i];
-			if( index == 0 )
-				swapped[i] = 1;
-			else if( index == 1 )
-				swapped[i] = 0;
-			else if( index <= 5 )
-				swapped[i] = 7 - index;
-			else 
-				swapped[i] = index;
-		}
-		
-		// write the block
-		WriteAlphaBlock( alpha1, alpha0, swapped, block );
-	}
-	else
-	{
-		// write the block
-		WriteAlphaBlock( alpha0, alpha1, indices, block );
-	}	
-}
-/* Writes a block so that the first stored endpoint is not less than the
- * second. If alpha0 < alpha1 the endpoints are exchanged and the indices are
- * remapped to match: 0 and 1 trade places and every other index becomes
- * 9 - index.
- */
-static void WriteAlphaBlock7( int alpha0, int alpha1, u8 const* indices, void* block )
-{
-	// check the relative values of the endpoints
-	if( alpha0 < alpha1 )
-	{
-		// swap the indices
-		u8 swapped[16];
-		for( int i = 0; i < 16; ++i )
-		{
-			u8 index = indices[i];
-			if( index == 0 )
-				swapped[i] = 1;
-			else if( index == 1 )
-				swapped[i] = 0;
-			else
-				swapped[i] = 9 - index;
-		}
-		
-		// write the block
-		WriteAlphaBlock( alpha1, alpha0, swapped, block );
-	}
-	else
-	{
-		// write the block
-		WriteAlphaBlock( alpha0, alpha1, indices, block );
-	}	
-}
-/* Encodes the alpha bytes of 16 pixels into an 8-byte interpolated alpha
- * block. Two codebooks are built from the alpha range of the pixels enabled
- * in mask: a 5-step one whose range ignores the values 0 and 255 (it has
- * dedicated codes for them) and a 7-step one over the full range. Both are
- * fitted with FitCodes and the one with the smaller total squared error is
- * written; a tie goes to the 5-step codebook.
- */
-void CompressAlphaDxt5( u8 const* rgba, int mask, void* block )
-{
-	// get the range for 5-alpha and 7-alpha interpolation
-	int min5 = 255;
-	int max5 = 0;
-	int min7 = 255;
-	int max7 = 0;
-	for( int i = 0; i < 16; ++i )
-	{
-		// check this pixel is valid
-		int bit = 1 << i;
-		if( ( mask & bit ) == 0 )
-			continue;
-
-		// incorporate into the min/max
-		int value = rgba[4*i + 3];
-		if( value < min7 )
-			min7 = value;
-		if( value > max7 )
-			max7 = value;
-		if( value != 0 && value < min5 )
-			min5 = value;
-		if( value != 255 && value > max5 )
-			max5 = value;
-	}
-	
-	// handle the case that no valid range was found
-	if( min5 > max5 )
-		min5 = max5;
-	if( min7 > max7 )
-		min7 = max7;
-		
-	// fix the range to be the minimum in each case
-	FixRange( min5, max5, 5 );
-	FixRange( min7, max7, 7 );
-	
-	// set up the 5-alpha code book
-	u8 codes5[8];
-	codes5[0] = ( u8 )min5;
-	codes5[1] = ( u8 )max5;
-	for( int i = 1; i < 5; ++i )
-		codes5[1 + i] = ( u8 )( ( ( 5 - i )*min5 + i*max5 )/5 );
-	codes5[6] = 0;
-	codes5[7] = 255;
-	
-	// set up the 7-alpha code book
-	u8 codes7[8];
-	codes7[0] = ( u8 )min7;
-	codes7[1] = ( u8 )max7;
-	for( int i = 1; i < 7; ++i )
-		codes7[1 + i] = ( u8 )( ( ( 7 - i )*min7 + i*max7 )/7 );
-		
-	// fit the data to both code books
-	u8 indices5[16];
-	u8 indices7[16];
-	int err5 = FitCodes( rgba, mask, codes5, indices5 );
-	int err7 = FitCodes( rgba, mask, codes7, indices7 );
-	
-	// save the block with least error
-	if( err5 <= err7 )
-		WriteAlphaBlock5( min5, max5, indices5, block );
-	else
-		WriteAlphaBlock7( min7, max7, indices7, block );
-}
-/* Decodes an 8-byte interpolated alpha block into the alpha bytes (offset 3
- * of each 4-byte pixel) of 16 pixels in rgba. The two endpoint bytes select
- * the codebook: when the first is not greater than the second, four
- * interpolated codes plus the fixed codes 0 and 255 are used; otherwise six
- * interpolated codes are used. The remaining 6 bytes hold sixteen 3-bit
- * codebook indices.
- */
-void DecompressAlphaDxt5( u8* rgba, void const* block )
-{
-	// get the two alpha values
-	u8 const* bytes = reinterpret_cast< u8 const* >( block );
-	int alpha0 = bytes[0];
-	int alpha1 = bytes[1];
-	
-	// compare the values to build the codebook
-	u8 codes[8];
-	codes[0] = ( u8 )alpha0;
-	codes[1] = ( u8 )alpha1;
-	if( alpha0 <= alpha1 )
-	{
-		// use 5-alpha codebook
-		for( int i = 1; i < 5; ++i )
-			codes[1 + i] = ( u8 )( ( ( 5 - i )*alpha0 + i*alpha1 )/5 );
-		codes[6] = 0;
-		codes[7] = 255;
-	}
-	else
-	{
-		// use 7-alpha codebook
-		for( int i = 1; i < 7; ++i )
-			codes[1 + i] = ( u8 )( ( ( 7 - i )*alpha0 + i*alpha1 )/7 );
-	}
-	
-	// decode the indices
-	u8 indices[16];
-	u8 const* src = bytes + 2;
-	u8* dest = indices;
-	for( int i = 0; i < 2; ++i )
-	{
-		// grab 3 bytes
-		int value = 0;
-		for( int j = 0; j < 3; ++j )
-		{
-			int byte = *src++;
-			value |= ( byte << 8*j );
-		}
-		
-		// unpack 8 3-bit values from it
-		for( int j = 0; j < 8; ++j )
-		{
-			int index = ( value >> 3*j ) & 0x7;
-			*dest++ = ( u8 )index;
-		}
-	}
-	
-	// write out the indexed codebook values
-	for( int i = 0; i < 16; ++i )
-		rgba[4*i + 3] = codes[indices[i]];
-}
-
-} // namespace squish

+ 0 - 41
3rdparty/libsquish/alpha.h

@@ -1,41 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_ALPHA_H
-#define SQUISH_ALPHA_H
-
-#include "squish.h"
-
-namespace squish {
-// Alpha-channel block routines; rgba addresses 16 pixels of 4 bytes each with alpha at byte 3.
-void CompressAlphaDxt3( u8 const* rgba, int mask, void* block ); // 4 bits per pixel; mask bit i enables pixel i
-void CompressAlphaDxt5( u8 const* rgba, int mask, void* block ); // two endpoints plus 3-bit indices
-
-void DecompressAlphaDxt3( u8* rgba, void const* block ); // overwrites only the alpha bytes of rgba
-void DecompressAlphaDxt5( u8* rgba, void const* block ); // overwrites only the alpha bytes of rgba
-
-} // namespace squish
-
-#endif // ndef SQUISH_ALPHA_H

+ 0 - 392
3rdparty/libsquish/clusterfit.cpp

@@ -1,392 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-	Copyright (c) 2007 Ignacio Castano                   [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "clusterfit.h"
-#include "colourset.h"
-#include "colourblock.h"
-#include <cfloat>
-
-namespace squish {
-/* Sets up a cluster fit over colours: chooses the iteration count from flags
- * (kMaxIterations when kColourIterativeClusterFit is set, otherwise 1), takes
- * the per-channel error weights from metric (all ones when metric is null),
- * resets the best error to FLT_MAX and caches the principal axis of the
- * weighted colour covariance.
- */
-ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric ) 
-  : ColourFit( colours, flags )
-{
-	// set the iteration count
-	m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
-
-	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
-	if( metric )
-		m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
-	else
-		m_metric = VEC4_CONST( 1.0f );	
-
-	// initialise the best error
-	m_besterror = VEC4_CONST( FLT_MAX );
-
-	// cache some values
-	int const count = m_colours->GetCount();
-	Vec3 const* values = m_colours->GetPoints();
-
-	// get the covariance matrix
-	Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
-	
-	// compute the principal component
-	m_principle = ComputePrincipleComponent( covariance );
-}
-/* Sorts the colour points by their dot product with axis and stores the
- * resulting permutation in the 16-entry slot of m_order that belongs to
- * iteration. Returns false, leaving the weighted points untouched, when the
- * permutation equals one stored by an earlier iteration. Otherwise fills
- * m_points_weights with (x*w, y*w, z*w, w) in sorted order, sets m_xsum_wsum
- * to their sum and returns true.
- */
-bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
-{
-	// cache some values
-	int const count = m_colours->GetCount();
-	Vec3 const* values = m_colours->GetPoints();
-
-	// build the list of dot products
-	float dps[16];
-	u8* order = ( u8* )m_order + 16*iteration;
-	for( int i = 0; i < count; ++i )
-	{
-		dps[i] = Dot( values[i], axis );
-		order[i] = ( u8 )i;
-	}
-		
-	// stable sort using them (insertion sort; equal keys keep their order)
-	for( int i = 0; i < count; ++i )
-	{
-		for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
-		{
-			std::swap( dps[j], dps[j - 1] );
-			std::swap( order[j], order[j - 1] );
-		}
-	}
-	
-	// check this ordering is unique
-	for( int it = 0; it < iteration; ++it )
-	{
-		u8 const* prev = ( u8* )m_order + 16*it;
-		bool same = true;
-		for( int i = 0; i < count; ++i )
-		{
-			if( order[i] != prev[i] )
-			{
-				same = false;
-				break;
-			}
-		}
-		if( same )
-			return false;
-	}
-	
-	// copy the ordering and weight all the points
-	Vec3 const* unweighted = m_colours->GetPoints();
-	float const* weights = m_colours->GetWeights();
-	m_xsum_wsum = VEC4_CONST( 0.0f );
-	for( int i = 0; i < count; ++i )
-	{
-		int j = order[i];
-		Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
-		Vec4 w( weights[j] );
-		Vec4 x = p*w;
-		m_points_weights[i] = x;
-		m_xsum_wsum += x;
-	}
-	return true;
-}
-/* Searches for the endpoint pair of a three-level block (start, midpoint,
- * end). With the points ordered along an axis, every split into three
- * contiguous clusters [0,i), [i,j), [j,count) is tried: the least-squares
- * endpoints are solved, clamped to [0,1], snapped to the 31/63/31-step grid
- * and scored with the metric-weighted error. While an iteration improves the
- * result, the points are re-ordered along the best endpoint axis and the
- * search repeats, up to m_iterationCount orderings. The block is written
- * through WriteColourBlock3 only if the error beats m_besterror.
- */
-void ClusterFit::Compress3( void* block )
-{
-	// declare variables
-	int const count = m_colours->GetCount();
-	Vec4 const two = VEC4_CONST( 2.0 );
-	Vec4 const one = VEC4_CONST( 1.0f );
-	Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
-	Vec4 const zero = VEC4_CONST( 0.0f );
-	Vec4 const half = VEC4_CONST( 0.5f );
-	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
-	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
-
-	// prepare an ordering using the principal axis
-	ConstructOrdering( m_principle, 0 );
-	
-	// check all possible clusters and iterate on the total order
-	Vec4 beststart = VEC4_CONST( 0.0f );
-	Vec4 bestend = VEC4_CONST( 0.0f );
-	Vec4 besterror = m_besterror;
-	u8 bestindices[16];
-	int bestiteration = 0;
-	int besti = 0, bestj = 0;
-	
-	// loop over iterations (we avoid the case that all points in first or last cluster)
-	for( int iterationIndex = 0;; )
-	{
-		// first cluster [0,i) is at the start
-		Vec4 part0 = VEC4_CONST( 0.0f );
-		for( int i = 0; i < count; ++i )
-		{
-			// second cluster [i,j) is half along
-			Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
-			int jmin = ( i == 0 ) ? 1 : i;
-			for( int j = jmin;; )
-			{
-				// last cluster [j,count) is at the end
-				Vec4 part2 = m_xsum_wsum - part1 - part0;
-				
-				// compute least squares terms directly
-				Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
-				Vec4 alpha2_sum = alphax_sum.SplatW();
-
-				Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
-				Vec4 beta2_sum = betax_sum.SplatW();
-
-				Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
-
-				// compute the least-squares optimal points
-				Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
-				Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
-				Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
-
-				// clamp to the grid
-				a = Min( one, Max( zero, a ) );
-				b = Min( one, Max( zero, b ) );
-				a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
-				b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
-				
-				// compute the error (we skip the constant xxsum)
-				Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
-				Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
-				Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
-				Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
-				// apply the metric to the error term
-				Vec4 e5 = e4*m_metric;
-				Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
-				
-				// keep the solution if it wins
-				if( CompareAnyLessThan( error, besterror ) )
-				{
-					beststart = a;
-					bestend = b;
-					besti = i;
-					bestj = j;
-					besterror = error;
-					bestiteration = iterationIndex;
-				}
-
-				// advance
-				if( j == count )
-					break;
-				part1 += m_points_weights[j];
-				++j;
-			}
-
-			// advance
-			part0 += m_points_weights[i];
-		}
-		
-		// stop if we didn't improve in this iteration
-		if( bestiteration != iterationIndex )
-			break;
-			
-		// advance if possible
-		++iterationIndex;
-		if( iterationIndex == m_iterationCount )
-			break;
-			
-		// stop if a new iteration is an ordering that has already been tried
-		Vec3 axis = ( bestend - beststart ).GetVec3();
-		if( !ConstructOrdering( axis, iterationIndex ) )
-			break;
-	}
-		
-	// save the block if necessary
-	if( CompareAnyLessThan( besterror, m_besterror ) )
-	{
-		// remap the indices (start cluster -> 0, middle cluster -> 2, end cluster -> 1)
-		u8 const* order = ( u8* )m_order + 16*bestiteration;
-
-		u8 unordered[16];
-		for( int m = 0; m < besti; ++m )
-			unordered[order[m]] = 0;
-		for( int m = besti; m < bestj; ++m )
-			unordered[order[m]] = 2;
-		for( int m = bestj; m < count; ++m )
-			unordered[order[m]] = 1;
-
-		m_colours->RemapIndices( unordered, bestindices );
-		
-		// save the block
-		WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
-
-		// save the error
-		m_besterror = besterror;
-	}
-}
-/* Searches for the endpoint pair of a four-level block (start, one third,
- * two thirds, end). With the points ordered along an axis, every split into
- * four contiguous clusters [0,i), [i,j), [j,k), [k,count) is tried: the
- * least-squares endpoints are solved, clamped to [0,1], snapped to the
- * 31/63/31-step grid and scored with the metric-weighted error. While an
- * iteration improves the result, the points are re-ordered along the best
- * endpoint axis and the search repeats, up to m_iterationCount orderings.
- * The block is written through WriteColourBlock4 only if the error beats
- * m_besterror.
- */
-void ClusterFit::Compress4( void* block )
-{
-	// declare variables
-	int const count = m_colours->GetCount();
-	Vec4 const two = VEC4_CONST( 2.0f );
-	Vec4 const one = VEC4_CONST( 1.0f );
-	Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
-	Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
-	Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
-	Vec4 const zero = VEC4_CONST( 0.0f );
-	Vec4 const half = VEC4_CONST( 0.5f );
-	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
-	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
-
-	// prepare an ordering using the principal axis
-	ConstructOrdering( m_principle, 0 );
-	
-	// check all possible clusters and iterate on the total order
-	Vec4 beststart = VEC4_CONST( 0.0f );
-	Vec4 bestend = VEC4_CONST( 0.0f );
-	Vec4 besterror = m_besterror;
-	u8 bestindices[16];
-	int bestiteration = 0;
-	int besti = 0, bestj = 0, bestk = 0;
-	
-	// loop over iterations (we avoid the case that all points in first or last cluster)
-	for( int iterationIndex = 0;; )
-	{
-		// first cluster [0,i) is at the start
-		Vec4 part0 = VEC4_CONST( 0.0f );
-		for( int i = 0; i < count; ++i )
-		{
-			// second cluster [i,j) is one third along
-			Vec4 part1 = VEC4_CONST( 0.0f );
-			for( int j = i;; )
-			{
-				// third cluster [j,k) is two thirds along
-				Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
-				int kmin = ( j == 0 ) ? 1 : j;
-				for( int k = kmin;; )
-				{
-					// last cluster [k,count) is at the end
-					Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
-
-					// compute least squares terms directly
-					Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
-					Vec4 const alpha2_sum = alphax_sum.SplatW();
-					
-					Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
-					Vec4 const beta2_sum = betax_sum.SplatW();
-					
-					Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
-
-					// compute the least-squares optimal points
-					Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
-					Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
-					Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
-
-					// clamp to the grid
-					a = Min( one, Max( zero, a ) );
-					b = Min( one, Max( zero, b ) );
-					a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
-					b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
-					
-					// compute the error (we skip the constant xxsum)
-					Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
-					Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
-					Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
-					Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
-					// apply the metric to the error term
-					Vec4 e5 = e4*m_metric;
-					Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
-
-					// keep the solution if it wins
-					if( CompareAnyLessThan( error, besterror ) )
-					{
-						beststart = a;
-						bestend = b;
-						besterror = error;
-						besti = i;
-						bestj = j;
-						bestk = k;
-						bestiteration = iterationIndex;
-					}
-
-					// advance
-					if( k == count )
-						break;
-					part2 += m_points_weights[k];
-					++k;
-				}
-
-				// advance
-				if( j == count )
-					break;
-				part1 += m_points_weights[j];
-				++j;
-			}
-
-			// advance
-			part0 += m_points_weights[i];
-		}
-		
-		// stop if we didn't improve in this iteration
-		if( bestiteration != iterationIndex )
-			break;
-			
-		// advance if possible
-		++iterationIndex;
-		if( iterationIndex == m_iterationCount )
-			break;
-			
-		// stop if a new iteration is an ordering that has already been tried
-		Vec3 axis = ( bestend - beststart ).GetVec3();
-		if( !ConstructOrdering( axis, iterationIndex ) )
-			break;
-	}
-
-	// save the block if necessary
-	if( CompareAnyLessThan( besterror, m_besterror ) )
-	{
-		// remap the indices (clusters in order -> 0, 2, 3, 1)
-		u8 const* order = ( u8* )m_order + 16*bestiteration;
-
-		u8 unordered[16];
-		for( int m = 0; m < besti; ++m )
-			unordered[order[m]] = 0;
-		for( int m = besti; m < bestj; ++m )
-			unordered[order[m]] = 2;
-		for( int m = bestj; m < bestk; ++m )
-			unordered[order[m]] = 3;
-		for( int m = bestk; m < count; ++m )
-			unordered[order[m]] = 1;
-
-		m_colours->RemapIndices( unordered, bestindices );
-		
-		// save the block
-		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
-
-		// save the error
-		m_besterror = besterror;
-	}
-}
-
-} // namespace squish

+ 0 - 61
3rdparty/libsquish/clusterfit.h

@@ -1,61 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-	Copyright (c) 2007 Ignacio Castano                   [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_CLUSTERFIT_H
-#define SQUISH_CLUSTERFIT_H
-
-#include "squish.h"
-#include "maths.h"
-#include "simd.h"
-#include "colourfit.h"
-
-namespace squish {
-/* Colour fit that orders the colour points along an axis and searches the
- * contiguous cluster splits of that ordering for the endpoints with the
- * least error; see Compress3 and Compress4 in clusterfit.cpp.
- */
-class ClusterFit : public ColourFit
-{
-public:
-	ClusterFit( ColourSet const* colours, int flags, float* metric );
-	
-private:
-	bool ConstructOrdering( Vec3 const& axis, int iteration );
-
-	virtual void Compress3( void* block );
-	virtual void Compress4( void* block );
-
-	enum { kMaxIterations = 8 };
-
-	int m_iterationCount; // number of orderings the search may try
-	Vec3 m_principle; // principal axis of the weighted colour covariance
-	u8 m_order[16*kMaxIterations]; // one 16-entry point ordering per iteration
-	Vec4 m_points_weights[16]; // ordered points as (x*w, y*w, z*w, w)
-	Vec4 m_xsum_wsum; // sum of all entries of m_points_weights
-	Vec4 m_metric; // per-channel weights applied to the error
-	Vec4 m_besterror; // lowest error written so far
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_CLUSTERFIT_H

+ 0 - 214
3rdparty/libsquish/colourblock.cpp

@@ -1,214 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "colourblock.h"
-
-namespace squish {
-
-static int FloatToInt( float a, int limit )
-{
-	// use ANSI round-to-zero behaviour to get round-to-nearest
-	int i = ( int )( a + 0.5f );
-
-	// clamp to the limit
-	if( i < 0 )
-		i = 0;
-	else if( i > limit )
-		i = limit; 
-
-	// done
-	return i;
-}
-
-static int FloatTo565( Vec3::Arg colour )
-{
-	// get the components in the correct range
-	int r = FloatToInt( 31.0f*colour.X(), 31 );
-	int g = FloatToInt( 63.0f*colour.Y(), 63 );
-	int b = FloatToInt( 31.0f*colour.Z(), 31 );
-	
-	// pack into a single value
-	return ( r << 11 ) | ( g << 5 ) | b;
-}
-
-static void WriteColourBlock( int a, int b, u8* indices, void* block )
-{
-	// get the block as bytes
-	u8* bytes = ( u8* )block;
-
-	// write the endpoints
-	bytes[0] = ( u8 )( a & 0xff );
-	bytes[1] = ( u8 )( a >> 8 );
-	bytes[2] = ( u8 )( b & 0xff );
-	bytes[3] = ( u8 )( b >> 8 );
-	
-	// write the indices
-	for( int i = 0; i < 4; ++i )
-	{
-		u8 const* ind = indices + 4*i;
-		bytes[4 + i] = ind[0] | ( ind[1] << 2 ) | ( ind[2] << 4 ) | ( ind[3] << 6 );
-	}
-}
-
-void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
-{
-	// get the packed values
-	int a = FloatTo565( start );
-	int b = FloatTo565( end );
-
-	// remap the indices
-	u8 remapped[16];
-	if( a <= b )
-	{
-		// use the indices directly
-		for( int i = 0; i < 16; ++i )
-			remapped[i] = indices[i];
-	}
-	else
-	{
-		// swap a and b
-		std::swap( a, b );
-		for( int i = 0; i < 16; ++i )
-		{
-			if( indices[i] == 0 )
-				remapped[i] = 1;
-			else if( indices[i] == 1 )
-				remapped[i] = 0;
-			else
-				remapped[i] = indices[i];
-		}
-	}
-	
-	// write the block
-	WriteColourBlock( a, b, remapped, block );
-}
-
-void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
-{
-	// get the packed values
-	int a = FloatTo565( start );
-	int b = FloatTo565( end );
-
-	// remap the indices
-	u8 remapped[16];
-	if( a < b )
-	{
-		// swap a and b
-		std::swap( a, b );
-		for( int i = 0; i < 16; ++i )
-			remapped[i] = ( indices[i] ^ 0x1 ) & 0x3;
-	}
-	else if( a == b )
-	{
-		// use index 0
-		for( int i = 0; i < 16; ++i )
-			remapped[i] = 0;
-	}
-	else
-	{
-		// use the indices directly
-		for( int i = 0; i < 16; ++i )
-			remapped[i] = indices[i];
-	}
-	
-	// write the block
-	WriteColourBlock( a, b, remapped, block );
-}
-
-static int Unpack565( u8 const* packed, u8* colour )
-{
-	// build the packed value
-	int value = ( int )packed[0] | ( ( int )packed[1] << 8 );
-	
-	// get the components in the stored range
-	u8 red = ( u8 )( ( value >> 11 ) & 0x1f );
-	u8 green = ( u8 )( ( value >> 5 ) & 0x3f );
-	u8 blue = ( u8 )( value & 0x1f );
-
-	// scale up to 8 bits
-	colour[0] = ( red << 3 ) | ( red >> 2 );
-	colour[1] = ( green << 2 ) | ( green >> 4 );
-	colour[2] = ( blue << 3 ) | ( blue >> 2 );
-	colour[3] = 255;
-	
-	// return the value
-	return value;
-}
-
-void DecompressColour( u8* rgba, void const* block, bool isDxt1 )
-{
-	// get the block bytes
-	u8 const* bytes = reinterpret_cast< u8 const* >( block );
-	
-	// unpack the endpoints
-	u8 codes[16];
-	int a = Unpack565( bytes, codes );
-	int b = Unpack565( bytes + 2, codes + 4 );
-	
-	// generate the midpoints
-	for( int i = 0; i < 3; ++i )
-	{
-		int c = codes[i];
-		int d = codes[4 + i];
-
-		if( isDxt1 && a <= b )
-		{
-			codes[8 + i] = ( u8 )( ( c + d )/2 );
-			codes[12 + i] = 0;
-		}
-		else
-		{
-			codes[8 + i] = ( u8 )( ( 2*c + d )/3 );
-			codes[12 + i] = ( u8 )( ( c + 2*d )/3 );
-		}
-	}
-	
-	// fill in alpha for the intermediate values
-	codes[8 + 3] = 255;
-	codes[12 + 3] = ( isDxt1 && a <= b ) ? 0 : 255;
-	
-	// unpack the indices
-	u8 indices[16];
-	for( int i = 0; i < 4; ++i )
-	{
-		u8* ind = indices + 4*i;
-		u8 packed = bytes[4 + i];
-		
-		ind[0] = packed & 0x3;
-		ind[1] = ( packed >> 2 ) & 0x3;
-		ind[2] = ( packed >> 4 ) & 0x3;
-		ind[3] = ( packed >> 6 ) & 0x3;
-	}
-
-	// store out the colours
-	for( int i = 0; i < 16; ++i )
-	{
-		u8 offset = 4*indices[i];
-		for( int j = 0; j < 4; ++j )
-			rgba[4*i + j] = codes[offset + j];
-	}
-}
-
-} // namespace squish

+ 0 - 41
3rdparty/libsquish/colourblock.h

@@ -1,41 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_COLOURBLOCK_H
-#define SQUISH_COLOURBLOCK_H
-
-#include "squish.h"
-#include "maths.h"
-
-namespace squish {
-
-void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
-void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
-
-void DecompressColour( u8* rgba, void const* block, bool isDxt1 );
-
-} // namespace squish
-
-#endif // ndef SQUISH_COLOURBLOCK_H

+ 0 - 54
3rdparty/libsquish/colourfit.cpp

@@ -1,54 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "colourfit.h"
-#include "colourset.h"
-
-namespace squish {
-
-ColourFit::ColourFit( ColourSet const* colours, int flags ) 
-  : m_colours( colours ), 
-	m_flags( flags )
-{
-}
-
-ColourFit::~ColourFit()
-{
-}
-
-void ColourFit::Compress( void* block )
-{
-	bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
-	if( isDxt1 )
-	{
-		Compress3( block );
-		if( !m_colours->IsTransparent() )
-			Compress4( block );
-	}
-	else
-		Compress4( block );
-}
-
-} // namespace squish

+ 0 - 56
3rdparty/libsquish/colourfit.h

@@ -1,56 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_COLOURFIT_H
-#define SQUISH_COLOURFIT_H
-
-#include "squish.h"
-#include "maths.h"
-
-#include <climits>
-
-namespace squish {
-
-class ColourSet;
-
-class ColourFit
-{
-public:
-	ColourFit( ColourSet const* colours, int flags );
-	virtual ~ColourFit();
-
-	void Compress( void* block );
-
-protected:
-	virtual void Compress3( void* block ) = 0;
-	virtual void Compress4( void* block ) = 0;
-
-	ColourSet const* m_colours;
-	int m_flags;
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_COLOURFIT_H

+ 0 - 121
3rdparty/libsquish/colourset.cpp

@@ -1,121 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "colourset.h"
-
-namespace squish {
-
-ColourSet::ColourSet( u8 const* rgba, int mask, int flags )
-  : m_count( 0 ), 
-	m_transparent( false )
-{
-	// check the compression mode for dxt1
-	bool isDxt1 = ( ( flags & kDxt1 ) != 0 );
-	bool weightByAlpha = ( ( flags & kWeightColourByAlpha ) != 0 );
-
-	// create the minimal set
-	for( int i = 0; i < 16; ++i )
-	{
-		// check this pixel is enabled
-		int bit = 1 << i;
-		if( ( mask & bit ) == 0 )
-		{
-			m_remap[i] = -1;
-			continue;
-		}
-	
-		// check for transparent pixels when using dxt1
-		if( isDxt1 && rgba[4*i + 3] < 128 )
-		{
-			m_remap[i] = -1;
-			m_transparent = true;
-			continue;
-		}
-
-		// loop over previous points for a match
-		for( int j = 0;; ++j )
-		{
-			// allocate a new point
-			if( j == i )
-			{
-				// normalise coordinates to [0,1]
-				float x = ( float )rgba[4*i] / 255.0f;
-				float y = ( float )rgba[4*i + 1] / 255.0f;
-				float z = ( float )rgba[4*i + 2] / 255.0f;
-				
-				// ensure there is always non-zero weight even for zero alpha
-				float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
-
-				// add the point
-				m_points[m_count] = Vec3( x, y, z );
-				m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
-				m_remap[i] = m_count;
-				
-				// advance
-				++m_count;
-				break;
-			}
-		
-			// check for a match
-			int oldbit = 1 << j;
-			bool match = ( ( mask & oldbit ) != 0 )
-				&& ( rgba[4*i] == rgba[4*j] )
-				&& ( rgba[4*i + 1] == rgba[4*j + 1] )
-				&& ( rgba[4*i + 2] == rgba[4*j + 2] )
-				&& ( rgba[4*j + 3] >= 128 || !isDxt1 );
-			if( match )
-			{
-				// get the index of the match
-				int index = m_remap[j];
-				
-				// ensure there is always non-zero weight even for zero alpha
-				float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
-
-				// map to this point and increase the weight
-				m_weights[index] += ( weightByAlpha ? w : 1.0f );
-				m_remap[i] = index;
-				break;
-			}
-		}
-	}
-
-	// square root the weights
-	for( int i = 0; i < m_count; ++i )
-		m_weights[i] = std::sqrt( m_weights[i] );
-}
-
-void ColourSet::RemapIndices( u8 const* source, u8* target ) const
-{
-	for( int i = 0; i < 16; ++i )
-	{
-		int j = m_remap[i];
-		if( j == -1 )
-			target[i] = 3;
-		else
-			target[i] = source[j];
-	}
-}
-
-} // namespace squish

+ 0 - 58
3rdparty/libsquish/colourset.h

@@ -1,58 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_COLOURSET_H
-#define SQUISH_COLOURSET_H
-
-#include "squish.h"
-#include "maths.h"
-
-namespace squish {
-
-/*! @brief Represents a set of block colours
-*/
-class ColourSet
-{
-public:
-	ColourSet( u8 const* rgba, int mask, int flags );
-
-	int GetCount() const { return m_count; }
-	Vec3 const* GetPoints() const { return m_points; }
-	float const* GetWeights() const { return m_weights; }
-	bool IsTransparent() const { return m_transparent; }
-
-	void RemapIndices( u8 const* source, u8* target ) const;
-
-private:
-	int m_count;
-	Vec3 m_points[16];
-	float m_weights[16];
-	int m_remap[16];
-	bool m_transparent;
-};
-
-} // namespace sqish
-
-#endif // ndef SQUISH_COLOURSET_H

+ 0 - 49
3rdparty/libsquish/config.h

@@ -1,49 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_CONFIG_H
-#define SQUISH_CONFIG_H
-
-// Set to 1 when building squish to use Altivec instructions.
-#ifndef SQUISH_USE_ALTIVEC
-#define SQUISH_USE_ALTIVEC 0
-#endif
-
-// Set to 1 or 2 when building squish to use SSE or SSE2 instructions.
-#ifndef SQUISH_USE_SSE
-#define SQUISH_USE_SSE 0
-#endif
-
-// Internally set SQUISH_USE_SIMD when either Altivec or SSE is available.
-#if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
-#error "Cannot enable both Altivec and SSE!"
-#endif
-#if SQUISH_USE_ALTIVEC || SQUISH_USE_SSE
-#define SQUISH_USE_SIMD 1
-#else
-#define SQUISH_USE_SIMD 0
-#endif
-
-#endif // ndef SQUISH_CONFIG_H

+ 0 - 259
3rdparty/libsquish/maths.cpp

@@ -1,259 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-/*! @file
-
-	The symmetric eigensystem solver algorithm is from 
-	http://www.geometrictools.com/Documentation/EigenSymmetric3x3.pdf
-*/
-
-#include "maths.h"
-#include "simd.h"
-#include <cfloat>
-
-namespace squish {
-
-Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights )
-{
-	// compute the centroid
-	float total = 0.0f;
-	Vec3 centroid( 0.0f );
-	for( int i = 0; i < n; ++i )
-	{
-		total += weights[i];
-		centroid += weights[i]*points[i];
-	}
-	if( total > FLT_EPSILON )
-		centroid /= total;
-
-	// accumulate the covariance matrix
-	Sym3x3 covariance( 0.0f );
-	for( int i = 0; i < n; ++i )
-	{
-		Vec3 a = points[i] - centroid;
-		Vec3 b = weights[i]*a;
-		
-		covariance[0] += a.X()*b.X();
-		covariance[1] += a.X()*b.Y();
-		covariance[2] += a.X()*b.Z();
-		covariance[3] += a.Y()*b.Y();
-		covariance[4] += a.Y()*b.Z();
-		covariance[5] += a.Z()*b.Z();
-	}
-	
-	// return it
-	return covariance;
-}
-
-#if 0
-
-static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
-{
-	// compute M
-	Sym3x3 m;
-	m[0] = matrix[0] - evalue;
-	m[1] = matrix[1];
-	m[2] = matrix[2];
-	m[3] = matrix[3] - evalue;
-	m[4] = matrix[4];
-	m[5] = matrix[5] - evalue;
-
-	// compute U
-	Sym3x3 u;
-	u[0] = m[3]*m[5] - m[4]*m[4];
-	u[1] = m[2]*m[4] - m[1]*m[5];
-	u[2] = m[1]*m[4] - m[2]*m[3];
-	u[3] = m[0]*m[5] - m[2]*m[2];
-	u[4] = m[1]*m[2] - m[4]*m[0];
-	u[5] = m[0]*m[3] - m[1]*m[1];
-
-	// find the largest component
-	float mc = std::fabs( u[0] );
-	int mi = 0;
-	for( int i = 1; i < 6; ++i )
-	{
-		float c = std::fabs( u[i] );
-		if( c > mc )
-		{
-			mc = c;
-			mi = i;
-		}
-	}
-
-	// pick the column with this component
-	switch( mi )
-	{
-	case 0:
-		return Vec3( u[0], u[1], u[2] );
-
-	case 1:
-	case 3:
-		return Vec3( u[1], u[3], u[4] );
-
-	default:
-		return Vec3( u[2], u[4], u[5] );
-	}
-}
-
-static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue )
-{
-	// compute M
-	Sym3x3 m;
-	m[0] = matrix[0] - evalue;
-	m[1] = matrix[1];
-	m[2] = matrix[2];
-	m[3] = matrix[3] - evalue;
-	m[4] = matrix[4];
-	m[5] = matrix[5] - evalue;
-
-	// find the largest component
-	float mc = std::fabs( m[0] );
-	int mi = 0;
-	for( int i = 1; i < 6; ++i )
-	{
-		float c = std::fabs( m[i] );
-		if( c > mc )
-		{
-			mc = c;
-			mi = i;
-		}
-	}
-
-	// pick the first eigenvector based on this index
-	switch( mi )
-	{
-	case 0:
-	case 1:
-		return Vec3( -m[1], m[0], 0.0f );
-
-	case 2:
-		return Vec3( m[2], 0.0f, -m[0] );
-
-	case 3:
-	case 4:
-		return Vec3( 0.0f, -m[4], m[3] );
-
-	default:
-		return Vec3( 0.0f, -m[5], m[4] );
-	}
-}
-
-Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
-{
-	// compute the cubic coefficients
-	float c0 = matrix[0]*matrix[3]*matrix[5] 
-		+ 2.0f*matrix[1]*matrix[2]*matrix[4] 
-		- matrix[0]*matrix[4]*matrix[4] 
-		- matrix[3]*matrix[2]*matrix[2] 
-		- matrix[5]*matrix[1]*matrix[1];
-	float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5]
-		- matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4];
-	float c2 = matrix[0] + matrix[3] + matrix[5];
-
-	// compute the quadratic coefficients
-	float a = c1 - ( 1.0f/3.0f )*c2*c2;
-	float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0;
-
-	// compute the root count check
-	float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a;
-
-	// test the multiplicity
-	if( FLT_EPSILON < Q )
-	{
-		// only one root, which implies we have a multiple of the identity
-        return Vec3( 1.0f );
-	}
-	else if( Q < -FLT_EPSILON )
-	{
-		// three distinct roots
-		float theta = std::atan2( std::sqrt( -Q ), -0.5f*b );
-		float rho = std::sqrt( 0.25f*b*b - Q );
-
-		float rt = std::pow( rho, 1.0f/3.0f );
-		float ct = std::cos( theta/3.0f );
-		float st = std::sin( theta/3.0f );
-
-		float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct;
-		float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st );
-		float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st );
-
-		// pick the larger
-		if( std::fabs( l2 ) > std::fabs( l1 ) )
-			l1 = l2;
-		if( std::fabs( l3 ) > std::fabs( l1 ) )
-			l1 = l3;
-
-		// get the eigenvector
-		return GetMultiplicity1Evector( matrix, l1 );
-	}
-	else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON )
-	{
-		// two roots
-		float rt;
-		if( b < 0.0f )
-			rt = -std::pow( -0.5f*b, 1.0f/3.0f );
-		else
-			rt = std::pow( 0.5f*b, 1.0f/3.0f );
-		
-		float l1 = ( 1.0f/3.0f )*c2 + rt;		// repeated
-		float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt;
-		
-		// get the eigenvector
-		if( std::fabs( l1 ) > std::fabs( l2 ) )
-			return GetMultiplicity2Evector( matrix, l1 );
-		else
-			return GetMultiplicity1Evector( matrix, l2 );
-	}
-}
-
-#else
-
-#define POWER_ITERATION_COUNT 	8
-
-Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
-{
-	Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
-	Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
-	Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
-	Vec4 v = VEC4_CONST( 1.0f );
-	for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
-	{
-		// matrix multiply
-		Vec4 w = row0*v.SplatX();
-		w = MultiplyAdd(row1, v.SplatY(), w);
-		w = MultiplyAdd(row2, v.SplatZ(), w);
-
-		// get max component from xyz in all channels
-		Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
-
-		// divide through and advance
-		v = w*Reciprocal(a);
-	}
-	return v.GetVec3();
-}
-
-#endif
-
-} // namespace squish

+ 0 - 233
3rdparty/libsquish/maths.h

@@ -1,233 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_MATHS_H
-#define SQUISH_MATHS_H
-
-#include <cmath>
-#include <algorithm>
-#include "config.h"
-
-namespace squish {
-
-class Vec3
-{
-public:
-	typedef Vec3 const& Arg;
-
-	Vec3()
-	{
-	}
-
-	explicit Vec3( float s )
-	{
-		m_x = s;
-		m_y = s;
-		m_z = s;
-	}
-
-	Vec3( float x, float y, float z )
-	{
-		m_x = x;
-		m_y = y;
-		m_z = z;
-	}
-	
-	float X() const { return m_x; }
-	float Y() const { return m_y; }
-	float Z() const { return m_z; }
-	
-	Vec3 operator-() const
-	{
-		return Vec3( -m_x, -m_y, -m_z );
-	}
-	
-	Vec3& operator+=( Arg v )
-	{
-		m_x += v.m_x;
-		m_y += v.m_y;
-		m_z += v.m_z;
-		return *this;
-	}
-	
-	Vec3& operator-=( Arg v )
-	{
-		m_x -= v.m_x;
-		m_y -= v.m_y;
-		m_z -= v.m_z;
-		return *this;
-	}
-	
-	Vec3& operator*=( Arg v )
-	{
-		m_x *= v.m_x;
-		m_y *= v.m_y;
-		m_z *= v.m_z;
-		return *this;
-	}
-	
-	Vec3& operator*=( float s )
-	{
-		m_x *= s;
-		m_y *= s;
-		m_z *= s;
-		return *this;
-	}
-	
-	Vec3& operator/=( Arg v )
-	{
-		m_x /= v.m_x;
-		m_y /= v.m_y;
-		m_z /= v.m_z;
-		return *this;
-	}
-	
-	Vec3& operator/=( float s )
-	{
-		float t = 1.0f/s;
-		m_x *= t;
-		m_y *= t;
-		m_z *= t;
-		return *this;
-	}
-	
-	friend Vec3 operator+( Arg left, Arg right )
-	{
-		Vec3 copy( left );
-		return copy += right;
-	}
-	
-	friend Vec3 operator-( Arg left, Arg right )
-	{
-		Vec3 copy( left );
-		return copy -= right;
-	}
-	
-	friend Vec3 operator*( Arg left, Arg right )
-	{
-		Vec3 copy( left );
-		return copy *= right;
-	}
-	
-	friend Vec3 operator*( Arg left, float right )
-	{
-		Vec3 copy( left );
-		return copy *= right;
-	}
-	
-	friend Vec3 operator*( float left, Arg right )
-	{
-		Vec3 copy( right );
-		return copy *= left;
-	}
-	
-	friend Vec3 operator/( Arg left, Arg right )
-	{
-		Vec3 copy( left );
-		return copy /= right;
-	}
-	
-	friend Vec3 operator/( Arg left, float right )
-	{
-		Vec3 copy( left );
-		return copy /= right;
-	}
-	
-	friend float Dot( Arg left, Arg right )
-	{
-		return left.m_x*right.m_x + left.m_y*right.m_y + left.m_z*right.m_z;
-	}
-	
-	friend Vec3 Min( Arg left, Arg right )
-	{
-		return Vec3(
-			std::min( left.m_x, right.m_x ), 
-			std::min( left.m_y, right.m_y ), 
-			std::min( left.m_z, right.m_z )
-		);
-	}
-
-	friend Vec3 Max( Arg left, Arg right )
-	{
-		return Vec3(
-			std::max( left.m_x, right.m_x ), 
-			std::max( left.m_y, right.m_y ), 
-			std::max( left.m_z, right.m_z )
-		);
-	}
-
-	friend Vec3 Truncate( Arg v )
-	{
-		return Vec3(
-			v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ), 
-			v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ), 
-			v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z )
-		);
-	}
-
-private:
-	float m_x;
-	float m_y;
-	float m_z;
-};
-
-inline float LengthSquared( Vec3::Arg v )
-{
-	return Dot( v, v );
-}
-
-class Sym3x3
-{
-public:
-	Sym3x3()
-	{
-	}
-
-	Sym3x3( float s )
-	{
-		for( int i = 0; i < 6; ++i )
-			m_x[i] = s;
-	}
-
-	float operator[]( int index ) const
-	{
-		return m_x[index];
-	}
-
-	float& operator[]( int index )
-	{
-		return m_x[index];
-	}
-
-private:
-	float m_x[6];
-};
-
-Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights );
-Vec3 ComputePrincipleComponent( Sym3x3 const& matrix );
-
-} // namespace squish
-
-#endif // ndef SQUISH_MATHS_H

+ 0 - 201
3rdparty/libsquish/rangefit.cpp

@@ -1,201 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "rangefit.h"
-#include "colourset.h"
-#include "colourblock.h"
-#include <cfloat>
-
-namespace squish {
-
-RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric ) 
-  : ColourFit( colours, flags )
-{
-	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
-	if( metric )
-		m_metric = Vec3( metric[0], metric[1], metric[2] );
-	else
-		m_metric = Vec3( 1.0f );	
-
-	// initialise the best error
-	m_besterror = FLT_MAX;
-
-	// cache some values
-	int const count = m_colours->GetCount();
-	Vec3 const* values = m_colours->GetPoints();
-	float const* weights = m_colours->GetWeights();
-	
-	// get the covariance matrix
-	Sym3x3 covariance = ComputeWeightedCovariance( count, values, weights );
-	
-	// compute the principle component
-	Vec3 principle = ComputePrincipleComponent( covariance );
-
-	// get the min and max range as the codebook endpoints
-	Vec3 start( 0.0f );
-	Vec3 end( 0.0f );
-	if( count > 0 )
-	{
-		float min, max;
-		
-		// compute the range
-		start = end = values[0];
-		min = max = Dot( values[0], principle );
-		for( int i = 1; i < count; ++i )
-		{
-			float val = Dot( values[i], principle );
-			if( val < min )
-			{
-				start = values[i];
-				min = val;
-			}
-			else if( val > max )
-			{
-				end = values[i];
-				max = val;
-			}
-		}
-	}
-			
-	// clamp the output to [0, 1]
-	Vec3 const one( 1.0f );
-	Vec3 const zero( 0.0f );
-	start = Min( one, Max( zero, start ) );
-	end = Min( one, Max( zero, end ) );
-
-	// clamp to the grid and save
-	Vec3 const grid( 31.0f, 63.0f, 31.0f );
-	Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
-	Vec3 const half( 0.5f );
-	m_start = Truncate( grid*start + half )*gridrcp;
-	m_end = Truncate( grid*end + half )*gridrcp;
-}
-
-void RangeFit::Compress3( void* block )
-{
-	// cache some values
-	int const count = m_colours->GetCount();
-	Vec3 const* values = m_colours->GetPoints();
-	
-	// create a codebook
-	Vec3 codes[3];
-	codes[0] = m_start;
-	codes[1] = m_end;
-	codes[2] = 0.5f*m_start + 0.5f*m_end;
-
-	// match each point to the closest code
-	u8 closest[16];
-	float error = 0.0f;
-	for( int i = 0; i < count; ++i )
-	{
-		// find the closest code
-		float dist = FLT_MAX;
-		int idx = 0;
-		for( int j = 0; j < 3; ++j )
-		{
-			float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
-			if( d < dist )
-			{
-				dist = d;
-				idx = j;
-			}
-		}
-		
-		// save the index
-		closest[i] = ( u8 )idx;
-		
-		// accumulate the error
-		error += dist;
-	}
-	
-	// save this scheme if it wins
-	if( error < m_besterror )
-	{
-		// remap the indices
-		u8 indices[16];
-		m_colours->RemapIndices( closest, indices );
-		
-		// save the block
-		WriteColourBlock3( m_start, m_end, indices, block );
-		
-		// save the error
-		m_besterror = error;
-	}
-}
-
-void RangeFit::Compress4( void* block )
-{
-	// cache some values
-	int const count = m_colours->GetCount();
-	Vec3 const* values = m_colours->GetPoints();
-	
-	// create a codebook
-	Vec3 codes[4];
-	codes[0] = m_start;
-	codes[1] = m_end;
-	codes[2] = ( 2.0f/3.0f )*m_start + ( 1.0f/3.0f )*m_end;
-	codes[3] = ( 1.0f/3.0f )*m_start + ( 2.0f/3.0f )*m_end;
-
-	// match each point to the closest code
-	u8 closest[16];
-	float error = 0.0f;
-	for( int i = 0; i < count; ++i )
-	{
-		// find the closest code
-		float dist = FLT_MAX;
-		int idx = 0;
-		for( int j = 0; j < 4; ++j )
-		{
-			float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
-			if( d < dist )
-			{
-				dist = d;
-				idx = j;
-			}
-		}
-		
-		// save the index
-		closest[i] = ( u8 )idx;
-		
-		// accumulate the error
-		error += dist;
-	}
-	
-	// save this scheme if it wins
-	if( error < m_besterror )
-	{
-		// remap the indices
-		u8 indices[16];
-		m_colours->RemapIndices( closest, indices );
-		
-		// save the block
-		WriteColourBlock4( m_start, m_end, indices, block );
-
-		// save the error
-		m_besterror = error;
-	}
-}
-
-} // namespace squish

+ 0 - 54
3rdparty/libsquish/rangefit.h

@@ -1,54 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_RANGEFIT_H
-#define SQUISH_RANGEFIT_H
-
-#include "squish.h"
-#include "colourfit.h"
-#include "maths.h"
-
-namespace squish {
-
-class ColourSet;
-
-class RangeFit : public ColourFit
-{
-public:
-	RangeFit( ColourSet const* colours, int flags, float* metric );
-	
-private:
-	virtual void Compress3( void* block );
-	virtual void Compress4( void* block );
-	
-	Vec3 m_metric;
-	Vec3 m_start;
-	Vec3 m_end;
-	float m_besterror;
-};
-
-} // squish
-
-#endif // ndef SQUISH_RANGEFIT_H

+ 0 - 32
3rdparty/libsquish/simd.h

@@ -1,32 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_SIMD_H
-#define SQUISH_SIMD_H
-
-#include "maths.h"
-#include "simd_float.h"
-
-#endif // ndef SQUISH_SIMD_H

+ 0 - 183
3rdparty/libsquish/simd_float.h

@@ -1,183 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_SIMD_FLOAT_H
-#define SQUISH_SIMD_FLOAT_H
-
-#include <algorithm>
-
-namespace squish {
-
-#define VEC4_CONST( X ) Vec4( X )
-
-class Vec4
-{
-public:
-	typedef Vec4 const& Arg;
-
-	Vec4() {}
-		
-	explicit Vec4( float s )
-	  : m_x( s ),
-		m_y( s ),
-		m_z( s ),
-		m_w( s )
-	{
-	}
-	
-	Vec4( float x, float y, float z, float w )
-	  : m_x( x ),
-		m_y( y ),
-		m_z( z ),
-		m_w( w )
-	{
-	}
-	
-	Vec3 GetVec3() const
-	{
-		return Vec3( m_x, m_y, m_z );
-	}
-	
-	Vec4 SplatX() const { return Vec4( m_x ); }
-	Vec4 SplatY() const { return Vec4( m_y ); }
-	Vec4 SplatZ() const { return Vec4( m_z ); }
-	Vec4 SplatW() const { return Vec4( m_w ); }
-
-	Vec4& operator+=( Arg v )
-	{
-		m_x += v.m_x;
-		m_y += v.m_y;
-		m_z += v.m_z;
-		m_w += v.m_w;
-		return *this;
-	}
-	
-	Vec4& operator-=( Arg v )
-	{
-		m_x -= v.m_x;
-		m_y -= v.m_y;
-		m_z -= v.m_z;
-		m_w -= v.m_w;
-		return *this;
-	}
-	
-	Vec4& operator*=( Arg v )
-	{
-		m_x *= v.m_x;
-		m_y *= v.m_y;
-		m_z *= v.m_z;
-		m_w *= v.m_w;
-		return *this;
-	}
-	
-	friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
-	{
-		Vec4 copy( left );
-		return copy += right;
-	}
-	
-	friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
-	{
-		Vec4 copy( left );
-		return copy -= right;
-	}
-	
-	friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
-	{
-		Vec4 copy( left );
-		return copy *= right;
-	}
-	
-	//! Returns a*b + c
-	friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return a*b + c;
-	}
-	
-	//! Returns -( a*b - c )
-	friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return c - a*b;
-	}
-	
-	friend Vec4 Reciprocal( Vec4::Arg v )
-	{
-		return Vec4( 
-			1.0f/v.m_x, 
-			1.0f/v.m_y, 
-			1.0f/v.m_z, 
-			1.0f/v.m_w 
-		);
-	}
-	
-	friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( 
-			std::min( left.m_x, right.m_x ), 
-			std::min( left.m_y, right.m_y ), 
-			std::min( left.m_z, right.m_z ), 
-			std::min( left.m_w, right.m_w ) 
-		);
-	}
-	
-	friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( 
-			std::max( left.m_x, right.m_x ), 
-			std::max( left.m_y, right.m_y ), 
-			std::max( left.m_z, right.m_z ), 
-			std::max( left.m_w, right.m_w ) 
-		);
-	}
-	
-	friend Vec4 Truncate( Vec4::Arg v )
-	{
-		return Vec4(
-			v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ), 
-			v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ), 
-			v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z ),
-			v.m_w > 0.0f ? std::floor( v.m_w ) : std::ceil( v.m_w )
-		);
-	}
-	
-	friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) 
-	{
-		return left.m_x < right.m_x
-			|| left.m_y < right.m_y
-			|| left.m_z < right.m_z
-			|| left.m_w < right.m_w;
-	}
-	
-private:
-	float m_x;
-	float m_y;
-	float m_z;
-	float m_w;
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_SIMD_FLOAT_H
-

+ 0 - 172
3rdparty/libsquish/singlecolourfit.cpp

@@ -1,172 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "singlecolourfit.h"
-#include "colourset.h"
-#include "colourblock.h"
-
-namespace squish {
-
-struct SourceBlock
-{
-	u8 start;
-	u8 end;
-	u8 error;
-};
-
-struct SingleColourLookup
-{
-	SourceBlock sources[2];
-};
-
-#include "singlecolourlookup.inl"
-
-static int FloatToInt( float a, int limit )
-{
-	// use ANSI round-to-zero behaviour to get round-to-nearest
-	int i = ( int )( a + 0.5f );
-
-	// clamp to the limit
-	if( i < 0 )
-		i = 0;
-	else if( i > limit )
-		i = limit; 
-
-	// done
-	return i;
-}
-
-SingleColourFit::SingleColourFit( ColourSet const* colours, int flags )
-  : ColourFit( colours, flags )
-{
-	// grab the single colour
-	Vec3 const* values = m_colours->GetPoints();
-	m_colour[0] = ( u8 )FloatToInt( 255.0f*values->X(), 255 );
-	m_colour[1] = ( u8 )FloatToInt( 255.0f*values->Y(), 255 );
-	m_colour[2] = ( u8 )FloatToInt( 255.0f*values->Z(), 255 );
-		
-	// initialise the best error
-	m_besterror = INT_MAX;
-}
-
-void SingleColourFit::Compress3( void* block )
-{
-	// build the table of lookups
-	SingleColourLookup const* const lookups[] = 
-	{
-		lookup_5_3, 
-		lookup_6_3, 
-		lookup_5_3
-	};
-	
-	// find the best end-points and index
-	ComputeEndPoints( lookups );
-	
-	// build the block if we win
-	if( m_error < m_besterror )
-	{
-		// remap the indices
-		u8 indices[16];
-		m_colours->RemapIndices( &m_index, indices );
-		
-		// save the block
-		WriteColourBlock3( m_start, m_end, indices, block );
-
-		// save the error
-		m_besterror = m_error;
-	}
-}
-
-void SingleColourFit::Compress4( void* block )
-{
-	// build the table of lookups
-	SingleColourLookup const* const lookups[] = 
-	{
-		lookup_5_4, 
-		lookup_6_4, 
-		lookup_5_4
-	};
-	
-	// find the best end-points and index
-	ComputeEndPoints( lookups );
-	
-	// build the block if we win
-	if( m_error < m_besterror )
-	{
-		// remap the indices
-		u8 indices[16];
-		m_colours->RemapIndices( &m_index, indices );
-		
-		// save the block
-		WriteColourBlock4( m_start, m_end, indices, block );
-
-		// save the error
-		m_besterror = m_error;
-	}
-}
-
-void SingleColourFit::ComputeEndPoints( SingleColourLookup const* const* lookups )
-{
-	// check each index combination (endpoint or intermediate)
-	m_error = INT_MAX;
-	for( int index = 0; index < 2; ++index )
-	{
-		// check the error for this codebook index
-		SourceBlock const* sources[3];
-		int error = 0;
-		for( int channel = 0; channel < 3; ++channel )
-		{
-			// grab the lookup table and index for this channel
-			SingleColourLookup const* lookup = lookups[channel];
-			int target = m_colour[channel];
-			
-			// store a pointer to the source for this channel
-			sources[channel] = lookup[target].sources + index;
-			
-			// accumulate the error
-			int diff = sources[channel]->error;
-			error += diff*diff;			
-		}
-		
-		// keep it if the error is lower
-		if( error < m_error )
-		{
-			m_start = Vec3(
-				( float )sources[0]->start/31.0f, 
-				( float )sources[1]->start/63.0f, 
-				( float )sources[2]->start/31.0f
-			);
-			m_end = Vec3(
-				( float )sources[0]->end/31.0f, 
-				( float )sources[1]->end/63.0f, 
-				( float )sources[2]->end/31.0f
-			);
-			m_index = ( u8 )( 2*index );
-			m_error = error;
-		}
-	}
-}
-
-} // namespace squish

+ 0 - 58
3rdparty/libsquish/singlecolourfit.h

@@ -1,58 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_SINGLECOLOURFIT_H
-#define SQUISH_SINGLECOLOURFIT_H
-
-#include "squish.h"
-#include "colourfit.h"
-
-namespace squish {
-
-class ColourSet;
-struct SingleColourLookup;
-
-class SingleColourFit : public ColourFit
-{
-public:
-	SingleColourFit( ColourSet const* colours, int flags );
-	
-private:
-	virtual void Compress3( void* block );
-	virtual void Compress4( void* block );
-	
-	void ComputeEndPoints( SingleColourLookup const* const* lookups );
-	
-	u8 m_colour[3];
-	Vec3 m_start;
-	Vec3 m_end;
-	u8 m_index;
-	int m_error;
-	int m_besterror;
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_SINGLECOLOURFIT_H

+ 0 - 1064
3rdparty/libsquish/singlecolourlookup.inl

@@ -1,1064 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-
-static SingleColourLookup const lookup_5_3[] = 
-{
-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
-	{ { { 0, 0, 1 }, { 0, 0, 1 } } },
-	{ { { 0, 0, 2 }, { 0, 0, 2 } } },
-	{ { { 0, 0, 3 }, { 0, 1, 1 } } },
-	{ { { 0, 0, 4 }, { 0, 1, 0 } } },
-	{ { { 1, 0, 3 }, { 0, 1, 1 } } },
-	{ { { 1, 0, 2 }, { 0, 1, 2 } } },
-	{ { { 1, 0, 1 }, { 0, 2, 1 } } },
-	{ { { 1, 0, 0 }, { 0, 2, 0 } } },
-	{ { { 1, 0, 1 }, { 0, 2, 1 } } },
-	{ { { 1, 0, 2 }, { 0, 2, 2 } } },
-	{ { { 1, 0, 3 }, { 0, 3, 1 } } },
-	{ { { 1, 0, 4 }, { 0, 3, 0 } } },
-	{ { { 2, 0, 3 }, { 0, 3, 1 } } },
-	{ { { 2, 0, 2 }, { 0, 3, 2 } } },
-	{ { { 2, 0, 1 }, { 0, 4, 1 } } },
-	{ { { 2, 0, 0 }, { 0, 4, 0 } } },
-	{ { { 2, 0, 1 }, { 0, 4, 1 } } },
-	{ { { 2, 0, 2 }, { 0, 4, 2 } } },
-	{ { { 2, 0, 3 }, { 0, 5, 1 } } },
-	{ { { 2, 0, 4 }, { 0, 5, 0 } } },
-	{ { { 3, 0, 3 }, { 0, 5, 1 } } },
-	{ { { 3, 0, 2 }, { 0, 5, 2 } } },
-	{ { { 3, 0, 1 }, { 0, 6, 1 } } },
-	{ { { 3, 0, 0 }, { 0, 6, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 6, 1 } } },
-	{ { { 3, 0, 2 }, { 0, 6, 2 } } },
-	{ { { 3, 0, 3 }, { 0, 7, 1 } } },
-	{ { { 3, 0, 4 }, { 0, 7, 0 } } },
-	{ { { 4, 0, 4 }, { 0, 7, 1 } } },
-	{ { { 4, 0, 3 }, { 0, 7, 2 } } },
-	{ { { 4, 0, 2 }, { 1, 7, 1 } } },
-	{ { { 4, 0, 1 }, { 1, 7, 0 } } },
-	{ { { 4, 0, 0 }, { 0, 8, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 8, 1 } } },
-	{ { { 4, 0, 2 }, { 2, 7, 1 } } },
-	{ { { 4, 0, 3 }, { 2, 7, 0 } } },
-	{ { { 4, 0, 4 }, { 0, 9, 0 } } },
-	{ { { 5, 0, 3 }, { 0, 9, 1 } } },
-	{ { { 5, 0, 2 }, { 3, 7, 1 } } },
-	{ { { 5, 0, 1 }, { 3, 7, 0 } } },
-	{ { { 5, 0, 0 }, { 0, 10, 0 } } },
-	{ { { 5, 0, 1 }, { 0, 10, 1 } } },
-	{ { { 5, 0, 2 }, { 0, 10, 2 } } },
-	{ { { 5, 0, 3 }, { 0, 11, 1 } } },
-	{ { { 5, 0, 4 }, { 0, 11, 0 } } },
-	{ { { 6, 0, 3 }, { 0, 11, 1 } } },
-	{ { { 6, 0, 2 }, { 0, 11, 2 } } },
-	{ { { 6, 0, 1 }, { 0, 12, 1 } } },
-	{ { { 6, 0, 0 }, { 0, 12, 0 } } },
-	{ { { 6, 0, 1 }, { 0, 12, 1 } } },
-	{ { { 6, 0, 2 }, { 0, 12, 2 } } },
-	{ { { 6, 0, 3 }, { 0, 13, 1 } } },
-	{ { { 6, 0, 4 }, { 0, 13, 0 } } },
-	{ { { 7, 0, 3 }, { 0, 13, 1 } } },
-	{ { { 7, 0, 2 }, { 0, 13, 2 } } },
-	{ { { 7, 0, 1 }, { 0, 14, 1 } } },
-	{ { { 7, 0, 0 }, { 0, 14, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 14, 1 } } },
-	{ { { 7, 0, 2 }, { 0, 14, 2 } } },
-	{ { { 7, 0, 3 }, { 0, 15, 1 } } },
-	{ { { 7, 0, 4 }, { 0, 15, 0 } } },
-	{ { { 8, 0, 4 }, { 0, 15, 1 } } },
-	{ { { 8, 0, 3 }, { 0, 15, 2 } } },
-	{ { { 8, 0, 2 }, { 1, 15, 1 } } },
-	{ { { 8, 0, 1 }, { 1, 15, 0 } } },
-	{ { { 8, 0, 0 }, { 0, 16, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 16, 1 } } },
-	{ { { 8, 0, 2 }, { 2, 15, 1 } } },
-	{ { { 8, 0, 3 }, { 2, 15, 0 } } },
-	{ { { 8, 0, 4 }, { 0, 17, 0 } } },
-	{ { { 9, 0, 3 }, { 0, 17, 1 } } },
-	{ { { 9, 0, 2 }, { 3, 15, 1 } } },
-	{ { { 9, 0, 1 }, { 3, 15, 0 } } },
-	{ { { 9, 0, 0 }, { 0, 18, 0 } } },
-	{ { { 9, 0, 1 }, { 0, 18, 1 } } },
-	{ { { 9, 0, 2 }, { 0, 18, 2 } } },
-	{ { { 9, 0, 3 }, { 0, 19, 1 } } },
-	{ { { 9, 0, 4 }, { 0, 19, 0 } } },
-	{ { { 10, 0, 3 }, { 0, 19, 1 } } },
-	{ { { 10, 0, 2 }, { 0, 19, 2 } } },
-	{ { { 10, 0, 1 }, { 0, 20, 1 } } },
-	{ { { 10, 0, 0 }, { 0, 20, 0 } } },
-	{ { { 10, 0, 1 }, { 0, 20, 1 } } },
-	{ { { 10, 0, 2 }, { 0, 20, 2 } } },
-	{ { { 10, 0, 3 }, { 0, 21, 1 } } },
-	{ { { 10, 0, 4 }, { 0, 21, 0 } } },
-	{ { { 11, 0, 3 }, { 0, 21, 1 } } },
-	{ { { 11, 0, 2 }, { 0, 21, 2 } } },
-	{ { { 11, 0, 1 }, { 0, 22, 1 } } },
-	{ { { 11, 0, 0 }, { 0, 22, 0 } } },
-	{ { { 11, 0, 1 }, { 0, 22, 1 } } },
-	{ { { 11, 0, 2 }, { 0, 22, 2 } } },
-	{ { { 11, 0, 3 }, { 0, 23, 1 } } },
-	{ { { 11, 0, 4 }, { 0, 23, 0 } } },
-	{ { { 12, 0, 4 }, { 0, 23, 1 } } },
-	{ { { 12, 0, 3 }, { 0, 23, 2 } } },
-	{ { { 12, 0, 2 }, { 1, 23, 1 } } },
-	{ { { 12, 0, 1 }, { 1, 23, 0 } } },
-	{ { { 12, 0, 0 }, { 0, 24, 0 } } },
-	{ { { 12, 0, 1 }, { 0, 24, 1 } } },
-	{ { { 12, 0, 2 }, { 2, 23, 1 } } },
-	{ { { 12, 0, 3 }, { 2, 23, 0 } } },
-	{ { { 12, 0, 4 }, { 0, 25, 0 } } },
-	{ { { 13, 0, 3 }, { 0, 25, 1 } } },
-	{ { { 13, 0, 2 }, { 3, 23, 1 } } },
-	{ { { 13, 0, 1 }, { 3, 23, 0 } } },
-	{ { { 13, 0, 0 }, { 0, 26, 0 } } },
-	{ { { 13, 0, 1 }, { 0, 26, 1 } } },
-	{ { { 13, 0, 2 }, { 0, 26, 2 } } },
-	{ { { 13, 0, 3 }, { 0, 27, 1 } } },
-	{ { { 13, 0, 4 }, { 0, 27, 0 } } },
-	{ { { 14, 0, 3 }, { 0, 27, 1 } } },
-	{ { { 14, 0, 2 }, { 0, 27, 2 } } },
-	{ { { 14, 0, 1 }, { 0, 28, 1 } } },
-	{ { { 14, 0, 0 }, { 0, 28, 0 } } },
-	{ { { 14, 0, 1 }, { 0, 28, 1 } } },
-	{ { { 14, 0, 2 }, { 0, 28, 2 } } },
-	{ { { 14, 0, 3 }, { 0, 29, 1 } } },
-	{ { { 14, 0, 4 }, { 0, 29, 0 } } },
-	{ { { 15, 0, 3 }, { 0, 29, 1 } } },
-	{ { { 15, 0, 2 }, { 0, 29, 2 } } },
-	{ { { 15, 0, 1 }, { 0, 30, 1 } } },
-	{ { { 15, 0, 0 }, { 0, 30, 0 } } },
-	{ { { 15, 0, 1 }, { 0, 30, 1 } } },
-	{ { { 15, 0, 2 }, { 0, 30, 2 } } },
-	{ { { 15, 0, 3 }, { 0, 31, 1 } } },
-	{ { { 15, 0, 4 }, { 0, 31, 0 } } },
-	{ { { 16, 0, 4 }, { 0, 31, 1 } } },
-	{ { { 16, 0, 3 }, { 0, 31, 2 } } },
-	{ { { 16, 0, 2 }, { 1, 31, 1 } } },
-	{ { { 16, 0, 1 }, { 1, 31, 0 } } },
-	{ { { 16, 0, 0 }, { 4, 28, 0 } } },
-	{ { { 16, 0, 1 }, { 4, 28, 1 } } },
-	{ { { 16, 0, 2 }, { 2, 31, 1 } } },
-	{ { { 16, 0, 3 }, { 2, 31, 0 } } },
-	{ { { 16, 0, 4 }, { 4, 29, 0 } } },
-	{ { { 17, 0, 3 }, { 4, 29, 1 } } },
-	{ { { 17, 0, 2 }, { 3, 31, 1 } } },
-	{ { { 17, 0, 1 }, { 3, 31, 0 } } },
-	{ { { 17, 0, 0 }, { 4, 30, 0 } } },
-	{ { { 17, 0, 1 }, { 4, 30, 1 } } },
-	{ { { 17, 0, 2 }, { 4, 30, 2 } } },
-	{ { { 17, 0, 3 }, { 4, 31, 1 } } },
-	{ { { 17, 0, 4 }, { 4, 31, 0 } } },
-	{ { { 18, 0, 3 }, { 4, 31, 1 } } },
-	{ { { 18, 0, 2 }, { 4, 31, 2 } } },
-	{ { { 18, 0, 1 }, { 5, 31, 1 } } },
-	{ { { 18, 0, 0 }, { 5, 31, 0 } } },
-	{ { { 18, 0, 1 }, { 5, 31, 1 } } },
-	{ { { 18, 0, 2 }, { 5, 31, 2 } } },
-	{ { { 18, 0, 3 }, { 6, 31, 1 } } },
-	{ { { 18, 0, 4 }, { 6, 31, 0 } } },
-	{ { { 19, 0, 3 }, { 6, 31, 1 } } },
-	{ { { 19, 0, 2 }, { 6, 31, 2 } } },
-	{ { { 19, 0, 1 }, { 7, 31, 1 } } },
-	{ { { 19, 0, 0 }, { 7, 31, 0 } } },
-	{ { { 19, 0, 1 }, { 7, 31, 1 } } },
-	{ { { 19, 0, 2 }, { 7, 31, 2 } } },
-	{ { { 19, 0, 3 }, { 8, 31, 1 } } },
-	{ { { 19, 0, 4 }, { 8, 31, 0 } } },
-	{ { { 20, 0, 4 }, { 8, 31, 1 } } },
-	{ { { 20, 0, 3 }, { 8, 31, 2 } } },
-	{ { { 20, 0, 2 }, { 9, 31, 1 } } },
-	{ { { 20, 0, 1 }, { 9, 31, 0 } } },
-	{ { { 20, 0, 0 }, { 12, 28, 0 } } },
-	{ { { 20, 0, 1 }, { 12, 28, 1 } } },
-	{ { { 20, 0, 2 }, { 10, 31, 1 } } },
-	{ { { 20, 0, 3 }, { 10, 31, 0 } } },
-	{ { { 20, 0, 4 }, { 12, 29, 0 } } },
-	{ { { 21, 0, 3 }, { 12, 29, 1 } } },
-	{ { { 21, 0, 2 }, { 11, 31, 1 } } },
-	{ { { 21, 0, 1 }, { 11, 31, 0 } } },
-	{ { { 21, 0, 0 }, { 12, 30, 0 } } },
-	{ { { 21, 0, 1 }, { 12, 30, 1 } } },
-	{ { { 21, 0, 2 }, { 12, 30, 2 } } },
-	{ { { 21, 0, 3 }, { 12, 31, 1 } } },
-	{ { { 21, 0, 4 }, { 12, 31, 0 } } },
-	{ { { 22, 0, 3 }, { 12, 31, 1 } } },
-	{ { { 22, 0, 2 }, { 12, 31, 2 } } },
-	{ { { 22, 0, 1 }, { 13, 31, 1 } } },
-	{ { { 22, 0, 0 }, { 13, 31, 0 } } },
-	{ { { 22, 0, 1 }, { 13, 31, 1 } } },
-	{ { { 22, 0, 2 }, { 13, 31, 2 } } },
-	{ { { 22, 0, 3 }, { 14, 31, 1 } } },
-	{ { { 22, 0, 4 }, { 14, 31, 0 } } },
-	{ { { 23, 0, 3 }, { 14, 31, 1 } } },
-	{ { { 23, 0, 2 }, { 14, 31, 2 } } },
-	{ { { 23, 0, 1 }, { 15, 31, 1 } } },
-	{ { { 23, 0, 0 }, { 15, 31, 0 } } },
-	{ { { 23, 0, 1 }, { 15, 31, 1 } } },
-	{ { { 23, 0, 2 }, { 15, 31, 2 } } },
-	{ { { 23, 0, 3 }, { 16, 31, 1 } } },
-	{ { { 23, 0, 4 }, { 16, 31, 0 } } },
-	{ { { 24, 0, 4 }, { 16, 31, 1 } } },
-	{ { { 24, 0, 3 }, { 16, 31, 2 } } },
-	{ { { 24, 0, 2 }, { 17, 31, 1 } } },
-	{ { { 24, 0, 1 }, { 17, 31, 0 } } },
-	{ { { 24, 0, 0 }, { 20, 28, 0 } } },
-	{ { { 24, 0, 1 }, { 20, 28, 1 } } },
-	{ { { 24, 0, 2 }, { 18, 31, 1 } } },
-	{ { { 24, 0, 3 }, { 18, 31, 0 } } },
-	{ { { 24, 0, 4 }, { 20, 29, 0 } } },
-	{ { { 25, 0, 3 }, { 20, 29, 1 } } },
-	{ { { 25, 0, 2 }, { 19, 31, 1 } } },
-	{ { { 25, 0, 1 }, { 19, 31, 0 } } },
-	{ { { 25, 0, 0 }, { 20, 30, 0 } } },
-	{ { { 25, 0, 1 }, { 20, 30, 1 } } },
-	{ { { 25, 0, 2 }, { 20, 30, 2 } } },
-	{ { { 25, 0, 3 }, { 20, 31, 1 } } },
-	{ { { 25, 0, 4 }, { 20, 31, 0 } } },
-	{ { { 26, 0, 3 }, { 20, 31, 1 } } },
-	{ { { 26, 0, 2 }, { 20, 31, 2 } } },
-	{ { { 26, 0, 1 }, { 21, 31, 1 } } },
-	{ { { 26, 0, 0 }, { 21, 31, 0 } } },
-	{ { { 26, 0, 1 }, { 21, 31, 1 } } },
-	{ { { 26, 0, 2 }, { 21, 31, 2 } } },
-	{ { { 26, 0, 3 }, { 22, 31, 1 } } },
-	{ { { 26, 0, 4 }, { 22, 31, 0 } } },
-	{ { { 27, 0, 3 }, { 22, 31, 1 } } },
-	{ { { 27, 0, 2 }, { 22, 31, 2 } } },
-	{ { { 27, 0, 1 }, { 23, 31, 1 } } },
-	{ { { 27, 0, 0 }, { 23, 31, 0 } } },
-	{ { { 27, 0, 1 }, { 23, 31, 1 } } },
-	{ { { 27, 0, 2 }, { 23, 31, 2 } } },
-	{ { { 27, 0, 3 }, { 24, 31, 1 } } },
-	{ { { 27, 0, 4 }, { 24, 31, 0 } } },
-	{ { { 28, 0, 4 }, { 24, 31, 1 } } },
-	{ { { 28, 0, 3 }, { 24, 31, 2 } } },
-	{ { { 28, 0, 2 }, { 25, 31, 1 } } },
-	{ { { 28, 0, 1 }, { 25, 31, 0 } } },
-	{ { { 28, 0, 0 }, { 28, 28, 0 } } },
-	{ { { 28, 0, 1 }, { 28, 28, 1 } } },
-	{ { { 28, 0, 2 }, { 26, 31, 1 } } },
-	{ { { 28, 0, 3 }, { 26, 31, 0 } } },
-	{ { { 28, 0, 4 }, { 28, 29, 0 } } },
-	{ { { 29, 0, 3 }, { 28, 29, 1 } } },
-	{ { { 29, 0, 2 }, { 27, 31, 1 } } },
-	{ { { 29, 0, 1 }, { 27, 31, 0 } } },
-	{ { { 29, 0, 0 }, { 28, 30, 0 } } },
-	{ { { 29, 0, 1 }, { 28, 30, 1 } } },
-	{ { { 29, 0, 2 }, { 28, 30, 2 } } },
-	{ { { 29, 0, 3 }, { 28, 31, 1 } } },
-	{ { { 29, 0, 4 }, { 28, 31, 0 } } },
-	{ { { 30, 0, 3 }, { 28, 31, 1 } } },
-	{ { { 30, 0, 2 }, { 28, 31, 2 } } },
-	{ { { 30, 0, 1 }, { 29, 31, 1 } } },
-	{ { { 30, 0, 0 }, { 29, 31, 0 } } },
-	{ { { 30, 0, 1 }, { 29, 31, 1 } } },
-	{ { { 30, 0, 2 }, { 29, 31, 2 } } },
-	{ { { 30, 0, 3 }, { 30, 31, 1 } } },
-	{ { { 30, 0, 4 }, { 30, 31, 0 } } },
-	{ { { 31, 0, 3 }, { 30, 31, 1 } } },
-	{ { { 31, 0, 2 }, { 30, 31, 2 } } },
-	{ { { 31, 0, 1 }, { 31, 31, 1 } } },
-	{ { { 31, 0, 0 }, { 31, 31, 0 } } }
-};
-
-static SingleColourLookup const lookup_6_3[] = 
-{
-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
-	{ { { 0, 0, 1 }, { 0, 1, 1 } } },
-	{ { { 0, 0, 2 }, { 0, 1, 0 } } },
-	{ { { 1, 0, 1 }, { 0, 2, 1 } } },
-	{ { { 1, 0, 0 }, { 0, 2, 0 } } },
-	{ { { 1, 0, 1 }, { 0, 3, 1 } } },
-	{ { { 1, 0, 2 }, { 0, 3, 0 } } },
-	{ { { 2, 0, 1 }, { 0, 4, 1 } } },
-	{ { { 2, 0, 0 }, { 0, 4, 0 } } },
-	{ { { 2, 0, 1 }, { 0, 5, 1 } } },
-	{ { { 2, 0, 2 }, { 0, 5, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 6, 1 } } },
-	{ { { 3, 0, 0 }, { 0, 6, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 7, 1 } } },
-	{ { { 3, 0, 2 }, { 0, 7, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 8, 1 } } },
-	{ { { 4, 0, 0 }, { 0, 8, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 9, 1 } } },
-	{ { { 4, 0, 2 }, { 0, 9, 0 } } },
-	{ { { 5, 0, 1 }, { 0, 10, 1 } } },
-	{ { { 5, 0, 0 }, { 0, 10, 0 } } },
-	{ { { 5, 0, 1 }, { 0, 11, 1 } } },
-	{ { { 5, 0, 2 }, { 0, 11, 0 } } },
-	{ { { 6, 0, 1 }, { 0, 12, 1 } } },
-	{ { { 6, 0, 0 }, { 0, 12, 0 } } },
-	{ { { 6, 0, 1 }, { 0, 13, 1 } } },
-	{ { { 6, 0, 2 }, { 0, 13, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 14, 1 } } },
-	{ { { 7, 0, 0 }, { 0, 14, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 15, 1 } } },
-	{ { { 7, 0, 2 }, { 0, 15, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 16, 1 } } },
-	{ { { 8, 0, 0 }, { 0, 16, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 17, 1 } } },
-	{ { { 8, 0, 2 }, { 0, 17, 0 } } },
-	{ { { 9, 0, 1 }, { 0, 18, 1 } } },
-	{ { { 9, 0, 0 }, { 0, 18, 0 } } },
-	{ { { 9, 0, 1 }, { 0, 19, 1 } } },
-	{ { { 9, 0, 2 }, { 0, 19, 0 } } },
-	{ { { 10, 0, 1 }, { 0, 20, 1 } } },
-	{ { { 10, 0, 0 }, { 0, 20, 0 } } },
-	{ { { 10, 0, 1 }, { 0, 21, 1 } } },
-	{ { { 10, 0, 2 }, { 0, 21, 0 } } },
-	{ { { 11, 0, 1 }, { 0, 22, 1 } } },
-	{ { { 11, 0, 0 }, { 0, 22, 0 } } },
-	{ { { 11, 0, 1 }, { 0, 23, 1 } } },
-	{ { { 11, 0, 2 }, { 0, 23, 0 } } },
-	{ { { 12, 0, 1 }, { 0, 24, 1 } } },
-	{ { { 12, 0, 0 }, { 0, 24, 0 } } },
-	{ { { 12, 0, 1 }, { 0, 25, 1 } } },
-	{ { { 12, 0, 2 }, { 0, 25, 0 } } },
-	{ { { 13, 0, 1 }, { 0, 26, 1 } } },
-	{ { { 13, 0, 0 }, { 0, 26, 0 } } },
-	{ { { 13, 0, 1 }, { 0, 27, 1 } } },
-	{ { { 13, 0, 2 }, { 0, 27, 0 } } },
-	{ { { 14, 0, 1 }, { 0, 28, 1 } } },
-	{ { { 14, 0, 0 }, { 0, 28, 0 } } },
-	{ { { 14, 0, 1 }, { 0, 29, 1 } } },
-	{ { { 14, 0, 2 }, { 0, 29, 0 } } },
-	{ { { 15, 0, 1 }, { 0, 30, 1 } } },
-	{ { { 15, 0, 0 }, { 0, 30, 0 } } },
-	{ { { 15, 0, 1 }, { 0, 31, 1 } } },
-	{ { { 15, 0, 2 }, { 0, 31, 0 } } },
-	{ { { 16, 0, 2 }, { 1, 31, 1 } } },
-	{ { { 16, 0, 1 }, { 1, 31, 0 } } },
-	{ { { 16, 0, 0 }, { 0, 32, 0 } } },
-	{ { { 16, 0, 1 }, { 2, 31, 0 } } },
-	{ { { 16, 0, 2 }, { 0, 33, 0 } } },
-	{ { { 17, 0, 1 }, { 3, 31, 0 } } },
-	{ { { 17, 0, 0 }, { 0, 34, 0 } } },
-	{ { { 17, 0, 1 }, { 4, 31, 0 } } },
-	{ { { 17, 0, 2 }, { 0, 35, 0 } } },
-	{ { { 18, 0, 1 }, { 5, 31, 0 } } },
-	{ { { 18, 0, 0 }, { 0, 36, 0 } } },
-	{ { { 18, 0, 1 }, { 6, 31, 0 } } },
-	{ { { 18, 0, 2 }, { 0, 37, 0 } } },
-	{ { { 19, 0, 1 }, { 7, 31, 0 } } },
-	{ { { 19, 0, 0 }, { 0, 38, 0 } } },
-	{ { { 19, 0, 1 }, { 8, 31, 0 } } },
-	{ { { 19, 0, 2 }, { 0, 39, 0 } } },
-	{ { { 20, 0, 1 }, { 9, 31, 0 } } },
-	{ { { 20, 0, 0 }, { 0, 40, 0 } } },
-	{ { { 20, 0, 1 }, { 10, 31, 0 } } },
-	{ { { 20, 0, 2 }, { 0, 41, 0 } } },
-	{ { { 21, 0, 1 }, { 11, 31, 0 } } },
-	{ { { 21, 0, 0 }, { 0, 42, 0 } } },
-	{ { { 21, 0, 1 }, { 12, 31, 0 } } },
-	{ { { 21, 0, 2 }, { 0, 43, 0 } } },
-	{ { { 22, 0, 1 }, { 13, 31, 0 } } },
-	{ { { 22, 0, 0 }, { 0, 44, 0 } } },
-	{ { { 22, 0, 1 }, { 14, 31, 0 } } },
-	{ { { 22, 0, 2 }, { 0, 45, 0 } } },
-	{ { { 23, 0, 1 }, { 15, 31, 0 } } },
-	{ { { 23, 0, 0 }, { 0, 46, 0 } } },
-	{ { { 23, 0, 1 }, { 0, 47, 1 } } },
-	{ { { 23, 0, 2 }, { 0, 47, 0 } } },
-	{ { { 24, 0, 1 }, { 0, 48, 1 } } },
-	{ { { 24, 0, 0 }, { 0, 48, 0 } } },
-	{ { { 24, 0, 1 }, { 0, 49, 1 } } },
-	{ { { 24, 0, 2 }, { 0, 49, 0 } } },
-	{ { { 25, 0, 1 }, { 0, 50, 1 } } },
-	{ { { 25, 0, 0 }, { 0, 50, 0 } } },
-	{ { { 25, 0, 1 }, { 0, 51, 1 } } },
-	{ { { 25, 0, 2 }, { 0, 51, 0 } } },
-	{ { { 26, 0, 1 }, { 0, 52, 1 } } },
-	{ { { 26, 0, 0 }, { 0, 52, 0 } } },
-	{ { { 26, 0, 1 }, { 0, 53, 1 } } },
-	{ { { 26, 0, 2 }, { 0, 53, 0 } } },
-	{ { { 27, 0, 1 }, { 0, 54, 1 } } },
-	{ { { 27, 0, 0 }, { 0, 54, 0 } } },
-	{ { { 27, 0, 1 }, { 0, 55, 1 } } },
-	{ { { 27, 0, 2 }, { 0, 55, 0 } } },
-	{ { { 28, 0, 1 }, { 0, 56, 1 } } },
-	{ { { 28, 0, 0 }, { 0, 56, 0 } } },
-	{ { { 28, 0, 1 }, { 0, 57, 1 } } },
-	{ { { 28, 0, 2 }, { 0, 57, 0 } } },
-	{ { { 29, 0, 1 }, { 0, 58, 1 } } },
-	{ { { 29, 0, 0 }, { 0, 58, 0 } } },
-	{ { { 29, 0, 1 }, { 0, 59, 1 } } },
-	{ { { 29, 0, 2 }, { 0, 59, 0 } } },
-	{ { { 30, 0, 1 }, { 0, 60, 1 } } },
-	{ { { 30, 0, 0 }, { 0, 60, 0 } } },
-	{ { { 30, 0, 1 }, { 0, 61, 1 } } },
-	{ { { 30, 0, 2 }, { 0, 61, 0 } } },
-	{ { { 31, 0, 1 }, { 0, 62, 1 } } },
-	{ { { 31, 0, 0 }, { 0, 62, 0 } } },
-	{ { { 31, 0, 1 }, { 0, 63, 1 } } },
-	{ { { 31, 0, 2 }, { 0, 63, 0 } } },
-	{ { { 32, 0, 2 }, { 1, 63, 1 } } },
-	{ { { 32, 0, 1 }, { 1, 63, 0 } } },
-	{ { { 32, 0, 0 }, { 16, 48, 0 } } },
-	{ { { 32, 0, 1 }, { 2, 63, 0 } } },
-	{ { { 32, 0, 2 }, { 16, 49, 0 } } },
-	{ { { 33, 0, 1 }, { 3, 63, 0 } } },
-	{ { { 33, 0, 0 }, { 16, 50, 0 } } },
-	{ { { 33, 0, 1 }, { 4, 63, 0 } } },
-	{ { { 33, 0, 2 }, { 16, 51, 0 } } },
-	{ { { 34, 0, 1 }, { 5, 63, 0 } } },
-	{ { { 34, 0, 0 }, { 16, 52, 0 } } },
-	{ { { 34, 0, 1 }, { 6, 63, 0 } } },
-	{ { { 34, 0, 2 }, { 16, 53, 0 } } },
-	{ { { 35, 0, 1 }, { 7, 63, 0 } } },
-	{ { { 35, 0, 0 }, { 16, 54, 0 } } },
-	{ { { 35, 0, 1 }, { 8, 63, 0 } } },
-	{ { { 35, 0, 2 }, { 16, 55, 0 } } },
-	{ { { 36, 0, 1 }, { 9, 63, 0 } } },
-	{ { { 36, 0, 0 }, { 16, 56, 0 } } },
-	{ { { 36, 0, 1 }, { 10, 63, 0 } } },
-	{ { { 36, 0, 2 }, { 16, 57, 0 } } },
-	{ { { 37, 0, 1 }, { 11, 63, 0 } } },
-	{ { { 37, 0, 0 }, { 16, 58, 0 } } },
-	{ { { 37, 0, 1 }, { 12, 63, 0 } } },
-	{ { { 37, 0, 2 }, { 16, 59, 0 } } },
-	{ { { 38, 0, 1 }, { 13, 63, 0 } } },
-	{ { { 38, 0, 0 }, { 16, 60, 0 } } },
-	{ { { 38, 0, 1 }, { 14, 63, 0 } } },
-	{ { { 38, 0, 2 }, { 16, 61, 0 } } },
-	{ { { 39, 0, 1 }, { 15, 63, 0 } } },
-	{ { { 39, 0, 0 }, { 16, 62, 0 } } },
-	{ { { 39, 0, 1 }, { 16, 63, 1 } } },
-	{ { { 39, 0, 2 }, { 16, 63, 0 } } },
-	{ { { 40, 0, 1 }, { 17, 63, 1 } } },
-	{ { { 40, 0, 0 }, { 17, 63, 0 } } },
-	{ { { 40, 0, 1 }, { 18, 63, 1 } } },
-	{ { { 40, 0, 2 }, { 18, 63, 0 } } },
-	{ { { 41, 0, 1 }, { 19, 63, 1 } } },
-	{ { { 41, 0, 0 }, { 19, 63, 0 } } },
-	{ { { 41, 0, 1 }, { 20, 63, 1 } } },
-	{ { { 41, 0, 2 }, { 20, 63, 0 } } },
-	{ { { 42, 0, 1 }, { 21, 63, 1 } } },
-	{ { { 42, 0, 0 }, { 21, 63, 0 } } },
-	{ { { 42, 0, 1 }, { 22, 63, 1 } } },
-	{ { { 42, 0, 2 }, { 22, 63, 0 } } },
-	{ { { 43, 0, 1 }, { 23, 63, 1 } } },
-	{ { { 43, 0, 0 }, { 23, 63, 0 } } },
-	{ { { 43, 0, 1 }, { 24, 63, 1 } } },
-	{ { { 43, 0, 2 }, { 24, 63, 0 } } },
-	{ { { 44, 0, 1 }, { 25, 63, 1 } } },
-	{ { { 44, 0, 0 }, { 25, 63, 0 } } },
-	{ { { 44, 0, 1 }, { 26, 63, 1 } } },
-	{ { { 44, 0, 2 }, { 26, 63, 0 } } },
-	{ { { 45, 0, 1 }, { 27, 63, 1 } } },
-	{ { { 45, 0, 0 }, { 27, 63, 0 } } },
-	{ { { 45, 0, 1 }, { 28, 63, 1 } } },
-	{ { { 45, 0, 2 }, { 28, 63, 0 } } },
-	{ { { 46, 0, 1 }, { 29, 63, 1 } } },
-	{ { { 46, 0, 0 }, { 29, 63, 0 } } },
-	{ { { 46, 0, 1 }, { 30, 63, 1 } } },
-	{ { { 46, 0, 2 }, { 30, 63, 0 } } },
-	{ { { 47, 0, 1 }, { 31, 63, 1 } } },
-	{ { { 47, 0, 0 }, { 31, 63, 0 } } },
-	{ { { 47, 0, 1 }, { 32, 63, 1 } } },
-	{ { { 47, 0, 2 }, { 32, 63, 0 } } },
-	{ { { 48, 0, 2 }, { 33, 63, 1 } } },
-	{ { { 48, 0, 1 }, { 33, 63, 0 } } },
-	{ { { 48, 0, 0 }, { 48, 48, 0 } } },
-	{ { { 48, 0, 1 }, { 34, 63, 0 } } },
-	{ { { 48, 0, 2 }, { 48, 49, 0 } } },
-	{ { { 49, 0, 1 }, { 35, 63, 0 } } },
-	{ { { 49, 0, 0 }, { 48, 50, 0 } } },
-	{ { { 49, 0, 1 }, { 36, 63, 0 } } },
-	{ { { 49, 0, 2 }, { 48, 51, 0 } } },
-	{ { { 50, 0, 1 }, { 37, 63, 0 } } },
-	{ { { 50, 0, 0 }, { 48, 52, 0 } } },
-	{ { { 50, 0, 1 }, { 38, 63, 0 } } },
-	{ { { 50, 0, 2 }, { 48, 53, 0 } } },
-	{ { { 51, 0, 1 }, { 39, 63, 0 } } },
-	{ { { 51, 0, 0 }, { 48, 54, 0 } } },
-	{ { { 51, 0, 1 }, { 40, 63, 0 } } },
-	{ { { 51, 0, 2 }, { 48, 55, 0 } } },
-	{ { { 52, 0, 1 }, { 41, 63, 0 } } },
-	{ { { 52, 0, 0 }, { 48, 56, 0 } } },
-	{ { { 52, 0, 1 }, { 42, 63, 0 } } },
-	{ { { 52, 0, 2 }, { 48, 57, 0 } } },
-	{ { { 53, 0, 1 }, { 43, 63, 0 } } },
-	{ { { 53, 0, 0 }, { 48, 58, 0 } } },
-	{ { { 53, 0, 1 }, { 44, 63, 0 } } },
-	{ { { 53, 0, 2 }, { 48, 59, 0 } } },
-	{ { { 54, 0, 1 }, { 45, 63, 0 } } },
-	{ { { 54, 0, 0 }, { 48, 60, 0 } } },
-	{ { { 54, 0, 1 }, { 46, 63, 0 } } },
-	{ { { 54, 0, 2 }, { 48, 61, 0 } } },
-	{ { { 55, 0, 1 }, { 47, 63, 0 } } },
-	{ { { 55, 0, 0 }, { 48, 62, 0 } } },
-	{ { { 55, 0, 1 }, { 48, 63, 1 } } },
-	{ { { 55, 0, 2 }, { 48, 63, 0 } } },
-	{ { { 56, 0, 1 }, { 49, 63, 1 } } },
-	{ { { 56, 0, 0 }, { 49, 63, 0 } } },
-	{ { { 56, 0, 1 }, { 50, 63, 1 } } },
-	{ { { 56, 0, 2 }, { 50, 63, 0 } } },
-	{ { { 57, 0, 1 }, { 51, 63, 1 } } },
-	{ { { 57, 0, 0 }, { 51, 63, 0 } } },
-	{ { { 57, 0, 1 }, { 52, 63, 1 } } },
-	{ { { 57, 0, 2 }, { 52, 63, 0 } } },
-	{ { { 58, 0, 1 }, { 53, 63, 1 } } },
-	{ { { 58, 0, 0 }, { 53, 63, 0 } } },
-	{ { { 58, 0, 1 }, { 54, 63, 1 } } },
-	{ { { 58, 0, 2 }, { 54, 63, 0 } } },
-	{ { { 59, 0, 1 }, { 55, 63, 1 } } },
-	{ { { 59, 0, 0 }, { 55, 63, 0 } } },
-	{ { { 59, 0, 1 }, { 56, 63, 1 } } },
-	{ { { 59, 0, 2 }, { 56, 63, 0 } } },
-	{ { { 60, 0, 1 }, { 57, 63, 1 } } },
-	{ { { 60, 0, 0 }, { 57, 63, 0 } } },
-	{ { { 60, 0, 1 }, { 58, 63, 1 } } },
-	{ { { 60, 0, 2 }, { 58, 63, 0 } } },
-	{ { { 61, 0, 1 }, { 59, 63, 1 } } },
-	{ { { 61, 0, 0 }, { 59, 63, 0 } } },
-	{ { { 61, 0, 1 }, { 60, 63, 1 } } },
-	{ { { 61, 0, 2 }, { 60, 63, 0 } } },
-	{ { { 62, 0, 1 }, { 61, 63, 1 } } },
-	{ { { 62, 0, 0 }, { 61, 63, 0 } } },
-	{ { { 62, 0, 1 }, { 62, 63, 1 } } },
-	{ { { 62, 0, 2 }, { 62, 63, 0 } } },
-	{ { { 63, 0, 1 }, { 63, 63, 1 } } },
-	{ { { 63, 0, 0 }, { 63, 63, 0 } } }
-};
-
-static SingleColourLookup const lookup_5_4[] = 
-{
-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
-	{ { { 0, 0, 1 }, { 0, 1, 1 } } },
-	{ { { 0, 0, 2 }, { 0, 1, 0 } } },
-	{ { { 0, 0, 3 }, { 0, 1, 1 } } },
-	{ { { 0, 0, 4 }, { 0, 2, 1 } } },
-	{ { { 1, 0, 3 }, { 0, 2, 0 } } },
-	{ { { 1, 0, 2 }, { 0, 2, 1 } } },
-	{ { { 1, 0, 1 }, { 0, 3, 1 } } },
-	{ { { 1, 0, 0 }, { 0, 3, 0 } } },
-	{ { { 1, 0, 1 }, { 1, 2, 1 } } },
-	{ { { 1, 0, 2 }, { 1, 2, 0 } } },
-	{ { { 1, 0, 3 }, { 0, 4, 0 } } },
-	{ { { 1, 0, 4 }, { 0, 5, 1 } } },
-	{ { { 2, 0, 3 }, { 0, 5, 0 } } },
-	{ { { 2, 0, 2 }, { 0, 5, 1 } } },
-	{ { { 2, 0, 1 }, { 0, 6, 1 } } },
-	{ { { 2, 0, 0 }, { 0, 6, 0 } } },
-	{ { { 2, 0, 1 }, { 2, 3, 1 } } },
-	{ { { 2, 0, 2 }, { 2, 3, 0 } } },
-	{ { { 2, 0, 3 }, { 0, 7, 0 } } },
-	{ { { 2, 0, 4 }, { 1, 6, 1 } } },
-	{ { { 3, 0, 3 }, { 1, 6, 0 } } },
-	{ { { 3, 0, 2 }, { 0, 8, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 9, 1 } } },
-	{ { { 3, 0, 0 }, { 0, 9, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 9, 1 } } },
-	{ { { 3, 0, 2 }, { 0, 10, 1 } } },
-	{ { { 3, 0, 3 }, { 0, 10, 0 } } },
-	{ { { 3, 0, 4 }, { 2, 7, 1 } } },
-	{ { { 4, 0, 4 }, { 2, 7, 0 } } },
-	{ { { 4, 0, 3 }, { 0, 11, 0 } } },
-	{ { { 4, 0, 2 }, { 1, 10, 1 } } },
-	{ { { 4, 0, 1 }, { 1, 10, 0 } } },
-	{ { { 4, 0, 0 }, { 0, 12, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 13, 1 } } },
-	{ { { 4, 0, 2 }, { 0, 13, 0 } } },
-	{ { { 4, 0, 3 }, { 0, 13, 1 } } },
-	{ { { 4, 0, 4 }, { 0, 14, 1 } } },
-	{ { { 5, 0, 3 }, { 0, 14, 0 } } },
-	{ { { 5, 0, 2 }, { 2, 11, 1 } } },
-	{ { { 5, 0, 1 }, { 2, 11, 0 } } },
-	{ { { 5, 0, 0 }, { 0, 15, 0 } } },
-	{ { { 5, 0, 1 }, { 1, 14, 1 } } },
-	{ { { 5, 0, 2 }, { 1, 14, 0 } } },
-	{ { { 5, 0, 3 }, { 0, 16, 0 } } },
-	{ { { 5, 0, 4 }, { 0, 17, 1 } } },
-	{ { { 6, 0, 3 }, { 0, 17, 0 } } },
-	{ { { 6, 0, 2 }, { 0, 17, 1 } } },
-	{ { { 6, 0, 1 }, { 0, 18, 1 } } },
-	{ { { 6, 0, 0 }, { 0, 18, 0 } } },
-	{ { { 6, 0, 1 }, { 2, 15, 1 } } },
-	{ { { 6, 0, 2 }, { 2, 15, 0 } } },
-	{ { { 6, 0, 3 }, { 0, 19, 0 } } },
-	{ { { 6, 0, 4 }, { 1, 18, 1 } } },
-	{ { { 7, 0, 3 }, { 1, 18, 0 } } },
-	{ { { 7, 0, 2 }, { 0, 20, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 21, 1 } } },
-	{ { { 7, 0, 0 }, { 0, 21, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 21, 1 } } },
-	{ { { 7, 0, 2 }, { 0, 22, 1 } } },
-	{ { { 7, 0, 3 }, { 0, 22, 0 } } },
-	{ { { 7, 0, 4 }, { 2, 19, 1 } } },
-	{ { { 8, 0, 4 }, { 2, 19, 0 } } },
-	{ { { 8, 0, 3 }, { 0, 23, 0 } } },
-	{ { { 8, 0, 2 }, { 1, 22, 1 } } },
-	{ { { 8, 0, 1 }, { 1, 22, 0 } } },
-	{ { { 8, 0, 0 }, { 0, 24, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 25, 1 } } },
-	{ { { 8, 0, 2 }, { 0, 25, 0 } } },
-	{ { { 8, 0, 3 }, { 0, 25, 1 } } },
-	{ { { 8, 0, 4 }, { 0, 26, 1 } } },
-	{ { { 9, 0, 3 }, { 0, 26, 0 } } },
-	{ { { 9, 0, 2 }, { 2, 23, 1 } } },
-	{ { { 9, 0, 1 }, { 2, 23, 0 } } },
-	{ { { 9, 0, 0 }, { 0, 27, 0 } } },
-	{ { { 9, 0, 1 }, { 1, 26, 1 } } },
-	{ { { 9, 0, 2 }, { 1, 26, 0 } } },
-	{ { { 9, 0, 3 }, { 0, 28, 0 } } },
-	{ { { 9, 0, 4 }, { 0, 29, 1 } } },
-	{ { { 10, 0, 3 }, { 0, 29, 0 } } },
-	{ { { 10, 0, 2 }, { 0, 29, 1 } } },
-	{ { { 10, 0, 1 }, { 0, 30, 1 } } },
-	{ { { 10, 0, 0 }, { 0, 30, 0 } } },
-	{ { { 10, 0, 1 }, { 2, 27, 1 } } },
-	{ { { 10, 0, 2 }, { 2, 27, 0 } } },
-	{ { { 10, 0, 3 }, { 0, 31, 0 } } },
-	{ { { 10, 0, 4 }, { 1, 30, 1 } } },
-	{ { { 11, 0, 3 }, { 1, 30, 0 } } },
-	{ { { 11, 0, 2 }, { 4, 24, 0 } } },
-	{ { { 11, 0, 1 }, { 1, 31, 1 } } },
-	{ { { 11, 0, 0 }, { 1, 31, 0 } } },
-	{ { { 11, 0, 1 }, { 1, 31, 1 } } },
-	{ { { 11, 0, 2 }, { 2, 30, 1 } } },
-	{ { { 11, 0, 3 }, { 2, 30, 0 } } },
-	{ { { 11, 0, 4 }, { 2, 31, 1 } } },
-	{ { { 12, 0, 4 }, { 2, 31, 0 } } },
-	{ { { 12, 0, 3 }, { 4, 27, 0 } } },
-	{ { { 12, 0, 2 }, { 3, 30, 1 } } },
-	{ { { 12, 0, 1 }, { 3, 30, 0 } } },
-	{ { { 12, 0, 0 }, { 4, 28, 0 } } },
-	{ { { 12, 0, 1 }, { 3, 31, 1 } } },
-	{ { { 12, 0, 2 }, { 3, 31, 0 } } },
-	{ { { 12, 0, 3 }, { 3, 31, 1 } } },
-	{ { { 12, 0, 4 }, { 4, 30, 1 } } },
-	{ { { 13, 0, 3 }, { 4, 30, 0 } } },
-	{ { { 13, 0, 2 }, { 6, 27, 1 } } },
-	{ { { 13, 0, 1 }, { 6, 27, 0 } } },
-	{ { { 13, 0, 0 }, { 4, 31, 0 } } },
-	{ { { 13, 0, 1 }, { 5, 30, 1 } } },
-	{ { { 13, 0, 2 }, { 5, 30, 0 } } },
-	{ { { 13, 0, 3 }, { 8, 24, 0 } } },
-	{ { { 13, 0, 4 }, { 5, 31, 1 } } },
-	{ { { 14, 0, 3 }, { 5, 31, 0 } } },
-	{ { { 14, 0, 2 }, { 5, 31, 1 } } },
-	{ { { 14, 0, 1 }, { 6, 30, 1 } } },
-	{ { { 14, 0, 0 }, { 6, 30, 0 } } },
-	{ { { 14, 0, 1 }, { 6, 31, 1 } } },
-	{ { { 14, 0, 2 }, { 6, 31, 0 } } },
-	{ { { 14, 0, 3 }, { 8, 27, 0 } } },
-	{ { { 14, 0, 4 }, { 7, 30, 1 } } },
-	{ { { 15, 0, 3 }, { 7, 30, 0 } } },
-	{ { { 15, 0, 2 }, { 8, 28, 0 } } },
-	{ { { 15, 0, 1 }, { 7, 31, 1 } } },
-	{ { { 15, 0, 0 }, { 7, 31, 0 } } },
-	{ { { 15, 0, 1 }, { 7, 31, 1 } } },
-	{ { { 15, 0, 2 }, { 8, 30, 1 } } },
-	{ { { 15, 0, 3 }, { 8, 30, 0 } } },
-	{ { { 15, 0, 4 }, { 10, 27, 1 } } },
-	{ { { 16, 0, 4 }, { 10, 27, 0 } } },
-	{ { { 16, 0, 3 }, { 8, 31, 0 } } },
-	{ { { 16, 0, 2 }, { 9, 30, 1 } } },
-	{ { { 16, 0, 1 }, { 9, 30, 0 } } },
-	{ { { 16, 0, 0 }, { 12, 24, 0 } } },
-	{ { { 16, 0, 1 }, { 9, 31, 1 } } },
-	{ { { 16, 0, 2 }, { 9, 31, 0 } } },
-	{ { { 16, 0, 3 }, { 9, 31, 1 } } },
-	{ { { 16, 0, 4 }, { 10, 30, 1 } } },
-	{ { { 17, 0, 3 }, { 10, 30, 0 } } },
-	{ { { 17, 0, 2 }, { 10, 31, 1 } } },
-	{ { { 17, 0, 1 }, { 10, 31, 0 } } },
-	{ { { 17, 0, 0 }, { 12, 27, 0 } } },
-	{ { { 17, 0, 1 }, { 11, 30, 1 } } },
-	{ { { 17, 0, 2 }, { 11, 30, 0 } } },
-	{ { { 17, 0, 3 }, { 12, 28, 0 } } },
-	{ { { 17, 0, 4 }, { 11, 31, 1 } } },
-	{ { { 18, 0, 3 }, { 11, 31, 0 } } },
-	{ { { 18, 0, 2 }, { 11, 31, 1 } } },
-	{ { { 18, 0, 1 }, { 12, 30, 1 } } },
-	{ { { 18, 0, 0 }, { 12, 30, 0 } } },
-	{ { { 18, 0, 1 }, { 14, 27, 1 } } },
-	{ { { 18, 0, 2 }, { 14, 27, 0 } } },
-	{ { { 18, 0, 3 }, { 12, 31, 0 } } },
-	{ { { 18, 0, 4 }, { 13, 30, 1 } } },
-	{ { { 19, 0, 3 }, { 13, 30, 0 } } },
-	{ { { 19, 0, 2 }, { 16, 24, 0 } } },
-	{ { { 19, 0, 1 }, { 13, 31, 1 } } },
-	{ { { 19, 0, 0 }, { 13, 31, 0 } } },
-	{ { { 19, 0, 1 }, { 13, 31, 1 } } },
-	{ { { 19, 0, 2 }, { 14, 30, 1 } } },
-	{ { { 19, 0, 3 }, { 14, 30, 0 } } },
-	{ { { 19, 0, 4 }, { 14, 31, 1 } } },
-	{ { { 20, 0, 4 }, { 14, 31, 0 } } },
-	{ { { 20, 0, 3 }, { 16, 27, 0 } } },
-	{ { { 20, 0, 2 }, { 15, 30, 1 } } },
-	{ { { 20, 0, 1 }, { 15, 30, 0 } } },
-	{ { { 20, 0, 0 }, { 16, 28, 0 } } },
-	{ { { 20, 0, 1 }, { 15, 31, 1 } } },
-	{ { { 20, 0, 2 }, { 15, 31, 0 } } },
-	{ { { 20, 0, 3 }, { 15, 31, 1 } } },
-	{ { { 20, 0, 4 }, { 16, 30, 1 } } },
-	{ { { 21, 0, 3 }, { 16, 30, 0 } } },
-	{ { { 21, 0, 2 }, { 18, 27, 1 } } },
-	{ { { 21, 0, 1 }, { 18, 27, 0 } } },
-	{ { { 21, 0, 0 }, { 16, 31, 0 } } },
-	{ { { 21, 0, 1 }, { 17, 30, 1 } } },
-	{ { { 21, 0, 2 }, { 17, 30, 0 } } },
-	{ { { 21, 0, 3 }, { 20, 24, 0 } } },
-	{ { { 21, 0, 4 }, { 17, 31, 1 } } },
-	{ { { 22, 0, 3 }, { 17, 31, 0 } } },
-	{ { { 22, 0, 2 }, { 17, 31, 1 } } },
-	{ { { 22, 0, 1 }, { 18, 30, 1 } } },
-	{ { { 22, 0, 0 }, { 18, 30, 0 } } },
-	{ { { 22, 0, 1 }, { 18, 31, 1 } } },
-	{ { { 22, 0, 2 }, { 18, 31, 0 } } },
-	{ { { 22, 0, 3 }, { 20, 27, 0 } } },
-	{ { { 22, 0, 4 }, { 19, 30, 1 } } },
-	{ { { 23, 0, 3 }, { 19, 30, 0 } } },
-	{ { { 23, 0, 2 }, { 20, 28, 0 } } },
-	{ { { 23, 0, 1 }, { 19, 31, 1 } } },
-	{ { { 23, 0, 0 }, { 19, 31, 0 } } },
-	{ { { 23, 0, 1 }, { 19, 31, 1 } } },
-	{ { { 23, 0, 2 }, { 20, 30, 1 } } },
-	{ { { 23, 0, 3 }, { 20, 30, 0 } } },
-	{ { { 23, 0, 4 }, { 22, 27, 1 } } },
-	{ { { 24, 0, 4 }, { 22, 27, 0 } } },
-	{ { { 24, 0, 3 }, { 20, 31, 0 } } },
-	{ { { 24, 0, 2 }, { 21, 30, 1 } } },
-	{ { { 24, 0, 1 }, { 21, 30, 0 } } },
-	{ { { 24, 0, 0 }, { 24, 24, 0 } } },
-	{ { { 24, 0, 1 }, { 21, 31, 1 } } },
-	{ { { 24, 0, 2 }, { 21, 31, 0 } } },
-	{ { { 24, 0, 3 }, { 21, 31, 1 } } },
-	{ { { 24, 0, 4 }, { 22, 30, 1 } } },
-	{ { { 25, 0, 3 }, { 22, 30, 0 } } },
-	{ { { 25, 0, 2 }, { 22, 31, 1 } } },
-	{ { { 25, 0, 1 }, { 22, 31, 0 } } },
-	{ { { 25, 0, 0 }, { 24, 27, 0 } } },
-	{ { { 25, 0, 1 }, { 23, 30, 1 } } },
-	{ { { 25, 0, 2 }, { 23, 30, 0 } } },
-	{ { { 25, 0, 3 }, { 24, 28, 0 } } },
-	{ { { 25, 0, 4 }, { 23, 31, 1 } } },
-	{ { { 26, 0, 3 }, { 23, 31, 0 } } },
-	{ { { 26, 0, 2 }, { 23, 31, 1 } } },
-	{ { { 26, 0, 1 }, { 24, 30, 1 } } },
-	{ { { 26, 0, 0 }, { 24, 30, 0 } } },
-	{ { { 26, 0, 1 }, { 26, 27, 1 } } },
-	{ { { 26, 0, 2 }, { 26, 27, 0 } } },
-	{ { { 26, 0, 3 }, { 24, 31, 0 } } },
-	{ { { 26, 0, 4 }, { 25, 30, 1 } } },
-	{ { { 27, 0, 3 }, { 25, 30, 0 } } },
-	{ { { 27, 0, 2 }, { 28, 24, 0 } } },
-	{ { { 27, 0, 1 }, { 25, 31, 1 } } },
-	{ { { 27, 0, 0 }, { 25, 31, 0 } } },
-	{ { { 27, 0, 1 }, { 25, 31, 1 } } },
-	{ { { 27, 0, 2 }, { 26, 30, 1 } } },
-	{ { { 27, 0, 3 }, { 26, 30, 0 } } },
-	{ { { 27, 0, 4 }, { 26, 31, 1 } } },
-	{ { { 28, 0, 4 }, { 26, 31, 0 } } },
-	{ { { 28, 0, 3 }, { 28, 27, 0 } } },
-	{ { { 28, 0, 2 }, { 27, 30, 1 } } },
-	{ { { 28, 0, 1 }, { 27, 30, 0 } } },
-	{ { { 28, 0, 0 }, { 28, 28, 0 } } },
-	{ { { 28, 0, 1 }, { 27, 31, 1 } } },
-	{ { { 28, 0, 2 }, { 27, 31, 0 } } },
-	{ { { 28, 0, 3 }, { 27, 31, 1 } } },
-	{ { { 28, 0, 4 }, { 28, 30, 1 } } },
-	{ { { 29, 0, 3 }, { 28, 30, 0 } } },
-	{ { { 29, 0, 2 }, { 30, 27, 1 } } },
-	{ { { 29, 0, 1 }, { 30, 27, 0 } } },
-	{ { { 29, 0, 0 }, { 28, 31, 0 } } },
-	{ { { 29, 0, 1 }, { 29, 30, 1 } } },
-	{ { { 29, 0, 2 }, { 29, 30, 0 } } },
-	{ { { 29, 0, 3 }, { 29, 30, 1 } } },
-	{ { { 29, 0, 4 }, { 29, 31, 1 } } },
-	{ { { 30, 0, 3 }, { 29, 31, 0 } } },
-	{ { { 30, 0, 2 }, { 29, 31, 1 } } },
-	{ { { 30, 0, 1 }, { 30, 30, 1 } } },
-	{ { { 30, 0, 0 }, { 30, 30, 0 } } },
-	{ { { 30, 0, 1 }, { 30, 31, 1 } } },
-	{ { { 30, 0, 2 }, { 30, 31, 0 } } },
-	{ { { 30, 0, 3 }, { 30, 31, 1 } } },
-	{ { { 30, 0, 4 }, { 31, 30, 1 } } },
-	{ { { 31, 0, 3 }, { 31, 30, 0 } } },
-	{ { { 31, 0, 2 }, { 31, 30, 1 } } },
-	{ { { 31, 0, 1 }, { 31, 31, 1 } } },
-	{ { { 31, 0, 0 }, { 31, 31, 0 } } }
-};
-
-static SingleColourLookup const lookup_6_4[] = 
-{
-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
-	{ { { 0, 0, 1 }, { 0, 1, 0 } } },
-	{ { { 0, 0, 2 }, { 0, 2, 0 } } },
-	{ { { 1, 0, 1 }, { 0, 3, 1 } } },
-	{ { { 1, 0, 0 }, { 0, 3, 0 } } },
-	{ { { 1, 0, 1 }, { 0, 4, 0 } } },
-	{ { { 1, 0, 2 }, { 0, 5, 0 } } },
-	{ { { 2, 0, 1 }, { 0, 6, 1 } } },
-	{ { { 2, 0, 0 }, { 0, 6, 0 } } },
-	{ { { 2, 0, 1 }, { 0, 7, 0 } } },
-	{ { { 2, 0, 2 }, { 0, 8, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 9, 1 } } },
-	{ { { 3, 0, 0 }, { 0, 9, 0 } } },
-	{ { { 3, 0, 1 }, { 0, 10, 0 } } },
-	{ { { 3, 0, 2 }, { 0, 11, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 12, 1 } } },
-	{ { { 4, 0, 0 }, { 0, 12, 0 } } },
-	{ { { 4, 0, 1 }, { 0, 13, 0 } } },
-	{ { { 4, 0, 2 }, { 0, 14, 0 } } },
-	{ { { 5, 0, 1 }, { 0, 15, 1 } } },
-	{ { { 5, 0, 0 }, { 0, 15, 0 } } },
-	{ { { 5, 0, 1 }, { 0, 16, 0 } } },
-	{ { { 5, 0, 2 }, { 1, 15, 0 } } },
-	{ { { 6, 0, 1 }, { 0, 17, 0 } } },
-	{ { { 6, 0, 0 }, { 0, 18, 0 } } },
-	{ { { 6, 0, 1 }, { 0, 19, 0 } } },
-	{ { { 6, 0, 2 }, { 3, 14, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 20, 0 } } },
-	{ { { 7, 0, 0 }, { 0, 21, 0 } } },
-	{ { { 7, 0, 1 }, { 0, 22, 0 } } },
-	{ { { 7, 0, 2 }, { 4, 15, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 23, 0 } } },
-	{ { { 8, 0, 0 }, { 0, 24, 0 } } },
-	{ { { 8, 0, 1 }, { 0, 25, 0 } } },
-	{ { { 8, 0, 2 }, { 6, 14, 0 } } },
-	{ { { 9, 0, 1 }, { 0, 26, 0 } } },
-	{ { { 9, 0, 0 }, { 0, 27, 0 } } },
-	{ { { 9, 0, 1 }, { 0, 28, 0 } } },
-	{ { { 9, 0, 2 }, { 7, 15, 0 } } },
-	{ { { 10, 0, 1 }, { 0, 29, 0 } } },
-	{ { { 10, 0, 0 }, { 0, 30, 0 } } },
-	{ { { 10, 0, 1 }, { 0, 31, 0 } } },
-	{ { { 10, 0, 2 }, { 9, 14, 0 } } },
-	{ { { 11, 0, 1 }, { 0, 32, 0 } } },
-	{ { { 11, 0, 0 }, { 0, 33, 0 } } },
-	{ { { 11, 0, 1 }, { 2, 30, 0 } } },
-	{ { { 11, 0, 2 }, { 0, 34, 0 } } },
-	{ { { 12, 0, 1 }, { 0, 35, 0 } } },
-	{ { { 12, 0, 0 }, { 0, 36, 0 } } },
-	{ { { 12, 0, 1 }, { 3, 31, 0 } } },
-	{ { { 12, 0, 2 }, { 0, 37, 0 } } },
-	{ { { 13, 0, 1 }, { 0, 38, 0 } } },
-	{ { { 13, 0, 0 }, { 0, 39, 0 } } },
-	{ { { 13, 0, 1 }, { 5, 30, 0 } } },
-	{ { { 13, 0, 2 }, { 0, 40, 0 } } },
-	{ { { 14, 0, 1 }, { 0, 41, 0 } } },
-	{ { { 14, 0, 0 }, { 0, 42, 0 } } },
-	{ { { 14, 0, 1 }, { 6, 31, 0 } } },
-	{ { { 14, 0, 2 }, { 0, 43, 0 } } },
-	{ { { 15, 0, 1 }, { 0, 44, 0 } } },
-	{ { { 15, 0, 0 }, { 0, 45, 0 } } },
-	{ { { 15, 0, 1 }, { 8, 30, 0 } } },
-	{ { { 15, 0, 2 }, { 0, 46, 0 } } },
-	{ { { 16, 0, 2 }, { 0, 47, 0 } } },
-	{ { { 16, 0, 1 }, { 1, 46, 0 } } },
-	{ { { 16, 0, 0 }, { 0, 48, 0 } } },
-	{ { { 16, 0, 1 }, { 0, 49, 0 } } },
-	{ { { 16, 0, 2 }, { 0, 50, 0 } } },
-	{ { { 17, 0, 1 }, { 2, 47, 0 } } },
-	{ { { 17, 0, 0 }, { 0, 51, 0 } } },
-	{ { { 17, 0, 1 }, { 0, 52, 0 } } },
-	{ { { 17, 0, 2 }, { 0, 53, 0 } } },
-	{ { { 18, 0, 1 }, { 4, 46, 0 } } },
-	{ { { 18, 0, 0 }, { 0, 54, 0 } } },
-	{ { { 18, 0, 1 }, { 0, 55, 0 } } },
-	{ { { 18, 0, 2 }, { 0, 56, 0 } } },
-	{ { { 19, 0, 1 }, { 5, 47, 0 } } },
-	{ { { 19, 0, 0 }, { 0, 57, 0 } } },
-	{ { { 19, 0, 1 }, { 0, 58, 0 } } },
-	{ { { 19, 0, 2 }, { 0, 59, 0 } } },
-	{ { { 20, 0, 1 }, { 7, 46, 0 } } },
-	{ { { 20, 0, 0 }, { 0, 60, 0 } } },
-	{ { { 20, 0, 1 }, { 0, 61, 0 } } },
-	{ { { 20, 0, 2 }, { 0, 62, 0 } } },
-	{ { { 21, 0, 1 }, { 8, 47, 0 } } },
-	{ { { 21, 0, 0 }, { 0, 63, 0 } } },
-	{ { { 21, 0, 1 }, { 1, 62, 0 } } },
-	{ { { 21, 0, 2 }, { 1, 63, 0 } } },
-	{ { { 22, 0, 1 }, { 10, 46, 0 } } },
-	{ { { 22, 0, 0 }, { 2, 62, 0 } } },
-	{ { { 22, 0, 1 }, { 2, 63, 0 } } },
-	{ { { 22, 0, 2 }, { 3, 62, 0 } } },
-	{ { { 23, 0, 1 }, { 11, 47, 0 } } },
-	{ { { 23, 0, 0 }, { 3, 63, 0 } } },
-	{ { { 23, 0, 1 }, { 4, 62, 0 } } },
-	{ { { 23, 0, 2 }, { 4, 63, 0 } } },
-	{ { { 24, 0, 1 }, { 13, 46, 0 } } },
-	{ { { 24, 0, 0 }, { 5, 62, 0 } } },
-	{ { { 24, 0, 1 }, { 5, 63, 0 } } },
-	{ { { 24, 0, 2 }, { 6, 62, 0 } } },
-	{ { { 25, 0, 1 }, { 14, 47, 0 } } },
-	{ { { 25, 0, 0 }, { 6, 63, 0 } } },
-	{ { { 25, 0, 1 }, { 7, 62, 0 } } },
-	{ { { 25, 0, 2 }, { 7, 63, 0 } } },
-	{ { { 26, 0, 1 }, { 16, 45, 0 } } },
-	{ { { 26, 0, 0 }, { 8, 62, 0 } } },
-	{ { { 26, 0, 1 }, { 8, 63, 0 } } },
-	{ { { 26, 0, 2 }, { 9, 62, 0 } } },
-	{ { { 27, 0, 1 }, { 16, 48, 0 } } },
-	{ { { 27, 0, 0 }, { 9, 63, 0 } } },
-	{ { { 27, 0, 1 }, { 10, 62, 0 } } },
-	{ { { 27, 0, 2 }, { 10, 63, 0 } } },
-	{ { { 28, 0, 1 }, { 16, 51, 0 } } },
-	{ { { 28, 0, 0 }, { 11, 62, 0 } } },
-	{ { { 28, 0, 1 }, { 11, 63, 0 } } },
-	{ { { 28, 0, 2 }, { 12, 62, 0 } } },
-	{ { { 29, 0, 1 }, { 16, 54, 0 } } },
-	{ { { 29, 0, 0 }, { 12, 63, 0 } } },
-	{ { { 29, 0, 1 }, { 13, 62, 0 } } },
-	{ { { 29, 0, 2 }, { 13, 63, 0 } } },
-	{ { { 30, 0, 1 }, { 16, 57, 0 } } },
-	{ { { 30, 0, 0 }, { 14, 62, 0 } } },
-	{ { { 30, 0, 1 }, { 14, 63, 0 } } },
-	{ { { 30, 0, 2 }, { 15, 62, 0 } } },
-	{ { { 31, 0, 1 }, { 16, 60, 0 } } },
-	{ { { 31, 0, 0 }, { 15, 63, 0 } } },
-	{ { { 31, 0, 1 }, { 24, 46, 0 } } },
-	{ { { 31, 0, 2 }, { 16, 62, 0 } } },
-	{ { { 32, 0, 2 }, { 16, 63, 0 } } },
-	{ { { 32, 0, 1 }, { 17, 62, 0 } } },
-	{ { { 32, 0, 0 }, { 25, 47, 0 } } },
-	{ { { 32, 0, 1 }, { 17, 63, 0 } } },
-	{ { { 32, 0, 2 }, { 18, 62, 0 } } },
-	{ { { 33, 0, 1 }, { 18, 63, 0 } } },
-	{ { { 33, 0, 0 }, { 27, 46, 0 } } },
-	{ { { 33, 0, 1 }, { 19, 62, 0 } } },
-	{ { { 33, 0, 2 }, { 19, 63, 0 } } },
-	{ { { 34, 0, 1 }, { 20, 62, 0 } } },
-	{ { { 34, 0, 0 }, { 28, 47, 0 } } },
-	{ { { 34, 0, 1 }, { 20, 63, 0 } } },
-	{ { { 34, 0, 2 }, { 21, 62, 0 } } },
-	{ { { 35, 0, 1 }, { 21, 63, 0 } } },
-	{ { { 35, 0, 0 }, { 30, 46, 0 } } },
-	{ { { 35, 0, 1 }, { 22, 62, 0 } } },
-	{ { { 35, 0, 2 }, { 22, 63, 0 } } },
-	{ { { 36, 0, 1 }, { 23, 62, 0 } } },
-	{ { { 36, 0, 0 }, { 31, 47, 0 } } },
-	{ { { 36, 0, 1 }, { 23, 63, 0 } } },
-	{ { { 36, 0, 2 }, { 24, 62, 0 } } },
-	{ { { 37, 0, 1 }, { 24, 63, 0 } } },
-	{ { { 37, 0, 0 }, { 32, 47, 0 } } },
-	{ { { 37, 0, 1 }, { 25, 62, 0 } } },
-	{ { { 37, 0, 2 }, { 25, 63, 0 } } },
-	{ { { 38, 0, 1 }, { 26, 62, 0 } } },
-	{ { { 38, 0, 0 }, { 32, 50, 0 } } },
-	{ { { 38, 0, 1 }, { 26, 63, 0 } } },
-	{ { { 38, 0, 2 }, { 27, 62, 0 } } },
-	{ { { 39, 0, 1 }, { 27, 63, 0 } } },
-	{ { { 39, 0, 0 }, { 32, 53, 0 } } },
-	{ { { 39, 0, 1 }, { 28, 62, 0 } } },
-	{ { { 39, 0, 2 }, { 28, 63, 0 } } },
-	{ { { 40, 0, 1 }, { 29, 62, 0 } } },
-	{ { { 40, 0, 0 }, { 32, 56, 0 } } },
-	{ { { 40, 0, 1 }, { 29, 63, 0 } } },
-	{ { { 40, 0, 2 }, { 30, 62, 0 } } },
-	{ { { 41, 0, 1 }, { 30, 63, 0 } } },
-	{ { { 41, 0, 0 }, { 32, 59, 0 } } },
-	{ { { 41, 0, 1 }, { 31, 62, 0 } } },
-	{ { { 41, 0, 2 }, { 31, 63, 0 } } },
-	{ { { 42, 0, 1 }, { 32, 61, 0 } } },
-	{ { { 42, 0, 0 }, { 32, 62, 0 } } },
-	{ { { 42, 0, 1 }, { 32, 63, 0 } } },
-	{ { { 42, 0, 2 }, { 41, 46, 0 } } },
-	{ { { 43, 0, 1 }, { 33, 62, 0 } } },
-	{ { { 43, 0, 0 }, { 33, 63, 0 } } },
-	{ { { 43, 0, 1 }, { 34, 62, 0 } } },
-	{ { { 43, 0, 2 }, { 42, 47, 0 } } },
-	{ { { 44, 0, 1 }, { 34, 63, 0 } } },
-	{ { { 44, 0, 0 }, { 35, 62, 0 } } },
-	{ { { 44, 0, 1 }, { 35, 63, 0 } } },
-	{ { { 44, 0, 2 }, { 44, 46, 0 } } },
-	{ { { 45, 0, 1 }, { 36, 62, 0 } } },
-	{ { { 45, 0, 0 }, { 36, 63, 0 } } },
-	{ { { 45, 0, 1 }, { 37, 62, 0 } } },
-	{ { { 45, 0, 2 }, { 45, 47, 0 } } },
-	{ { { 46, 0, 1 }, { 37, 63, 0 } } },
-	{ { { 46, 0, 0 }, { 38, 62, 0 } } },
-	{ { { 46, 0, 1 }, { 38, 63, 0 } } },
-	{ { { 46, 0, 2 }, { 47, 46, 0 } } },
-	{ { { 47, 0, 1 }, { 39, 62, 0 } } },
-	{ { { 47, 0, 0 }, { 39, 63, 0 } } },
-	{ { { 47, 0, 1 }, { 40, 62, 0 } } },
-	{ { { 47, 0, 2 }, { 48, 46, 0 } } },
-	{ { { 48, 0, 2 }, { 40, 63, 0 } } },
-	{ { { 48, 0, 1 }, { 41, 62, 0 } } },
-	{ { { 48, 0, 0 }, { 41, 63, 0 } } },
-	{ { { 48, 0, 1 }, { 48, 49, 0 } } },
-	{ { { 48, 0, 2 }, { 42, 62, 0 } } },
-	{ { { 49, 0, 1 }, { 42, 63, 0 } } },
-	{ { { 49, 0, 0 }, { 43, 62, 0 } } },
-	{ { { 49, 0, 1 }, { 48, 52, 0 } } },
-	{ { { 49, 0, 2 }, { 43, 63, 0 } } },
-	{ { { 50, 0, 1 }, { 44, 62, 0 } } },
-	{ { { 50, 0, 0 }, { 44, 63, 0 } } },
-	{ { { 50, 0, 1 }, { 48, 55, 0 } } },
-	{ { { 50, 0, 2 }, { 45, 62, 0 } } },
-	{ { { 51, 0, 1 }, { 45, 63, 0 } } },
-	{ { { 51, 0, 0 }, { 46, 62, 0 } } },
-	{ { { 51, 0, 1 }, { 48, 58, 0 } } },
-	{ { { 51, 0, 2 }, { 46, 63, 0 } } },
-	{ { { 52, 0, 1 }, { 47, 62, 0 } } },
-	{ { { 52, 0, 0 }, { 47, 63, 0 } } },
-	{ { { 52, 0, 1 }, { 48, 61, 0 } } },
-	{ { { 52, 0, 2 }, { 48, 62, 0 } } },
-	{ { { 53, 0, 1 }, { 56, 47, 0 } } },
-	{ { { 53, 0, 0 }, { 48, 63, 0 } } },
-	{ { { 53, 0, 1 }, { 49, 62, 0 } } },
-	{ { { 53, 0, 2 }, { 49, 63, 0 } } },
-	{ { { 54, 0, 1 }, { 58, 46, 0 } } },
-	{ { { 54, 0, 0 }, { 50, 62, 0 } } },
-	{ { { 54, 0, 1 }, { 50, 63, 0 } } },
-	{ { { 54, 0, 2 }, { 51, 62, 0 } } },
-	{ { { 55, 0, 1 }, { 59, 47, 0 } } },
-	{ { { 55, 0, 0 }, { 51, 63, 0 } } },
-	{ { { 55, 0, 1 }, { 52, 62, 0 } } },
-	{ { { 55, 0, 2 }, { 52, 63, 0 } } },
-	{ { { 56, 0, 1 }, { 61, 46, 0 } } },
-	{ { { 56, 0, 0 }, { 53, 62, 0 } } },
-	{ { { 56, 0, 1 }, { 53, 63, 0 } } },
-	{ { { 56, 0, 2 }, { 54, 62, 0 } } },
-	{ { { 57, 0, 1 }, { 62, 47, 0 } } },
-	{ { { 57, 0, 0 }, { 54, 63, 0 } } },
-	{ { { 57, 0, 1 }, { 55, 62, 0 } } },
-	{ { { 57, 0, 2 }, { 55, 63, 0 } } },
-	{ { { 58, 0, 1 }, { 56, 62, 1 } } },
-	{ { { 58, 0, 0 }, { 56, 62, 0 } } },
-	{ { { 58, 0, 1 }, { 56, 63, 0 } } },
-	{ { { 58, 0, 2 }, { 57, 62, 0 } } },
-	{ { { 59, 0, 1 }, { 57, 63, 1 } } },
-	{ { { 59, 0, 0 }, { 57, 63, 0 } } },
-	{ { { 59, 0, 1 }, { 58, 62, 0 } } },
-	{ { { 59, 0, 2 }, { 58, 63, 0 } } },
-	{ { { 60, 0, 1 }, { 59, 62, 1 } } },
-	{ { { 60, 0, 0 }, { 59, 62, 0 } } },
-	{ { { 60, 0, 1 }, { 59, 63, 0 } } },
-	{ { { 60, 0, 2 }, { 60, 62, 0 } } },
-	{ { { 61, 0, 1 }, { 60, 63, 1 } } },
-	{ { { 61, 0, 0 }, { 60, 63, 0 } } },
-	{ { { 61, 0, 1 }, { 61, 62, 0 } } },
-	{ { { 61, 0, 2 }, { 61, 63, 0 } } },
-	{ { { 62, 0, 1 }, { 62, 62, 1 } } },
-	{ { { 62, 0, 0 }, { 62, 62, 0 } } },
-	{ { { 62, 0, 1 }, { 62, 63, 0 } } },
-	{ { { 62, 0, 2 }, { 63, 62, 0 } } },
-	{ { { 63, 0, 1 }, { 63, 63, 1 } } },
-	{ { { 63, 0, 0 }, { 63, 63, 0 } } }
-};

+ 0 - 260
3rdparty/libsquish/squish.cpp

@@ -1,260 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#include "squish.h"
-#include "colourset.h"
-#include "maths.h"
-#include "rangefit.h"
-#include "clusterfit.h"
-#include "colourblock.h"
-#include "alpha.h"
-#include "singlecolourfit.h"
-
-namespace squish {
-
-static int FixFlags( int flags )
-{
-	// grab the flag bits
-	int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
-	int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
-	int extra = flags & kWeightColourByAlpha;
-	
-	// set defaults
-	if ( method != kDxt3
-	&&   method != kDxt5
-	&&   method != kBc4
-	&&   method != kBc5 )
-	{
-		method = kDxt1;
-	}
-	if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
-		fit = kColourClusterFit;
-		
-	// done
-	return method | fit | extra;
-}
-
-void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
-{
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
-	{
-		u8 alpha[16*4];
-		for( int i = 0; i < 16; ++i )
-		{
-			alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
-		}
-
-		u8* rBlock = reinterpret_cast< u8* >( block );
-		CompressAlphaDxt5( alpha, mask, rBlock );
-
-		if ( ( flags & ( kBc5 ) ) != 0 )
-		{
-			for( int i = 0; i < 16; ++i )
-			{
-				alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
-			}
-
-			u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
-			CompressAlphaDxt5( alpha, mask, gBlock );
-		}
-
-		return;
-	}
-
-	// get the block locations
-	void* colourBlock = block;
-	void* alphaBlock = block;
-	if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
-		colourBlock = reinterpret_cast< u8* >( block ) + 8;
-
-	// create the minimal point set
-	ColourSet colours( rgba, mask, flags );
-	
-	// check the compression type and compress colour
-	if( colours.GetCount() == 1 )
-	{
-		// always do a single colour fit
-		SingleColourFit fit( &colours, flags );
-		fit.Compress( colourBlock );
-	}
-	else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
-	{
-		// do a range fit
-		RangeFit fit( &colours, flags, metric );
-		fit.Compress( colourBlock );
-	}
-	else
-	{
-		// default to a cluster fit (could be iterative or not)
-		ClusterFit fit( &colours, flags, metric );
-		fit.Compress( colourBlock );
-	}
-	
-	// compress alpha separately if necessary
-	if( ( flags & kDxt3 ) != 0 )
-		CompressAlphaDxt3( rgba, mask, alphaBlock );
-	else if( ( flags & kDxt5 ) != 0 )
-		CompressAlphaDxt5( rgba, mask, alphaBlock );
-}
-
-void Decompress( u8* rgba, void const* block, int flags )
-{
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	// get the block locations
-	void const* colourBlock = block;
-	void const* alphaBock = block;
-	if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
-		colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
-
-	// decompress colour
-	DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
-
-	// decompress alpha separately if necessary
-	if( ( flags & kDxt3 ) != 0 )
-		DecompressAlphaDxt3( rgba, alphaBock );
-	else if( ( flags & kDxt5 ) != 0 )
-		DecompressAlphaDxt5( rgba, alphaBock );
-}
-
-int GetStorageRequirements( int width, int height, int flags )
-{
-	// fix any bad flags
-	flags = FixFlags( flags );
-	
-	// compute the storage requirements
-	int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
-	int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
-	return blockcount*blocksize;
-}
-
-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
-{
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	// initialise the block output
-	u8* targetBlock = reinterpret_cast< u8* >( blocks );
-	int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
-
-	// loop over blocks
-	for( int y = 0; y < height; y += 4 )
-	{
-		for( int x = 0; x < width; x += 4 )
-		{
-			// build the 4x4 block of pixels
-			u8 sourceRgba[16*4];
-			u8* targetPixel = sourceRgba;
-			int mask = 0;
-			for( int py = 0; py < 4; ++py )
-			{
-				for( int px = 0; px < 4; ++px )
-				{
-					// get the source pixel in the image
-					int sx = x + px;
-					int sy = y + py;
-					
-					// enable if we're in the image
-					if( sx < width && sy < height )
-					{
-						// copy the rgba value
-						u8 const* sourcePixel = rgba + 4*( width*sy + sx );
-						for( int i = 0; i < 4; ++i )
-							*targetPixel++ = *sourcePixel++;
-							
-						// enable this pixel
-						mask |= ( 1 << ( 4*py + px ) );
-					}
-					else
-					{
-						// skip this pixel as its outside the image
-						targetPixel += 4;
-					}
-				}
-			}
-			
-			// compress it into the output
-			CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
-			
-			// advance
-			targetBlock += bytesPerBlock;
-		}
-	}
-}
-
-void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
-{
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	// initialise the block input
-	u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
-	int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
-
-	// loop over blocks
-	for( int y = 0; y < height; y += 4 )
-	{
-		for( int x = 0; x < width; x += 4 )
-		{
-			// decompress the block
-			u8 targetRgba[4*16];
-			Decompress( targetRgba, sourceBlock, flags );
-			
-			// write the decompressed pixels to the correct image locations
-			u8 const* sourcePixel = targetRgba;
-			for( int py = 0; py < 4; ++py )
-			{
-				for( int px = 0; px < 4; ++px )
-				{
-					// get the target location
-					int sx = x + px;
-					int sy = y + py;
-					if( sx < width && sy < height )
-					{
-						u8* targetPixel = rgba + 4*( width*sy + sx );
-						
-						// copy the rgba value
-						for( int i = 0; i < 4; ++i )
-							*targetPixel++ = *sourcePixel++;
-					}
-					else
-					{
-						// skip this pixel as its outside the image
-						sourcePixel += 4;
-					}
-				}
-			}
-			
-			// advance
-			sourceBlock += bytesPerBlock;
-		}
-	}
-}
-
-} // namespace squish

+ 0 - 269
3rdparty/libsquish/squish.h

@@ -1,269 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          [email protected]
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_H
-#define SQUISH_H
-
-//! All squish API functions live in this namespace.
-namespace squish {
-
-// -----------------------------------------------------------------------------
-
-//! Typedef a quantity that is a single unsigned byte.
-typedef unsigned char u8;
-
-// -----------------------------------------------------------------------------
-
-enum
-{
-	//! Use DXT1 compression.
-	kDxt1 = ( 1 << 0 ),
-
-	//! Use DXT3 compression.
-	kDxt3 = ( 1 << 1 ),
-
-	//! Use DXT5 compression.
-	kDxt5 = ( 1 << 2 ),
-
-	//! Use BC4 compression.
-	kBc4 = ( 1 << 3 ),
-
-	//! Use BC5 compression.
-	kBc5 = ( 1 << 4 ),
-
-	//! Use a slow but high quality colour compressor (the default).
-	kColourClusterFit = ( 1 << 5 ),
-
-	//! Use a fast but low quality colour compressor.
-	kColourRangeFit	= ( 1 << 6 ),
-
-	//! Weight the colour by alpha during cluster fit (disabled by default).
-	kWeightColourByAlpha = ( 1 << 7 ),
-
-	//! Use a very slow but very high quality colour compressor.
-	kColourIterativeClusterFit = ( 1 << 8 ),
-};
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Compresses a 4x4 block of pixels.
-
-	@param rgba		The rgba values of the 16 source pixels.
-	@param mask		The valid pixel mask.
-	@param block	Storage for the compressed DXT block.
-	@param flags	Compression flags.
-	@param metric	An optional perceptual metric.
-	
-	The source pixels should be presented as a contiguous array of 16 rgba
-	values, with each component as 1 byte each. In memory this should be:
-	
-		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
-		
-	The mask parameter enables only certain pixels within the block. The lowest
-	bit enables the first pixel and so on up to the 16th bit. Bits beyond the
-	16th bit are ignored. Pixels that are not enabled are allowed to take
-	arbitrary colours in the output block. An example of how this can be used
-	is in the CompressImage function to disable pixels outside the bounds of
-	the image when the width or height is not divisible by 4.
-	
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. When using DXT1 
-	compression, 8 bytes of storage are required for the compressed DXT block. 
-	DXT3 and DXT5 compression require 16 bytes of storage per block.
-	
-	The flags parameter can also specify a preferred colour compressor to use 
-	when fitting the RGB components of the data. Possible colour compressors 
-	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
-	quality) or kColourIterativeClusterFit (slowest, best quality).
-		
-	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
-	flag can be specified to weight the importance of each pixel by its alpha 
-	value. For images that are rendered using alpha blending, this can 
-	significantly increase the perceived quality.
-	
-	The metric parameter can be used to weight the relative importance of each
-	colour channel, or pass NULL to use the default uniform weight of 
-	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
-	allowed either uniform or "perceptual" weights with the fixed values
-	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
-	contiguous array of 3 floats.
-*/
-void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric = 0 );
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Compresses a 4x4 block of pixels.
-
-	@param rgba		The rgba values of the 16 source pixels.
-	@param block	Storage for the compressed DXT block.
-	@param flags	Compression flags.
-	@param metric	An optional perceptual metric.
-	
-	The source pixels should be presented as a contiguous array of 16 rgba
-	values, with each component as 1 byte each. In memory this should be:
-	
-		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
-	
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. When using DXT1 
-	compression, 8 bytes of storage are required for the compressed DXT block. 
-	DXT3 and DXT5 compression require 16 bytes of storage per block.
-	
-	The flags parameter can also specify a preferred colour compressor to use 
-	when fitting the RGB components of the data. Possible colour compressors 
-	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
-	quality) or kColourIterativeClusterFit (slowest, best quality).
-		
-	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
-	flag can be specified to weight the importance of each pixel by its alpha 
-	value. For images that are rendered using alpha blending, this can 
-	significantly increase the perceived quality.
-	
-	The metric parameter can be used to weight the relative importance of each
-	colour channel, or pass NULL to use the default uniform weight of 
-	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
-	allowed either uniform or "perceptual" weights with the fixed values
-	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
-	contiguous array of 3 floats.
-	
-	This method is an inline that calls CompressMasked with a mask of 0xffff, 
-	provided for compatibility with older versions of squish.
-*/
-inline void Compress( u8 const* rgba, void* block, int flags, float* metric = 0 )
-{
-	CompressMasked( rgba, 0xffff, block, flags, metric );
-}
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Decompresses a 4x4 block of pixels.
-
-	@param rgba		Storage for the 16 decompressed pixels.
-	@param block	The compressed DXT block.
-	@param flags	Compression flags.
-
-	The decompressed pixels will be written as a contiguous array of 16 rgba
-	values, with each component as 1 byte each. In memory this is:
-	
-		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
-	
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. All other flags 
-	are ignored.
-*/
-void Decompress( u8* rgba, void const* block, int flags );
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Computes the amount of compressed storage required.
-
-	@param width	The width of the image.
-	@param height	The height of the image.
-	@param flags	Compression flags.
-	
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. All other flags 
-	are ignored.
-	
-	Most DXT images will be a multiple of 4 in each dimension, but this 
-	function supports arbitrary size images by allowing the outer blocks to
-	be only partially used.
-*/
-int GetStorageRequirements( int width, int height, int flags );
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Compresses an image in memory.
-
-	@param rgba		The pixels of the source.
-	@param width	The width of the source image.
-	@param height	The height of the source image.
-	@param blocks	Storage for the compressed output.
-	@param flags	Compression flags.
-	@param metric	An optional perceptual metric.
-	
-	The source pixels should be presented as a contiguous array of width*height
-	rgba values, with each component as 1 byte each. In memory this should be:
-	
-		{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
-		
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. When using DXT1 
-	compression, 8 bytes of storage are required for each compressed DXT block. 
-	DXT3 and DXT5 compression require 16 bytes of storage per block.
-	
-	The flags parameter can also specify a preferred colour compressor to use 
-	when fitting the RGB components of the data. Possible colour compressors 
-	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
-	quality) or kColourIterativeClusterFit (slowest, best quality).
-		
-	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
-	flag can be specified to weight the importance of each pixel by its alpha 
-	value. For images that are rendered using alpha blending, this can 
-	significantly increase the perceived quality.
-	
-	The metric parameter can be used to weight the relative importance of each
-	colour channel, or pass NULL to use the default uniform weight of 
-	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
-	allowed either uniform or "perceptual" weights with the fixed values
-	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
-	contiguous array of 3 floats.
-	
-	Internally this function calls squish::CompressMasked for each block, which 
-	allows for pixels outside the image to take arbitrary values. The function 
-	squish::GetStorageRequirements can be called to compute the amount of memory
-	to allocate for the compressed output.
-*/
-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric = 0 );
-
-// -----------------------------------------------------------------------------
-
-/*! @brief Decompresses an image in memory.
-
-	@param rgba		Storage for the decompressed pixels.
-	@param width	The width of the source image.
-	@param height	The height of the source image.
-	@param blocks	The compressed DXT blocks.
-	@param flags	Compression flags.
-	
-	The decompressed pixels will be written as a contiguous array of width*height
-	16 rgba values, with each component as 1 byte each. In memory this is:
-	
-		{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
-		
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. All other flags 
-	are ignored.
-
-	Internally this function calls squish::Decompress for each block.
-*/
-void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags );
-
-// -----------------------------------------------------------------------------
-
-} // namespace squish
-
-#endif // ndef SQUISH_H
-

+ 0 - 10
3rdparty/lodepng/README.md

@@ -1,10 +0,0 @@
-LodePNG
--------
-
-PNG encoder and decoder in C and C++.
-
-Home page: http://lodev.org/lodepng/
-
-Only two files are needed to allow your program to read and write PNG files: lodepng.cpp and lodepng.h.
-
-The other files in the project are just examples, unit tests, etc...

+ 0 - 6224
3rdparty/lodepng/lodepng.cpp

@@ -1,6224 +0,0 @@
-/*
-LodePNG version 20160501
-
-Copyright (c) 2005-2016 Lode Vandevenne
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-    1. The origin of this software must not be misrepresented; you must not
-    claim that you wrote the original software. If you use this software
-    in a product, an acknowledgment in the product documentation would be
-    appreciated but is not required.
-
-    2. Altered source versions must be plainly marked as such, and must not be
-    misrepresented as being the original software.
-
-    3. This notice may not be removed or altered from any source
-    distribution.
-*/
-
-/*
-The manual and changelog are in the header file "lodepng.h"
-Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C.
-*/
-
-#include "lodepng.h"
-
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/
-#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/
-#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
-#endif /*_MSC_VER */
-
-const char* LODEPNG_VERSION_STRING = "20160501";
-
-/*
-This source file is built up in the following large parts. The code sections
-with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way.
--Tools for C and common code for PNG and Zlib
--C Code for Zlib (huffman, deflate, ...)
--C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...)
--The C++ wrapper around all of the above
-*/
-
-/*The malloc, realloc and free functions defined here with "lodepng_" in front
-of the name, so that you can easily change them to others related to your
-platform if needed. Everything else in the code calls these. Pass
--DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out
-#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and
-define them in your own project's source files without needing to change
-lodepng source code. Don't forget to remove "static" if you copypaste them
-from here.*/
-
-#ifdef LODEPNG_COMPILE_ALLOCATORS
-static void* lodepng_malloc(size_t size)
-{
-  return malloc(size);
-}
-
-static void* lodepng_realloc(void* ptr, size_t new_size)
-{
-  return realloc(ptr, new_size);
-}
-
-static void lodepng_free(void* ptr)
-{
-  free(ptr);
-}
-#else /*LODEPNG_COMPILE_ALLOCATORS*/
-void* lodepng_malloc(size_t size);
-void* lodepng_realloc(void* ptr, size_t new_size);
-void lodepng_free(void* ptr);
-#endif /*LODEPNG_COMPILE_ALLOCATORS*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* // Tools for C, and common code for PNG and Zlib.                       // */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*
-Often in case of an error a value is assigned to a variable and then it breaks
-out of a loop (to go to the cleanup phase of a function). This macro does that.
-It makes the error handling code shorter and more readable.
-
-Example: if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83);
-*/
-#define CERROR_BREAK(errorvar, code)\
-{\
-  errorvar = code;\
-  break;\
-}
-
-/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/
-#define ERROR_BREAK(code) CERROR_BREAK(error, code)
-
-/*Set error var to the error code, and return it.*/
-#define CERROR_RETURN_ERROR(errorvar, code)\
-{\
-  errorvar = code;\
-  return code;\
-}
-
-/*Try the code, if it returns error, also return the error.*/
-#define CERROR_TRY_RETURN(call)\
-{\
-  unsigned error = call;\
-  if(error) return error;\
-}
-
-/*Set error var to the error code, and return from the void function.*/
-#define CERROR_RETURN(errorvar, code)\
-{\
-  errorvar = code;\
-  return;\
-}
-
-/*
-About uivector, ucvector and string:
--All of them wrap dynamic arrays or text strings in a similar way.
--LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version.
--The string tools are made to avoid problems with compilers that declare things like strncat as deprecated.
--They're not used in the interface, only internally in this file as static functions.
--As with many other structs in this file, the init and cleanup functions serve as ctor and dtor.
-*/
-
-#ifdef LODEPNG_COMPILE_ZLIB
-/*dynamic vector of unsigned ints*/
-typedef struct uivector
-{
-  unsigned* data;
-  size_t size; /*size in number of unsigned longs*/
-  size_t allocsize; /*allocated size in bytes*/
-} uivector;
-
-static void uivector_cleanup(void* p)
-{
-  ((uivector*)p)->size = ((uivector*)p)->allocsize = 0;
-  lodepng_free(((uivector*)p)->data);
-  ((uivector*)p)->data = NULL;
-}
-
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned uivector_reserve(uivector* p, size_t allocsize)
-{
-  if(allocsize > p->allocsize)
-  {
-    size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2);
-    void* data = lodepng_realloc(p->data, newsize);
-    if(data)
-    {
-      p->allocsize = newsize;
-      p->data = (unsigned*)data;
-    }
-    else return 0; /*error: not enough memory*/
-  }
-  return 1;
-}
-
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned uivector_resize(uivector* p, size_t size)
-{
-  if(!uivector_reserve(p, size * sizeof(unsigned))) return 0;
-  p->size = size;
-  return 1; /*success*/
-}
-
-/*resize and give all new elements the value*/
-static unsigned uivector_resizev(uivector* p, size_t size, unsigned value)
-{
-  size_t oldsize = p->size, i;
-  if(!uivector_resize(p, size)) return 0;
-  for(i = oldsize; i < size; ++i) p->data[i] = value;
-  return 1;
-}
-
-static void uivector_init(uivector* p)
-{
-  p->data = NULL;
-  p->size = p->allocsize = 0;
-}
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned uivector_push_back(uivector* p, unsigned c)
-{
-  if(!uivector_resize(p, p->size + 1)) return 0;
-  p->data[p->size - 1] = c;
-  return 1;
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-#endif /*LODEPNG_COMPILE_ZLIB*/
-
-/* /////////////////////////////////////////////////////////////////////////// */
-
-/*dynamic vector of unsigned chars*/
-typedef struct ucvector
-{
-  unsigned char* data;
-  size_t size; /*used size*/
-  size_t allocsize; /*allocated size*/
-} ucvector;
-
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned ucvector_reserve(ucvector* p, size_t allocsize)
-{
-  if(allocsize > p->allocsize)
-  {
-    size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2);
-    void* data = lodepng_realloc(p->data, newsize);
-    if(data)
-    {
-      p->allocsize = newsize;
-      p->data = (unsigned char*)data;
-    }
-    else return 0; /*error: not enough memory*/
-  }
-  return 1;
-}
-
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned ucvector_resize(ucvector* p, size_t size)
-{
-  if(!ucvector_reserve(p, size * sizeof(unsigned char))) return 0;
-  p->size = size;
-  return 1; /*success*/
-}
-
-#ifdef LODEPNG_COMPILE_PNG
-
-static void ucvector_cleanup(void* p)
-{
-  ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0;
-  lodepng_free(((ucvector*)p)->data);
-  ((ucvector*)p)->data = NULL;
-}
-
-static void ucvector_init(ucvector* p)
-{
-  p->data = NULL;
-  p->size = p->allocsize = 0;
-}
-#endif /*LODEPNG_COMPILE_PNG*/
-
-#ifdef LODEPNG_COMPILE_ZLIB
-/*you can both convert from vector to buffer&size and vica versa. If you use
-init_buffer to take over a buffer and size, it is not needed to use cleanup*/
-static void ucvector_init_buffer(ucvector* p, unsigned char* buffer, size_t size)
-{
-  p->data = buffer;
-  p->allocsize = p->size = size;
-}
-#endif /*LODEPNG_COMPILE_ZLIB*/
-
-#if (defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER)
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned ucvector_push_back(ucvector* p, unsigned char c)
-{
-  if(!ucvector_resize(p, p->size + 1)) return 0;
-  p->data[p->size - 1] = c;
-  return 1;
-}
-#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
-
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_PNG
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned string_resize(char** out, size_t size)
-{
-  char* data = (char*)lodepng_realloc(*out, size + 1);
-  if(data)
-  {
-    data[size] = 0; /*null termination char*/
-    *out = data;
-  }
-  return data != 0;
-}
-
-/*init a {char*, size_t} pair for use as string*/
-static void string_init(char** out)
-{
-  *out = NULL;
-  string_resize(out, 0);
-}
-
-/*free the above pair again*/
-static void string_cleanup(char** out)
-{
-  lodepng_free(*out);
-  *out = NULL;
-}
-
-static void string_set(char** out, const char* in)
-{
-  size_t insize = strlen(in), i;
-  if(string_resize(out, insize))
-  {
-    for(i = 0; i != insize; ++i)
-    {
-      (*out)[i] = in[i];
-    }
-  }
-}
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-#endif /*LODEPNG_COMPILE_PNG*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-unsigned lodepng_read32bitInt(const unsigned char* buffer)
-{
-  return (unsigned)((buffer[0] << 24) | (buffer[1] << 16) | (buffer[2] << 8) | buffer[3]);
-}
-
-#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)
-/*buffer must have at least 4 allocated bytes available*/
-static void lodepng_set32bitInt(unsigned char* buffer, unsigned value)
-{
-  buffer[0] = (unsigned char)((value >> 24) & 0xff);
-  buffer[1] = (unsigned char)((value >> 16) & 0xff);
-  buffer[2] = (unsigned char)((value >>  8) & 0xff);
-  buffer[3] = (unsigned char)((value      ) & 0xff);
-}
-#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-static void lodepng_add32bitInt(ucvector* buffer, unsigned value)
-{
-  ucvector_resize(buffer, buffer->size + 4); /*todo: give error if resize failed*/
-  lodepng_set32bitInt(&buffer->data[buffer->size - 4], value);
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / File IO                                                                / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_DISK
-
-/* returns negative value on error. This should be pure C compatible, so no fstat. */
-static long lodepng_filesize(const char* filename)
-{
-  FILE* file;
-  long size;
-  file = fopen(filename, "rb");
-  if(!file) return -1;
-
-  if(fseek(file, 0, SEEK_END) != 0)
-  {
-    fclose(file);
-    return -1;
-  }
-
-  size = ftell(file);
-  /* It may give LONG_MAX as directory size, this is invalid for us. */
-  if(size == LONG_MAX) size = -1;
-
-  fclose(file);
-  return size;
-}
-
-/* load file into buffer that already has the correct allocated size. Returns error code.*/
-static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* filename)
-{
-  FILE* file;
-  size_t readsize;
-  file = fopen(filename, "rb");
-  if(!file) return 78;
-
-  readsize = fread(out, 1, size, file);
-  fclose(file);
-
-  if (readsize != size) return 78;
-  return 0;
-}
-
-unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename)
-{
-  long size = lodepng_filesize(filename);
-  if (size < 0) return 78;
-  *outsize = (size_t)size;
-
-  *out = (unsigned char*)lodepng_malloc((size_t)size);
-  if(!(*out) && size > 0) return 83; /*the above malloc failed*/
-
-  return lodepng_buffer_file(*out, (size_t)size, filename);
-}
-
-/*
-Writes the given buffer to the named file. The file is opened with "wb", so any
-existing content is overwritten, nothing is appended.
-Returns 0 on success, or 79 if the file cannot be opened, if not all buffersize
-bytes could be written, or if closing the file (which flushes buffered data)
-fails.
-*/
-unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename)
-{
-  FILE* file;
-  size_t written;
-  unsigned error = 0;
-  file = fopen(filename, "wb" );
-  if(!file) return 79;
-  written = fwrite(buffer, 1, buffersize, file);
-  if(written != buffersize) error = 79; /*short write, e.g. disk full*/
-  /*always close, and treat a failing close as a failed save as well*/
-  if(fclose(file) != 0) error = 79;
-  return error;
-}
-
-#endif /*LODEPNG_COMPILE_DISK*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* // End of common code and tools. Begin of Zlib related code.            // */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_ZLIB
-#ifdef LODEPNG_COMPILE_ENCODER
-/*
-Appends a single bit to the bit stream.
-bitpointer is a size_t* holding the number of bits written so far, bitstream a
-ucvector* and bit an unsigned char, as the comments in the parameter list say.
-Whenever the bit pointer is on a byte boundary a new zero byte is pushed first;
-the bit is then OR-ed into the last byte at position (*bitpointer & 7) and the
-bit pointer is incremented. The arguments are expanded more than once.
-TODO: this ignores potential out of memory errors (the result of
-ucvector_push_back is not checked)
-*/
-#define addBitToStream(/*size_t**/ bitpointer, /*ucvector**/ bitstream, /*unsigned char*/ bit)\
-{\
-  /*add a new byte at the end*/\
-  if(((*bitpointer) & 7) == 0) ucvector_push_back(bitstream, (unsigned char)0);\
-  /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/\
-  (bitstream->data[bitstream->size - 1]) |= (bit << ((*bitpointer) & 0x7));\
-  ++(*bitpointer);\
-}
-
-/*appends the nbits lowest bits of value to the stream, least significant bit first*/
-static void addBitsToStream(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits)
-{
-  size_t n = 0;
-  while(n != nbits)
-  {
-    unsigned char b = (unsigned char)((value >> n) & 1);
-    addBitToStream(bitpointer, bitstream, b);
-    ++n;
-  }
-}
-
-/*appends the nbits lowest bits of value to the stream, most significant of those bits first*/
-static void addBitsToStreamReversed(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits)
-{
-  size_t shift = nbits;
-  while(shift != 0)
-  {
-    unsigned char b;
-    --shift; /*runs from nbits - 1 down to 0*/
-    b = (unsigned char)((value >> shift) & 1);
-    addBitToStream(bitpointer, bitstream, b);
-  }
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-#define READBIT(bitpointer, bitstream) ((bitstream[bitpointer >> 3] >> (bitpointer & 0x7)) & (unsigned char)1) /*value (0 or 1) of bit number bitpointer in bitstream: byte bitpointer >> 3, bit bitpointer & 7 counted from the least significant bit; bitpointer is expanded twice*/
-
-/*returns the bit at position *bitpointer of bitstream and advances *bitpointer by one; no bounds check is done here*/
-static unsigned char readBitFromStream(size_t* bitpointer, const unsigned char* bitstream)
-{
-  size_t at = (*bitpointer)++;
-  return (unsigned char)(READBIT(at, bitstream));
-}
-
-/*
-reads nbits bits starting at position *bitpointer and advances *bitpointer past
-them. The first bit read becomes bit 0 of the result, the next one bit 1, and so
-on. No bounds check is done here.
-*/
-static unsigned readBitsFromStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits)
-{
-  unsigned value = 0;
-  unsigned k = 0;
-  while(k != nbits)
-  {
-    size_t at = *bitpointer;
-    value += ((unsigned)READBIT(at, bitstream)) << k;
-    *bitpointer = at + 1;
-    ++k;
-  }
-  return value;
-}
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Deflate - Huffman                                                      / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#define FIRST_LENGTH_CODE_INDEX 257 /*first literal/length symbol that is a length code*/
-#define LAST_LENGTH_CODE_INDEX 285 /*last literal/length symbol that is a length code*/
-/*256 literals, the end code, some length codes, and 2 unused codes*/
-#define NUM_DEFLATE_CODE_SYMBOLS 288
-/*the distance codes have their own symbols, 30 used, 2 unused*/
-#define NUM_DISTANCE_SYMBOLS 32
-/*the code length codes. 0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/
-#define NUM_CODE_LENGTH_CODES 19
-
-/*the base lengths represented by codes 257-285; indexed with (code - FIRST_LENGTH_CODE_INDEX)*/
-static const unsigned LENGTHBASE[29]
-  = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
-     67, 83, 99, 115, 131, 163, 195, 227, 258};
-
-/*the number of extra bits used by codes 257-285 (their value is added to the base length); same indexing as LENGTHBASE*/
-static const unsigned LENGTHEXTRA[29]
-  = {0, 0, 0, 0, 0, 0, 0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
-      4,  4,  4,   4,   5,   5,   5,   5,   0};
-
-/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree);
-indexed with the distance code 0-29*/
-static const unsigned DISTANCEBASE[30]
-  = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
-     769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577};
-
-/*the number of extra bits of backwards distances (their value is added to the base); same indexing as DISTANCEBASE*/
-static const unsigned DISTANCEEXTRA[30]
-  = {0, 0, 0, 0, 1, 1, 2,  2,  3,  3,  4,  4,  5,  5,   6,   6,   7,   7,   8,
-       8,    9,    9,   10,   10,   11,   11,   12,    12,    13,    13};
-
-/*the order in which "code length alphabet code lengths" are stored, out of this
-the huffman tree of the dynamic huffman tree lengths is generated. Entry i is
-the code length symbol whose 3-bit length is the i-th one stored in the stream.*/
-static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES]
-  = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*
-Huffman tree struct, containing multiple representations of the tree.
-The three arrays are allocated with lodepng_malloc/lodepng_realloc by the
-HuffmanTree_make... functions and released by HuffmanTree_cleanup.
-*/
-typedef struct HuffmanTree
-{
-  unsigned* tree2d; /*2 * numcodes entries, the representation the decoder walks bit by bit (see HuffmanTree_make2DTree)*/
-  unsigned* tree1d; /*numcodes entries, the code of each symbol; only filled in for symbols with non-zero length*/
-  unsigned* lengths; /*the lengths of the codes of the 1d-tree*/
-  unsigned maxbitlen; /*maximum number of bits a single code can get*/
-  unsigned numcodes; /*number of symbols in the alphabet = number of codes*/
-} HuffmanTree;
-
-/*function used for debug purposes to draw the tree in ascii art with C++*/
-/*
-static void HuffmanTree_draw(HuffmanTree* tree)
-{
-  std::cout << "tree. length: " << tree->numcodes << " maxbitlen: " << tree->maxbitlen << std::endl;
-  for(size_t i = 0; i != tree->tree1d.size; ++i)
-  {
-    if(tree->lengths.data[i])
-      std::cout << i << " " << tree->tree1d.data[i] << " " << tree->lengths.data[i] << std::endl;
-  }
-  std::cout << std::endl;
-}*/
-
-/*resets the three array pointers of the tree to null; maxbitlen and numcodes are not touched*/
-static void HuffmanTree_init(HuffmanTree* tree)
-{
-  tree->lengths = 0;
-  tree->tree1d = 0;
-  tree->tree2d = 0;
-}
-
-/*passes the three arrays owned by the tree to lodepng_free; the pointers themselves are not reset*/
-static void HuffmanTree_cleanup(HuffmanTree* tree)
-{
-  lodepng_free(tree->lengths);
-  lodepng_free(tree->tree1d);
-  lodepng_free(tree->tree2d);
-}
-
-/*
-Builds tree2d, the tree representation used by the decoder, from tree1d,
-lengths and numcodes, which must already be filled in.
-tree2d is newly allocated here with 2 * numcodes entries.
-Return value is error: 0 on success, 83 if the allocation failed, 55 if the
-code lengths need more nodes than there is room for (oversubscribed tree).
-tree2d stays allocated when 55 is returned.
-*/
-static unsigned HuffmanTree_make2DTree(HuffmanTree* tree)
-{
-  unsigned nodefilled = 0; /*up to which node it is filled*/
-  unsigned treepos = 0; /*position in the tree (1 of the numcodes columns)*/
-  unsigned n, i;
-
-  tree->tree2d = (unsigned*)lodepng_malloc(tree->numcodes * 2 * sizeof(unsigned));
-  if(!tree->tree2d) return 83; /*alloc fail*/
-
-  /*
-  convert tree1d[] to tree2d[][]. In the 2D array, a value of 32767 means
-  uninited, a value >= numcodes is an address to another bit, a value < numcodes
-  is a code. The 2 rows are the 2 possible bit values (0 or 1), there are as
-  many columns as codes - 1.
-  A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes.
-  Here, the internal nodes are stored (what their 0 and 1 option point to).
-  There is only memory for such good tree currently, if there are more nodes
-  (due to too long length codes), error 55 will happen
-  */
-  for(n = 0; n < tree->numcodes * 2; ++n)
-  {
-    tree->tree2d[n] = 32767; /*32767 here means the tree2d isn't filled there yet*/
-  }
-
-  for(n = 0; n < tree->numcodes; ++n) /*the codes*/
-  {
-    for(i = 0; i != tree->lengths[n]; ++i) /*the bits for this code, most significant bit of the code first*/
-    {
-      unsigned char bit = (unsigned char)((tree->tree1d[n] >> (tree->lengths[n] - i - 1)) & 1);
-      /*oversubscribed, see comment in lodepng_error_text*/
-      if(treepos > 2147483647 || treepos + 2 > tree->numcodes) return 55;
-      if(tree->tree2d[2 * treepos + bit] == 32767) /*not yet filled in*/
-      {
-        if(i + 1 == tree->lengths[n]) /*last bit*/
-        {
-          tree->tree2d[2 * treepos + bit] = n; /*put the current code in it*/
-          treepos = 0; /*back to the root for the next code*/
-        }
-        else
-        {
-          /*put address of the next step in here, first that address has to be found of course
-          (it's just nodefilled + 1)...*/
-          ++nodefilled;
-          /*addresses encoded with numcodes added to it*/
-          tree->tree2d[2 * treepos + bit] = nodefilled + tree->numcodes;
-          treepos = nodefilled;
-        }
-      }
-      else treepos = tree->tree2d[2 * treepos + bit] - tree->numcodes; /*follow the existing address*/
-    }
-  }
-
-  for(n = 0; n < tree->numcodes * 2; ++n)
-  {
-    if(tree->tree2d[n] == 32767) tree->tree2d[n] = 0; /*remove possible remaining 32767's*/
-  }
-
-  return 0;
-}
-
-/*
-Second step for the ...makeFromLengths and ...makeFromFrequencies functions:
-allocates tree1d, assigns a code to every symbol with a non-zero length and
-then builds tree2d through HuffmanTree_make2DTree.
-numcodes, lengths and maxbitlen must already be filled in correctly.
-Return value is error: 83 if an allocation failed, otherwise whatever
-HuffmanTree_make2DTree returns.
-*/
-static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree)
-{
-  uivector count_per_length; /*how many symbols have each code length*/
-  uivector next_code; /*the next code value to hand out per code length*/
-  unsigned status = 0;
-  unsigned len, sym;
-
-  uivector_init(&count_per_length);
-  uivector_init(&next_code);
-
-  tree->tree1d = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned));
-  if(!tree->tree1d) status = 83; /*alloc fail*/
-
-  if(!uivector_resizev(&count_per_length, tree->maxbitlen + 1, 0)
-  || !uivector_resizev(&next_code, tree->maxbitlen + 1, 0))
-  {
-    status = 83; /*alloc fail*/
-  }
-
-  if(status == 0)
-  {
-    /*step 1: count number of instances of each code length*/
-    for(sym = 0; sym != tree->numcodes; ++sym)
-    {
-      ++count_per_length.data[tree->lengths[sym]];
-    }
-    /*step 2: generate the nextcode values*/
-    for(len = 1; len <= tree->maxbitlen; ++len)
-    {
-      unsigned below = next_code.data[len - 1] + count_per_length.data[len - 1];
-      next_code.data[len] = below << 1;
-    }
-    /*step 3: generate all the codes*/
-    for(sym = 0; sym != tree->numcodes; ++sym)
-    {
-      unsigned l = tree->lengths[sym];
-      if(l == 0) continue; /*unused symbol, gets no code*/
-      tree->tree1d[sym] = next_code.data[l];
-      ++next_code.data[l];
-    }
-  }
-
-  uivector_cleanup(&count_per_length);
-  uivector_cleanup(&next_code);
-
-  if(status != 0) return status;
-  return HuffmanTree_make2DTree(tree);
-}
-
-/*
-Generates the tree as defined by Deflate from the given code lengths (as stored
-in the PNG file). The numcodes lengths in bitlen are copied into a newly
-allocated tree->lengths. maxbitlen is the maximum number of bits that a code in
-the tree can have.
-Return value is error: 83 if the copy could not be allocated, otherwise the
-result of HuffmanTree_makeFromLengths2.
-*/
-static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen,
-                                            size_t numcodes, unsigned maxbitlen)
-{
-  unsigned sym = 0;
-  unsigned* copy = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned));
-  tree->lengths = copy;
-  if(!copy) return 83; /*alloc fail*/
-  while(sym != numcodes)
-  {
-    copy[sym] = bitlen[sym];
-    ++sym;
-  }
-  tree->maxbitlen = maxbitlen;
-  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
-  return HuffmanTree_makeFromLengths2(tree);
-}
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding",
-Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/
-
-/*chain node for boundary package merge. The same struct is also used for the
-array of leaves, where weight is the symbol frequency and index the symbol.*/
-typedef struct BPMNode
-{
-  int weight; /*the sum of all weights in this chain*/
-  unsigned index; /*index of this leaf node (called "count" in the paper)*/
-  struct BPMNode* tail; /*the next nodes in this chain (null if last)*/
-  int in_use; /*mark used by the garbage collection in bpmnode_create: 1 if reachable from a chain head*/
-} BPMNode;
-
-/*lists of chains, together with the pool that all chain nodes are taken from*/
-typedef struct BPMLists
-{
-  /*memory pool*/
-  unsigned memsize; /*number of nodes in memory*/
-  BPMNode* memory; /*the nodes themselves*/
-  unsigned numfree; /*number of valid entries in freelist*/
-  unsigned nextfree; /*index in freelist of the next node to hand out*/
-  BPMNode** freelist; /*pointers to the nodes in memory that may be handed out*/
-  /*two heads of lookahead chains per list*/
-  unsigned listsize; /*number of lists, i.e. entries in chains0 and in chains1*/
-  BPMNode** chains0;
-  BPMNode** chains1;
-} BPMLists;
-
-/*
-Takes a node from the memory pool in the lists and fills in the given weight,
-index and tail. When the free list is used up, the nodes that are no longer
-reachable from any chain head in chains0/chains1 are collected into a new free
-list first.
-*/
-static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail)
-{
-  BPMNode* fresh;
-
-  if(lists->nextfree >= lists->numfree)
-  {
-    /*memory full, so garbage collect*/
-    unsigned k;
-    BPMNode* walk;
-
-    /*clear every mark, then mark what the chains still reference*/
-    for(k = 0; k != lists->memsize; ++k) lists->memory[k].in_use = 0;
-    for(k = 0; k != lists->listsize; ++k)
-    {
-      for(walk = lists->chains0[k]; walk; walk = walk->tail) walk->in_use = 1;
-      for(walk = lists->chains1[k]; walk; walk = walk->tail) walk->in_use = 1;
-    }
-
-    /*every unmarked node goes on the free list*/
-    lists->numfree = 0;
-    for(k = 0; k != lists->memsize; ++k)
-    {
-      BPMNode* slot = &lists->memory[k];
-      if(slot->in_use) continue;
-      lists->freelist[lists->numfree] = slot;
-      ++lists->numfree;
-    }
-    lists->nextfree = 0;
-  }
-
-  fresh = lists->freelist[lists->nextfree];
-  ++lists->nextfree;
-  fresh->weight = weight;
-  fresh->index = index;
-  fresh->tail = tail;
-  return fresh;
-}
-
-/*
-Sorts the num leaves by ascending weight. The sort is stable: leaves with equal
-weight keep their relative order.
-A bottom-up mergesort with a scratch buffer of num nodes is used. If that
-buffer cannot be allocated, a stable in-place insertion sort is used instead,
-which gives the same result without extra memory (only slower), so the
-function never dereferences a null scratch buffer.
-*/
-static void bpmnode_sort(BPMNode* leaves, size_t num)
-{
-  BPMNode* mem = (BPMNode*)lodepng_malloc(sizeof(*leaves) * num);
-  size_t width, counter = 0;
-
-  if(!mem)
-  {
-    /*no scratch memory (or num is 0): stable insertion sort in place*/
-    size_t a, b;
-    for(a = 1; a < num; ++a)
-    {
-      BPMNode key = leaves[a];
-      /*strictly greater, so equal weights are never swapped*/
-      for(b = a; b > 0 && leaves[b - 1].weight > key.weight; --b) leaves[b] = leaves[b - 1];
-      leaves[b] = key;
-    }
-    return;
-  }
-
-  for(width = 1; width < num; width *= 2)
-  {
-    /*each pass merges runs of length width from a into b, swapping roles every pass*/
-    BPMNode* a = (counter & 1) ? mem : leaves;
-    BPMNode* b = (counter & 1) ? leaves : mem;
-    size_t p;
-    for(p = 0; p < num; p += 2 * width)
-    {
-      size_t q = (p + width > num) ? num : (p + width);
-      size_t r = (p + 2 * width > num) ? num : (p + 2 * width);
-      size_t i = p, j = q, k;
-      for(k = p; k < r; k++)
-      {
-        /*<= takes from the left run on ties, which keeps the sort stable*/
-        if(i < q && (j >= r || a[i].weight <= a[j].weight)) b[k] = a[i++];
-        else b[k] = a[j++];
-      }
-    }
-    counter++;
-  }
-  /*after an odd number of passes the sorted data is in the scratch buffer*/
-  if(counter & 1) memcpy(leaves, mem, sizeof(*leaves) * num);
-  lodepng_free(mem);
-}
-
-/*
-Boundary Package Merge step, numpresent is the amount of leaves, and c is the
-current chain (the index into chains0/chains1 of the list that gets a new
-chain). leaves must be sorted by weight. num is the counter of the calling loop
-in lodepng_huffman_code_lengths; it is only used to skip the recursion during
-the final call.
-The old head in chains1[c] moves to chains0[c] and a new node becomes
-chains1[c], except in list 0 once all leaves have been used.
-*/
-static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num)
-{
-  unsigned lastindex = lists->chains1[c]->index; /*number of leaves already used by the current head of this list*/
-
-  if(c == 0)
-  {
-    /*list 0 only ever contains leaves*/
-    if(lastindex >= numpresent) return;
-    lists->chains0[c] = lists->chains1[c];
-    lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0);
-  }
-  else
-  {
-    /*sum of the weights of the head nodes of the previous lookahead chains.*/
-    int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight;
-    lists->chains0[c] = lists->chains1[c];
-    if(lastindex < numpresent && sum > leaves[lastindex].weight)
-    {
-      /*the next leaf is lighter than the package, so add the leaf and keep the tail*/
-      lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail);
-      return;
-    }
-    lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]);
-    /*in the end we are only interested in the chain of the last list, so no
-    need to recurse if we're at the last one (this gives measurable speedup)*/
-    if(num + 1 < (int)(2 * numpresent - 2))
-    {
-      /*two calls: each one adds one new chain to the previous list*/
-      boundaryPM(lists, leaves, numpresent, c - 1, num);
-      boundaryPM(lists, leaves, numpresent, c - 1, num);
-    }
-  }
-}
-
-/*
-Computes length-limited Huffman code lengths for the given symbol frequencies
-with the boundary package-merge algorithm.
-lengths: output array with numcodes entries, receives the code length of each
-  symbol (0 for a symbol that gets no code).
-frequencies: input array with numcodes entries.
-maxbitlen: the maximum length any code may get.
-Returns 0 on success, 80 if numcodes is 0 or maxbitlen is too small to
-represent all symbols, 83 if an allocation failed.
-If fewer than two symbols have a non-zero frequency, a second symbol is given
-length 1 as well when the alphabet has room for one; with numcodes == 1 only
-lengths[0] is written.
-*/
-unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
-                                      size_t numcodes, unsigned maxbitlen)
-{
-  unsigned error = 0;
-  unsigned i;
-  size_t numpresent = 0; /*number of symbols with non-zero frequency*/
-  BPMNode* leaves; /*the symbols, only those with > 0 frequency*/
-
-  if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/
-  if((1u << maxbitlen) < numcodes) return 80; /*error: represent all symbols*/
-
-  leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves));
-  if(!leaves) return 83; /*alloc fail*/
-
-  for(i = 0; i != numcodes; ++i)
-  {
-    if(frequencies[i] > 0)
-    {
-      leaves[numpresent].weight = (int)frequencies[i];
-      leaves[numpresent].index = i;
-      ++numpresent;
-    }
-  }
-
-  for(i = 0; i != numcodes; ++i) lengths[i] = 0;
-
-  /*ensure at least two present symbols. There should be at least one symbol
-  according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To
-  make these work as well ensure there are at least two symbols. The
-  Package-Merge code below also doesn't work correctly if there's only one
-  symbol, it'd give it the theoritical 0 bits but in practice zlib wants 1 bit*/
-  if(numpresent == 0)
-  {
-    /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/
-    lengths[0] = 1;
-    /*an alphabet of a single symbol has no lengths[1] to write to*/
-    if(numcodes > 1) lengths[1] = 1;
-  }
-  else if(numpresent == 1)
-  {
-    lengths[leaves[0].index] = 1;
-    /*the extra second symbol, skipped when the alphabet has only one entry*/
-    if(numcodes > 1) lengths[leaves[0].index == 0 ? 1 : 0] = 1;
-  }
-  else
-  {
-    BPMLists lists;
-    BPMNode* node;
-
-    bpmnode_sort(leaves, numpresent);
-
-    lists.listsize = maxbitlen;
-    lists.memsize = 2 * maxbitlen * (maxbitlen + 1);
-    lists.nextfree = 0;
-    lists.numfree = lists.memsize;
-    lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory));
-    lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*));
-    lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
-    lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
-    if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/
-
-    if(!error)
-    {
-      for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i];
-
-      /*the first two nodes of the pool hold the two lightest leaves*/
-      bpmnode_create(&lists, leaves[0].weight, 1, 0);
-      bpmnode_create(&lists, leaves[1].weight, 2, 0);
-
-      /*every list starts out with those two nodes as its lookahead chains*/
-      for(i = 0; i != lists.listsize; ++i)
-      {
-        lists.chains0[i] = &lists.memory[0];
-        lists.chains1[i] = &lists.memory[1];
-      }
-
-      /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/
-      for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i);
-
-      /*every node in the final chain adds one bit to its first node->index leaves*/
-      for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail)
-      {
-        for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index];
-      }
-    }
-
-    lodepng_free(lists.memory);
-    lodepng_free(lists.freelist);
-    lodepng_free(lists.chains0);
-    lodepng_free(lists.chains1);
-  }
-
-  lodepng_free(leaves);
-  return error;
-}
-
-/*
-Create the Huffman tree given the symbol frequencies.
-Trailing symbols with frequency 0 are trimmed off, but never below mincodes
-symbols. tree->lengths is resized with lodepng_realloc, so it must be null or a
-previously allocated array. maxbitlen is the maximum length a code may get.
-Return value is error: 80 if no symbols are left, 83 if the allocation failed
-(the tree then still owns its old lengths array), otherwise the result of
-lodepng_huffman_code_lengths and HuffmanTree_makeFromLengths2.
-*/
-static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies,
-                                                size_t mincodes, size_t numcodes, unsigned maxbitlen)
-{
-  unsigned error = 0;
-  unsigned* new_lengths;
-  /*trim zeroes. numcodes is tested first, so frequencies[numcodes - 1] is never
-  read when numcodes is 0*/
-  while(numcodes > mincodes && !frequencies[numcodes - 1]) --numcodes;
-  /*a tree of 0 symbols is not supposed to be made; also avoids a realloc to size 0*/
-  if(numcodes == 0) return 80;
-  /*go through a temporary so the old array is not lost (and leaked) if realloc fails*/
-  new_lengths = (unsigned*)lodepng_realloc(tree->lengths, numcodes * sizeof(unsigned));
-  if(!new_lengths) return 83; /*alloc fail*/
-  tree->lengths = new_lengths;
-  tree->maxbitlen = maxbitlen;
-  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
-  /*initialize all lengths to 0*/
-  memset(tree->lengths, 0, numcodes * sizeof(unsigned));
-
-  error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen);
-  if(!error) error = HuffmanTree_makeFromLengths2(tree);
-  return error;
-}
-
-/*returns the code stored in tree1d for the symbol with the given index; index is not range checked*/
-static unsigned HuffmanTree_getCode(const HuffmanTree* tree, unsigned index)
-{
-  const unsigned* codes = tree->tree1d;
-  return codes[index];
-}
-
-/*returns the code length stored in lengths for the symbol with the given index; index is not range checked*/
-static unsigned HuffmanTree_getLength(const HuffmanTree* tree, unsigned index)
-{
-  const unsigned* lens = tree->lengths;
-  return lens[index];
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-/*
-Builds the literal and length code tree of a deflated block with fixed tree, as
-per the deflate specification, into tree.
-Return value is error: 83 if the temporary length table could not be
-allocated, otherwise the result of HuffmanTree_makeFromLengths.
-*/
-static unsigned generateFixedLitLenTree(HuffmanTree* tree)
-{
-  unsigned sym, status;
-  unsigned* lens = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
-  if(!lens) return 83; /*alloc fail*/
-
-  /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/
-  for(sym = 0; sym != NUM_DEFLATE_CODE_SYMBOLS; ++sym)
-  {
-    if(sym < 144) lens[sym] = 8;
-    else if(sym < 256) lens[sym] = 9;
-    else if(sym < 280) lens[sym] = 7;
-    else lens[sym] = 8;
-  }
-
-  status = HuffmanTree_makeFromLengths(tree, lens, NUM_DEFLATE_CODE_SYMBOLS, 15);
-
-  lodepng_free(lens);
-  return status;
-}
-
-/*
-Builds the distance code tree of a deflated block with fixed tree, as specified
-in the deflate specification, into tree.
-Return value is error: 83 if the temporary length table could not be
-allocated, otherwise the result of HuffmanTree_makeFromLengths.
-*/
-static unsigned generateFixedDistanceTree(HuffmanTree* tree)
-{
-  unsigned status;
-  unsigned sym = 0;
-  unsigned* lens = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
-  if(!lens) return 83; /*alloc fail*/
-
-  /*there are 32 distance codes, but 30-31 are unused; all of them get 5 bits*/
-  while(sym != NUM_DISTANCE_SYMBOLS)
-  {
-    lens[sym] = 5;
-    ++sym;
-  }
-  status = HuffmanTree_makeFromLengths(tree, lens, NUM_DISTANCE_SYMBOLS, 15);
-
-  lodepng_free(lens);
-  return status;
-}
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-/*
-Decodes one symbol by walking codetree->tree2d with the bits of in, starting at
-bit position *bp. *bp is advanced past every bit that was consumed.
-Returns the code, or (unsigned)(-1) if error happened: either the end of the
-input was reached before a symbol was complete, or the walk left the tree.
-inbitlength is the length of the complete buffer, in bits (so its byte length times 8)
-*/
-static unsigned huffmanDecodeSymbol(const unsigned char* in, size_t* bp,
-                                    const HuffmanTree* codetree, size_t inbitlength)
-{
-  unsigned node = 0;
-  while(*bp < inbitlength)
-  {
-    /*
-    the "readBitFromStream" code is inlined in the expression below because
-    this is the biggest bottleneck while decoding
-    */
-    unsigned entry = codetree->tree2d[(node << 1) + READBIT(*bp, in)];
-    ++(*bp);
-    if(entry < codetree->numcodes) return entry; /*the symbol is decoded, return it*/
-
-    /*symbol not yet decoded, instead move tree position*/
-    node = entry - codetree->numcodes;
-    if(node >= codetree->numcodes) return (unsigned)(-1); /*error: it appeared outside the codetree*/
-  }
-  return (unsigned)(-1); /*error: end of input memory reached without endcode*/
-}
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Inflator (Decompressor)                                                / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*
-get the tree of a deflated block with fixed tree, as specified in the deflate specification.
-tree_ll receives the literal/length tree and tree_d the distance tree.
-Return value is error: 0 on success, otherwise the error of the tree generator
-that failed (83 for a failed allocation). The distance tree is not generated
-when the literal/length tree already failed. Callers that ignore the result
-keep working as before.
-*/
-static unsigned getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d)
-{
-  unsigned error = generateFixedLitLenTree(tree_ll);
-  if(error) return error;
-  return generateFixedDistanceTree(tree_d);
-}
-
-/*
-get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree.
-tree_ll and tree_d receive the literal/length tree and the distance tree.
-in is the input buffer of inlength bytes and *bp the current bit position in
-it, which is advanced past everything that is read.
-Return value is error: 0 on success, otherwise one of the codes set below
-(49 or 50: input too short, 83: alloc fail, 54: repeat code without a previous
-length, 13/14/15: a repeat runs past HLIT + HDIST, 10/11/16: bad symbol from
-huffmanDecodeSymbol, 64: no code for the end symbol 256) or the error of
-HuffmanTree_makeFromLengths.
-*/
-static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
-                                      const unsigned char* in, size_t* bp, size_t inlength)
-{
-  /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/
-  unsigned error = 0;
-  unsigned n, HLIT, HDIST, HCLEN, i;
-  size_t inbitlength = inlength * 8;
-
-  /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/
-  unsigned* bitlen_ll = 0; /*lit,len code lengths*/
-  unsigned* bitlen_d = 0; /*dist code lengths*/
-  /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/
-  unsigned* bitlen_cl = 0;
-  HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/
-
-  if((*bp) + 14 > (inlength << 3)) return 49; /*error: the bit pointer is or will go past the memory (5 + 5 + 4 bits are read next)*/
-
-  /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/
-  HLIT =  readBitsFromStream(bp, in, 5) + 257;
-  /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/
-  HDIST = readBitsFromStream(bp, in, 5) + 1;
-  /*number of code length codes. Unlike the spec, the value 4 is added to it here already*/
-  HCLEN = readBitsFromStream(bp, in, 4) + 4;
-
-  if((*bp) + HCLEN * 3 > (inlength << 3)) return 50; /*error: the bit pointer is or will go past the memory*/
-
-  HuffmanTree_init(&tree_cl);
-
-  while(!error) /*runs at most once, the loop only exists so that break can jump to the cleanup below*/
-  {
-    /*read the code length codes out of 3 * (amount of code length codes) bits*/
-
-    bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned));
-    if(!bitlen_cl) ERROR_BREAK(83 /*alloc fail*/);
-
-    for(i = 0; i != NUM_CODE_LENGTH_CODES; ++i)
-    {
-      if(i < HCLEN) bitlen_cl[CLCL_ORDER[i]] = readBitsFromStream(bp, in, 3);
-      else bitlen_cl[CLCL_ORDER[i]] = 0; /*if not, it must stay 0*/
-    }
-
-    error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7);
-    if(error) break;
-
-    /*now we can use this tree to read the lengths for the tree that this function will return*/
-    bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
-    bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
-    if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/);
-    for(i = 0; i != NUM_DEFLATE_CODE_SYMBOLS; ++i) bitlen_ll[i] = 0;
-    for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen_d[i] = 0;
-
-    /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/
-    i = 0;
-    while(i < HLIT + HDIST)
-    {
-      unsigned code = huffmanDecodeSymbol(in, bp, &tree_cl, inbitlength);
-      if(code <= 15) /*a length code*/
-      {
-        if(i < HLIT) bitlen_ll[i] = code;
-        else bitlen_d[i - HLIT] = code;
-        ++i;
-      }
-      else if(code == 16) /*repeat previous*/
-      {
-        unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/
-        unsigned value; /*set value to the previous code*/
-
-        if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/
-
-        if((*bp + 2) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
-        replength += readBitsFromStream(bp, in, 2);
-
-        if(i < HLIT + 1) value = bitlen_ll[i - 1];
-        else value = bitlen_d[i - HLIT - 1];
-        /*repeat this value in the next lengths*/
-        for(n = 0; n < replength; ++n)
-        {
-          if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/
-          if(i < HLIT) bitlen_ll[i] = value;
-          else bitlen_d[i - HLIT] = value;
-          ++i;
-        }
-      }
-      else if(code == 17) /*repeat "0" 3-10 times*/
-      {
-        unsigned replength = 3; /*read in the bits that indicate repeat length*/
-        if((*bp + 3) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
-        replength += readBitsFromStream(bp, in, 3);
-
-        /*repeat this value in the next lengths*/
-        for(n = 0; n < replength; ++n)
-        {
-          if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/
-
-          if(i < HLIT) bitlen_ll[i] = 0;
-          else bitlen_d[i - HLIT] = 0;
-          ++i;
-        }
-      }
-      else if(code == 18) /*repeat "0" 11-138 times*/
-      {
-        unsigned replength = 11; /*read in the bits that indicate repeat length*/
-        if((*bp + 7) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
-        replength += readBitsFromStream(bp, in, 7);
-
-        /*repeat this value in the next lengths*/
-        for(n = 0; n < replength; ++n)
-        {
-          if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/
-
-          if(i < HLIT) bitlen_ll[i] = 0;
-          else bitlen_d[i - HLIT] = 0;
-          ++i;
-        }
-      }
-      else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
-      {
-        if(code == (unsigned)(-1))
-        {
-          /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
-          (10=no endcode, 11=wrong jump outside of tree).
-          NOTE(review): huffmanDecodeSymbol stops at *bp >= inbitlength without
-          advancing *bp, so it looks like this comparison may need >= to ever
-          select 10 - confirm*/
-          error = (*bp) > inbitlength ? 10 : 11;
-        }
-        else error = 16; /*unexisting code, this can never happen*/
-        break;
-      }
-    }
-    if(error) break;
-
-    if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/
-
-    /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/
-    error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15);
-    if(error) break;
-    error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15);
-
-    break; /*end of error-while*/
-  }
-
-  lodepng_free(bitlen_cl);
-  lodepng_free(bitlen_ll);
-  lodepng_free(bitlen_d);
-  HuffmanTree_cleanup(&tree_cl);
-
-  return error;
-}
-
-/*
-inflate a block with dynamic or fixed Huffman tree.
-out: output vector; decoded bytes are written from position *pos on and *pos
-  is advanced, the vector is resized as needed.
-in, inlength: the input buffer and its length in bytes.
-bp: current bit position in the input, advanced past everything that is read.
-btype: 1 for the fixed trees, 2 for trees stored in the block.
-Return value is error: 0 when the end code was reached, 83 for a failed
-allocation (including a fixed tree that could not be built), 51 if extra bits
-would be read past the input, 52 for a distance that points before the start
-of the output, 18 for an invalid distance code, 10 or 11 when
-huffmanDecodeSymbol failed, or the error of getTreeInflateDynamic.
-*/
-static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size_t* bp,
-                                    size_t* pos, size_t inlength, unsigned btype)
-{
-  unsigned error = 0;
-  HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/
-  HuffmanTree tree_d; /*the huffman tree for distance codes*/
-  size_t inbitlength = inlength * 8;
-
-  HuffmanTree_init(&tree_ll);
-  HuffmanTree_init(&tree_d);
-
-  if(btype == 1)
-  {
-    getTreeInflateFixed(&tree_ll, &tree_d);
-    /*tree2d is null after HuffmanTree_init and only set once the tree gets
-    built, so a null here means generating a fixed tree failed; decoding with
-    it would dereference a null pointer*/
-    if(!tree_ll.tree2d || !tree_d.tree2d) error = 83; /*alloc fail*/
-  }
-  else if(btype == 2) error = getTreeInflateDynamic(&tree_ll, &tree_d, in, bp, inlength);
-
-  while(!error) /*decode all symbols until end reached, breaks at end code*/
-  {
-    /*code_ll is literal, length or end code*/
-    unsigned code_ll = huffmanDecodeSymbol(in, bp, &tree_ll, inbitlength);
-    if(code_ll <= 255) /*literal symbol*/
-    {
-      /*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/
-      if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/);
-      out->data[*pos] = (unsigned char)code_ll;
-      ++(*pos);
-    }
-    else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/
-    {
-      unsigned code_d, distance;
-      unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/
-      size_t start, forward, backward, length;
-
-      /*part 1: get length base*/
-      length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX];
-
-      /*part 2: get extra bits and add the value of that to length*/
-      numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX];
-      if((*bp + numextrabits_l) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
-      length += readBitsFromStream(bp, in, numextrabits_l);
-
-      /*part 3: get distance code*/
-      code_d = huffmanDecodeSymbol(in, bp, &tree_d, inbitlength);
-      if(code_d > 29)
-      {
-        /*it is the distance symbol that has to be tested here: code_ll is a
-        valid length code in this branch and can never be (unsigned)(-1)*/
-        if(code_d == (unsigned)(-1)) /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
-        {
-          /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
-          (10=no endcode, 11=wrong jump outside of tree)*/
-          error = (*bp) > inlength * 8 ? 10 : 11;
-        }
-        else error = 18; /*error: invalid distance code (30-31 are never used)*/
-        break;
-      }
-      distance = DISTANCEBASE[code_d];
-
-      /*part 4: get extra bits from distance*/
-      numextrabits_d = DISTANCEEXTRA[code_d];
-      if((*bp + numextrabits_d) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
-      distance += readBitsFromStream(bp, in, numextrabits_d);
-
-      /*part 5: fill in all the out[n] values based on the length and dist*/
-      start = (*pos);
-      if(distance > start) ERROR_BREAK(52); /*too long backward distance*/
-      backward = start - distance;
-
-      if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/);
-      if (distance < length) {
-        /*source and destination overlap: copy byte by byte so that bytes
-        written earlier in this copy are read again*/
-        for(forward = 0; forward < length; ++forward)
-        {
-          out->data[(*pos)++] = out->data[backward++];
-        }
-      } else {
-        memcpy(out->data + *pos, out->data + backward, length);
-        *pos += length;
-      }
-    }
-    else if(code_ll == 256)
-    {
-      break; /*end code, break the loop*/
-    }
-    else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
-    {
-      /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
-      (10=no endcode, 11=wrong jump outside of tree)*/
-      error = ((*bp) > inlength * 8) ? 10 : 11;
-      break;
-    }
-  }
-
-  HuffmanTree_cleanup(&tree_ll);
-  HuffmanTree_cleanup(&tree_d);
-
-  return error;
-}
-
-/*
-Inflates a stored (uncompressed) block.
-The bit position *bp is first rounded up to the next byte boundary, then the
-16-bit LEN and NLEN fields (least significant byte first) and LEN literal bytes
-are read. The bytes are written to out from position *pos on; *pos is advanced
-and *bp is set to the bit position after the block.
-Return value is error: 0 on success, 52 if fewer than 4 bytes are left for
-LEN and NLEN, 21 if NLEN is not the one's complement of LEN, 23 if the block
-claims more data than the input holds, 83 for a failed allocation.
-*/
-static unsigned inflateNoCompression(ucvector* out, const unsigned char* in, size_t* bp, size_t* pos, size_t inlength)
-{
-  size_t p;
-  unsigned LEN, NLEN, n;
-
-  /*go to first boundary of byte*/
-  while(((*bp) & 0x7) != 0) ++(*bp);
-  p = (*bp) / 8; /*byte position*/
-
-  /*read LEN (2 bytes) and NLEN (2 bytes). Bytes p to p + 3 are read, so it is
-  fine for them to be the very last bytes of the input (an empty stored block
-  at the end of the stream)*/
-  if(p + 4 > inlength) return 52; /*error, bit pointer will jump past memory*/
-  LEN = in[p] + 256u * in[p + 1]; p += 2;
-  NLEN = in[p] + 256u * in[p + 1]; p += 2;
-
-  /*check if 16-bit NLEN is really the one's complement of LEN*/
-  if(LEN + NLEN != 65535) return 21; /*error: NLEN is not one's complement of LEN*/
-
-  /*check the input before growing the output, so nothing is resized for a truncated block*/
-  if(p + LEN > inlength) return 23; /*error: reading outside of in buffer*/
-
-  if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/
-
-  /*read the literal data: LEN bytes are now stored in the out buffer*/
-  for(n = 0; n < LEN; ++n) out->data[(*pos)++] = in[p++];
-
-  (*bp) = p * 8;
-
-  return 0;
-}
-
-/*
-Inflates a complete deflate stream from in (insize bytes) into out, block after block,
-until the block flagged as final has been handled. settings is not used.
-Returns 0 on success, 52 if a block header is cut off, 20 for the invalid block
-type 3, or the error code of the block decoder that failed.
-*/
-static unsigned lodepng_inflatev(ucvector* out,
-                                 const unsigned char* in, size_t insize,
-                                 const LodePNGDecompressSettings* settings)
-{
-  size_t bitpos = 0; /*bit position in "in": byte is bitpos >> 3, bit is bitpos & 0x7 (lsb first)*/
-  size_t outpos = 0; /*byte position in the out buffer*/
-  unsigned is_last;
-
-  (void)settings;
-
-  do
-  {
-    unsigned type, status;
-
-    /*the 3 header bits must still lie inside the input*/
-    if(bitpos + 2 >= insize * 8) return 52; /*error, bit pointer will jump past memory*/
-    is_last = readBitFromStream(&bitpos, in);
-    type = 1u * readBitFromStream(&bitpos, in);
-    type += 2u * readBitFromStream(&bitpos, in);
-
-    switch(type)
-    {
-      case 3:
-        return 20; /*error: invalid BTYPE*/
-      case 0:
-        status = inflateNoCompression(out, in, &bitpos, &outpos, insize); /*no compression*/
-        break;
-      default:
-        status = inflateHuffmanBlock(out, in, &bitpos, &outpos, insize, type); /*compression, BTYPE 01 or 10*/
-        break;
-    }
-
-    if(status) return status;
-  } while(!is_last);
-
-  return 0;
-}
-
-/*
-Inflates in (insize bytes) with lodepng_inflatev. The buffer passed in through *out and
-*outsize is wrapped in a ucvector; afterwards *out and *outsize are overwritten with the
-data pointer and size of that vector, also when an error is returned.
-Returns the error code of lodepng_inflatev (0 on success).
-*/
-unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
-                         const unsigned char* in, size_t insize,
-                         const LodePNGDecompressSettings* settings)
-{
-  ucvector buffer;
-  unsigned result;
-
-  ucvector_init_buffer(&buffer, *out, *outsize);
-  result = lodepng_inflatev(&buffer, in, insize, settings);
-
-  /*hand the (possibly reallocated) storage back to the caller*/
-  *outsize = buffer.size;
-  *out = buffer.data;
-  return result;
-}
-
-/*
-Inflates with the user supplied settings->custom_inflate when that callback is set,
-and with lodepng_inflate otherwise. Returns the error code of whichever one ran.
-*/
-static unsigned inflate(unsigned char** out, size_t* outsize,
-                        const unsigned char* in, size_t insize,
-                        const LodePNGDecompressSettings* settings)
-{
-  if(!settings->custom_inflate) return lodepng_inflate(out, outsize, in, insize, settings);
-
-  return settings->custom_inflate(out, outsize, in, insize, settings);
-}
-
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Deflator (Compressor)                                                  / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258; /*upper bound for the match length searched by encodeLZ77 and for the zero streak counted by countZeros*/
-
-/*Appends one huffman code to the compressed stream at bit position *bp.
-code holds the bits of the symbol and bitlen is the size in bits of the code; the
-writing itself is delegated to addBitsToStreamReversed.*/
-static void addHuffmanSymbol(size_t* bp, ucvector* compressed, unsigned code, unsigned bitlen)
-{
-  addBitsToStreamReversed(bp, compressed, code, bitlen);
-}
-
-/*Returns the index of the entry of the sorted array that has the largest value smaller
-than or equal to the given value. When no entry is smaller than or equal to the value,
-index 0 is returned. array_size must be at least 1.*/
-static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value)
-{
-  /*binary search over the indices 1..array_size-1 for the first entry that is >= value
-  (only small gain over linear). TODO: use CPU log2 instruction for getting symbols instead*/
-  size_t lo = 1;
-  size_t hi = array_size - 1;
-
-  while(lo <= hi)
-  {
-    size_t probe = (lo + hi) >> 1;
-    if(array[probe] < value) lo = probe + 1;
-    else hi = probe - 1;
-  }
-
-  /*lo is the answer only when it landed exactly on the value, else it is one too far*/
-  if(lo < array_size && array[lo] <= value) return lo;
-  return lo - 1;
-}
-
-/*
-Appends one length/distance pair to values as four consecutive entries:
-length symbol (257-285), extra length bits, distance code, extra distance bits.
-The other values used in the encoded vector are those of deflate:
-0-255: literal bytes
-256: end
-286-287: invalid
-*/
-static void addLengthDistance(uivector* values, size_t length, size_t distance)
-{
-  unsigned group[4];
-  size_t k;
-
-  /*index of the base value at or below the length, resp. the distance*/
-  unsigned len_idx = (unsigned)searchCodeIndex(LENGTHBASE, 29, length);
-  unsigned dist_idx = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance);
-
-  group[0] = len_idx + FIRST_LENGTH_CODE_INDEX;
-  group[1] = (unsigned)(length - LENGTHBASE[len_idx]); /*what remains above the base*/
-  group[2] = dist_idx;
-  group[3] = (unsigned)(distance - DISTANCEBASE[dist_idx]); /*idem*/
-
-  for(k = 0; k != 4; ++k) uivector_push_back(values, group[k]);
-}
-
-/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3
-bytes as input because 3 is the minimum match length for deflate*/
-static const unsigned HASH_NUM_VALUES = 65536; /*amount of entries allocated for Hash.head by hash_init*/
-static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer. getHash masks its result with this*/
-
-typedef struct Hash
-{
-  int* head; /*hash value to head circular pos - can be outdated if went around window. hash_init fills it with -1*/
-  /*circular pos to prev circular pos with the same hash value. hash_init sets every
-  entry to its own index, which indicates uninitialized*/
-  unsigned short* chain;
-  int* val; /*circular pos to hash value. hash_init fills it with -1*/
-
-  /*TODO: do this not only for zeros but for any repeated byte. However for PNG
-  it's always going to be the zeros that dominate, so not important for PNG*/
-  int* headz; /*similar to head, but for chainz: indexed by the length of the zeros streak*/
-  unsigned short* chainz; /*those with same amount of zeros: circular pos to prev circular pos with an equal streak*/
-  unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/
-} Hash;
-
-/*
-Allocates and initializes all tables of hash for a sliding window of windowsize positions.
-Returns 0 on success and 83 when an allocation failed. In the failure case every table
-that did get allocated is released again and all pointers are set to 0, so the caller
-has nothing to clean up (and calling hash_cleanup anyway stays harmless).
-*/
-static unsigned hash_init(Hash* hash, unsigned windowsize)
-{
-  unsigned i;
-  hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES);
-  hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize);
-  hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
-
-  hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
-  hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1));
-  hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
-
-  if(!hash->head || !hash->chain || !hash->val || !hash->headz || !hash->chainz || !hash->zeros)
-  {
-    /*the caller returns immediately on error without calling hash_cleanup, so the
-    allocations that did succeed have to be released here to not leak them*/
-    lodepng_free(hash->head); hash->head = 0;
-    lodepng_free(hash->val); hash->val = 0;
-    lodepng_free(hash->chain); hash->chain = 0;
-    lodepng_free(hash->zeros); hash->zeros = 0;
-    lodepng_free(hash->headz); hash->headz = 0;
-    lodepng_free(hash->chainz); hash->chainz = 0;
-    return 83; /*alloc fail*/
-  }
-
-  /*initialize hash table*/
-  for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1;
-  for(i = 0; i != windowsize; ++i) hash->val[i] = -1;
-  for(i = 0; i != windowsize; ++i) hash->chain[i] = (unsigned short)i; /*same value as index indicates uninitialized*/
-
-  for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1;
-  for(i = 0; i != windowsize; ++i) hash->chainz[i] = (unsigned short)i; /*same value as index indicates uninitialized*/
-
-  return 0;
-}
-
-/*Releases the six tables of hash that hash_init allocated. The pointers themselves
-are left as they are, so the hash must not be used afterwards.*/
-static void hash_cleanup(Hash* hash)
-{
-  /*tables of the hash chain*/
-  lodepng_free(hash->head);
-  lodepng_free(hash->val);
-  lodepng_free(hash->chain);
-
-  /*tables of the zeros chain*/
-  lodepng_free(hash->zeros);
-  lodepng_free(hash->headz);
-  lodepng_free(hash->chainz);
-}
-
-
-
-/*
-Returns the hash value, masked with HASH_BIT_MASK, of the up to 3 bytes of data that
-start at pos. Returns 0 when pos lies at or beyond size.
-*/
-static unsigned getHash(const unsigned char* data, size_t size, size_t pos)
-{
-  unsigned hash = 0;
-
-  if(!(pos + 2 < size))
-  {
-    /*fewer than 3 bytes left: combine only the bytes that exist*/
-    size_t remaining, k;
-    if(pos >= size) return 0;
-    remaining = size - pos;
-    for(k = 0; k != remaining; ++k) hash ^= (unsigned)(data[pos + k] << (k * 8u));
-    return hash & HASH_BIT_MASK;
-  }
-
-  /*A simple shift and xor hash is used. Since the data of PNGs is dominated
-  by zeroes due to the filters, a better hash does not have a significant
-  effect on speed in traversing the chain, and causes more time spend on
-  calculating the hash.*/
-  hash ^= (unsigned)(data[pos + 0] << 0u);
-  hash ^= (unsigned)(data[pos + 1] << 4u);
-  hash ^= (unsigned)(data[pos + 2] << 8u);
-  return hash & HASH_BIT_MASK;
-}
-
-/*
-Counts the consecutive zero bytes of data that start at pos. The scan stops at the first
-non-zero byte, at the end of the data (size), or after MAX_SUPPORTED_DEFLATE_LENGTH bytes.
-*/
-static unsigned countZeros(const unsigned char* data, size_t size, size_t pos)
-{
-  const unsigned char* const first = data + pos;
-  const unsigned char* const buffer_end = data + size;
-  const unsigned char* limit = first + MAX_SUPPORTED_DEFLATE_LENGTH;
-  const unsigned char* scan = first;
-
-  if(limit > buffer_end) limit = buffer_end;
-
-  for(; scan != limit; ++scan)
-  {
-    if(*scan != 0) break;
-  }
-
-  /*subtracting two addresses returned as 32-bit number (max value is MAX_SUPPORTED_DEFLATE_LENGTH)*/
-  return (unsigned)(scan - first);
-}
-
-/*
-Registers circular position wpos (wpos = pos & (windowsize - 1)) in both chains of hash:
-the one of its hash value hashval and the one of its zeros streak length numzeros.
-wpos becomes the new head of each chain and links to the previous head, if there was one.
-*/
-static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros)
-{
-  const int prev_same_hash = hash->head[hashval];
-  const int prev_same_zeros = hash->headz[numzeros];
-
-  /*chain of positions with an equal hash value*/
-  hash->val[wpos] = (int)hashval;
-  if(prev_same_hash != -1) hash->chain[wpos] = (unsigned short)prev_same_hash;
-  hash->head[hashval] = (int)wpos;
-
-  /*chain of positions with an equal zeros streak*/
-  hash->zeros[wpos] = numzeros;
-  if(prev_same_zeros != -1) hash->chainz[wpos] = (unsigned short)prev_same_zeros;
-  hash->headz[numzeros] = (int)wpos;
-}
-
-/*
-LZ77-encode the data. Return value is error code. The input are raw bytes, the output
-is in the form of unsigned integers with codes representing for example literal bytes, or
-length/distance pairs.
-It uses a hash table technique to let it encode faster. When doing LZ77 encoding, a
-sliding window (of windowsize) is used, and all past bytes in that window can be used as
-the "dictionary". A brute force search through all possible distances would be slow, and
-this hash technique is one out of several ways to speed this up.
-
-out: receives one value per literal byte and four values per length/distance pair
-  (see addLengthDistance)
-hash: the hash chains, updated for every position that gets encoded
-in, inpos, insize: the bytes in[inpos] up to but not including in[insize] get encoded
-windowsize: must be a power of two, not 0 and not larger than 32768
-minmatch: matches shorter than this are written as literals
-nicematch: the search stops at the first match of at least this length; values above
-  MAX_SUPPORTED_DEFLATE_LENGTH are reduced to it
-lazymatching: if non-zero, a found match is held back one position to see whether
-  the next position gives a longer one
-
-Error codes returned here: 60 (windowsize out of range), 90 (windowsize not a power of
-two), 81 (lazy match pending at position 0), 83 (alloc fail), 86 (offset too big).
-*/
-static unsigned encodeLZ77(uivector* out, Hash* hash,
-                           const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize,
-                           unsigned minmatch, unsigned nicematch, unsigned lazymatching)
-{
-  size_t pos;
-  unsigned i, error = 0;
-  /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/
-  unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8;
-  unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64;
-
-  unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/
-  unsigned numzeros = 0; /*length of the zeros streak at the current position, carried over between positions*/
-
-  unsigned offset; /*the offset represents the distance in LZ77 terminology*/
-  unsigned length;
-  unsigned lazy = 0; /*1 while the match of the previous position is being held back*/
-  unsigned lazylength = 0, lazyoffset = 0; /*the held back match*/
-  unsigned hashval;
-  unsigned current_offset, current_length;
-  unsigned prev_offset;
-  const unsigned char *lastptr, *foreptr, *backptr;
-  unsigned hashpos;
-
-  if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/
-  if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/
-
-  if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH;
-
-  for(pos = inpos; pos < insize; ++pos)
-  {
-    size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/
-    unsigned chainlength = 0;
-
-    hashval = getHash(in, insize, pos);
-
-    if(usezeros && hashval == 0)
-    {
-      if(numzeros == 0) numzeros = countZeros(in, insize, pos);
-      else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; /*streak of the previous position, now one position shorter unless it still continues*/
-    }
-    else
-    {
-      numzeros = 0;
-    }
-
-    updateHashChain(hash, wpos, hashval, numzeros);
-
-    /*the length and offset found for the current position*/
-    length = 0;
-    offset = 0;
-
-    hashpos = hash->chain[wpos];
-
-    lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH]; /*one past the last byte a match at pos may cover*/
-
-    /*search for the longest string*/
-    prev_offset = 0;
-    for(;;)
-    {
-      if(chainlength++ >= maxchainlength) break;
-      current_offset = hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize; /*distance from hashpos to wpos inside the circular buffer*/
-
-      if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/
-      prev_offset = current_offset;
-      if(current_offset > 0)
-      {
-        /*test the next characters*/
-        foreptr = &in[pos];
-        backptr = &in[pos - current_offset];
-
-        /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/
-        if(numzeros >= 3)
-        {
-          unsigned skip = hash->zeros[hashpos];
-          if(skip > numzeros) skip = numzeros;
-          backptr += skip;
-          foreptr += skip;
-        }
-
-        while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/
-        {
-          ++backptr;
-          ++foreptr;
-        }
-        current_length = (unsigned)(foreptr - &in[pos]);
-
-        if(current_length > length)
-        {
-          length = current_length; /*the longest length*/
-          offset = current_offset; /*the offset that is related to this longest length*/
-          /*jump out once a length of max length is found (speed gain). This also jumps
-          out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/
-          if(current_length >= nicematch) break;
-        }
-      }
-
-      if(hashpos == hash->chain[hashpos]) break; /*entry points to itself: uninitialized, end of the chain*/
-
-      if(numzeros >= 3 && length > numzeros)
-      {
-        hashpos = hash->chainz[hashpos]; /*continue along the chain of equal zeros streaks*/
-        if(hash->zeros[hashpos] != numzeros) break;
-      }
-      else
-      {
-        hashpos = hash->chain[hashpos];
-        /*outdated hash value, happens if particular value was not encountered in whole last window*/
-        if(hash->val[hashpos] != (int)hashval) break;
-      }
-    }
-
-    if(lazymatching)
-    {
-      if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH)
-      {
-        lazy = 1;
-        lazylength = length;
-        lazyoffset = offset;
-        continue; /*try the next byte*/
-      }
-      if(lazy)
-      {
-        lazy = 0;
-        if(pos == 0) ERROR_BREAK(81);
-        if(length > lazylength + 1)
-        {
-          /*push the previous character as literal*/
-          if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/);
-        }
-        else
-        {
-          length = lazylength; /*the held back match wins, go back to the position it belongs to*/
-          offset = lazyoffset;
-          hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/
-          hash->headz[numzeros] = -1; /*idem*/
-          --pos;
-        }
-      }
-    }
-    if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/);
-
-    /*encode it as length/distance pair or literal value*/
-    if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/
-    {
-      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
-    }
-    else if(length < minmatch || (length == 3 && offset > 4096))
-    {
-      /*compensate for the fact that longer offsets have more extra bits, a
-      length of only 3 may be not worth it then*/
-      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
-    }
-    else
-    {
-      addLengthDistance(out, length, offset);
-      for(i = 1; i < length; ++i) /*the positions covered by the match still have to be entered in the hash chains*/
-      {
-        ++pos;
-        wpos = pos & (windowsize - 1);
-        hashval = getHash(in, insize, pos);
-        if(usezeros && hashval == 0)
-        {
-          if(numzeros == 0) numzeros = countZeros(in, insize, pos);
-          else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
-        }
-        else
-        {
-          numzeros = 0;
-        }
-        updateHashChain(hash, wpos, hashval, numzeros);
-      }
-    }
-  } /*end of the loop through each character of input*/
-
-  return error;
-}
-
-/* /////////////////////////////////////////////////////////////////////////// */
-
-/*
-Writes data (datasize bytes) to out as a sequence of stored (BTYPE 00) deflate blocks of
-at most 65535 bytes each; the last block gets the BFINAL flag. For an empty input a
-single empty final block is written, so that the output is a complete deflate stream.
-Returns 0 on success, 83 on allocation failure.
-*/
-static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize)
-{
-  /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte,
-  2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/
-
-  size_t i, j, numdeflateblocks = (datasize + 65534) / 65535;
-  size_t datapos = 0; /*size_t, not unsigned: must not wrap around for inputs beyond 4GB*/
-
-  /*without any block there would be no BFINAL and the stream would be invalid*/
-  if(numdeflateblocks == 0) numdeflateblocks = 1;
-
-  for(i = 0; i != numdeflateblocks; ++i)
-  {
-    unsigned BFINAL, BTYPE, LEN, NLEN;
-    unsigned char firstbyte;
-
-    BFINAL = (i == numdeflateblocks - 1);
-    BTYPE = 0;
-
-    firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1) << 1) + ((BTYPE & 2) << 1));
-    if(!ucvector_push_back(out, firstbyte)) return 83; /*alloc fail*/
-
-    LEN = 65535;
-    if(datasize - datapos < 65535) LEN = (unsigned)(datasize - datapos);
-    NLEN = 65535 - LEN; /*one's complement of the 16-bit LEN*/
-
-    if(!ucvector_push_back(out, (unsigned char)(LEN & 255))) return 83;
-    if(!ucvector_push_back(out, (unsigned char)(LEN >> 8))) return 83;
-    if(!ucvector_push_back(out, (unsigned char)(NLEN & 255))) return 83;
-    if(!ucvector_push_back(out, (unsigned char)(NLEN >> 8))) return 83;
-
-    /*Decompressed data*/
-    for(j = 0; j != LEN; ++j)
-    {
-      if(!ucvector_push_back(out, data[datapos++])) return 83;
-    }
-  }
-
-  return 0;
-}
-
-/*
-Writes the values of lz77_encoded (lit, len and dist codes) to the compressed stream,
-huffman coded.
-bp, out: bit position and bytes of the compressed stream
-tree_ll: the tree for lit and len codes.
-tree_d: the tree for distance codes.
-*/
-static void writeLZ77data(size_t* bp, ucvector* out, const uivector* lz77_encoded,
-                          const HuffmanTree* tree_ll, const HuffmanTree* tree_d)
-{
-  const unsigned* symbols = lz77_encoded->data;
-  const size_t count = lz77_encoded->size;
-  size_t idx = 0;
-
-  while(idx != count)
-  {
-    unsigned sym = symbols[idx++];
-    addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_ll, sym), HuffmanTree_getLength(tree_ll, sym));
-
-    if(sym > 256) /*a length code is followed by 3 more values*/
-    {
-      unsigned len_extra = symbols[idx++];
-      unsigned dist_code = symbols[idx++];
-      unsigned dist_extra = symbols[idx++];
-
-      addBitsToStream(bp, out, len_extra, LENGTHEXTRA[sym - FIRST_LENGTH_CODE_INDEX]);
-      addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_d, dist_code),
-                       HuffmanTree_getLength(tree_d, dist_code));
-      addBitsToStream(bp, out, dist_extra, DISTANCEEXTRA[dist_code]);
-    }
-  }
-}
-
-/*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees
-out, bp: the compressed stream and the bit position in it, the block is appended there
-hash: hash chains handed on to encodeLZ77
-data, datapos, dataend: the bytes data[datapos] up to but not including data[dataend] form the block
-settings: use_lz77, windowsize, minmatch, nicematch and lazymatching are read from it
-final: written as the BFINAL bit of the block
-Returns 0 on success, else an error code: 83 for alloc fail, 64 if the end code got
-length 0, or the code returned by encodeLZ77 or HuffmanTree_makeFromFrequencies.
-*/
-static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash,
-                               const unsigned char* data, size_t datapos, size_t dataend,
-                               const LodePNGCompressSettings* settings, unsigned final)
-{
-  unsigned error = 0;
-
-  /*
-  A block is compressed as follows: The PNG data is lz77 encoded, resulting in
-  literal bytes and length/distance pairs. This is then huffman compressed with
-  two huffman trees. One huffman tree is used for the lit and len values ("ll"),
-  another huffman tree is used for the dist values ("d"). These two trees are
-  stored using their code lengths, and to compress even more these code lengths
-  are also run-length encoded and huffman compressed. This gives a huffman tree
-  of code lengths "cl". The code lengths used to describe this third tree are
-  the code length code lengths ("clcl").
-  */
-
-  /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/
-  uivector lz77_encoded;
-  HuffmanTree tree_ll; /*tree for lit,len values*/
-  HuffmanTree tree_d; /*tree for distance codes*/
-  HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/
-  uivector frequencies_ll; /*frequency of lit,len codes*/
-  uivector frequencies_d; /*frequency of dist codes*/
-  uivector frequencies_cl; /*frequency of code length codes*/
-  uivector bitlen_lld; /*lit,len,dist code lengths (in bits), literally (without repeat codes).*/
-  uivector bitlen_lld_e; /*bitlen_lld encoded with repeat codes (this is a rudimentary run length compression)*/
-  /*bitlen_cl is the code length code lengths ("clcl"). The bit lengths of codes to represent tree_cl
-  (these are written as is in the file, it would be crazy to compress these using yet another huffman
-  tree that needs to be represented by yet another set of code lengths)*/
-  uivector bitlen_cl;
-  size_t datasize = dataend - datapos;
-
-  /*
-  Due to the huffman compression of huffman tree representations ("two levels"), there are some analogies:
-  bitlen_lld is to tree_cl what data is to tree_ll and tree_d.
-  bitlen_lld_e is to bitlen_lld what lz77_encoded is to data.
-  bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded.
-  */
-
-  unsigned BFINAL = final;
-  size_t numcodes_ll, numcodes_d, i;
-  unsigned HLIT, HDIST, HCLEN;
-
-  uivector_init(&lz77_encoded);
-  HuffmanTree_init(&tree_ll);
-  HuffmanTree_init(&tree_d);
-  HuffmanTree_init(&tree_cl);
-  uivector_init(&frequencies_ll);
-  uivector_init(&frequencies_d);
-  uivector_init(&frequencies_cl);
-  uivector_init(&bitlen_lld);
-  uivector_init(&bitlen_lld_e);
-  uivector_init(&bitlen_cl);
-
-  /*This while loop never loops due to a break at the end, it is here to
-  allow breaking out of it to the cleanup phase on error conditions.*/
-  while(!error)
-  {
-    if(settings->use_lz77)
-    {
-      error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
-                         settings->minmatch, settings->nicematch, settings->lazymatching);
-      if(error) break;
-    }
-    else
-    {
-      if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/);
-      for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/
-    }
-
-    if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83 /*alloc fail*/);
-    if(!uivector_resizev(&frequencies_d, 30, 0)) ERROR_BREAK(83 /*alloc fail*/);
-
-    /*Count the frequencies of lit, len and dist codes*/
-    for(i = 0; i != lz77_encoded.size; ++i)
-    {
-      unsigned symbol = lz77_encoded.data[i];
-      ++frequencies_ll.data[symbol];
-      if(symbol > 256)
-      {
-        unsigned dist = lz77_encoded.data[i + 2]; /*a length symbol is followed by extra length bits, dist code, extra dist bits*/
-        ++frequencies_d.data[dist];
-        i += 3; /*skip those three values, they are no lit/len symbols*/
-      }
-    }
-    frequencies_ll.data[256] = 1; /*there will be exactly 1 end code, at the end of the block*/
-
-    /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/
-    error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, 257, frequencies_ll.size, 15);
-    if(error) break;
-    /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/
-    error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, 2, frequencies_d.size, 15);
-    if(error) break;
-
-    numcodes_ll = tree_ll.numcodes; if(numcodes_ll > 286) numcodes_ll = 286;
-    numcodes_d = tree_d.numcodes; if(numcodes_d > 30) numcodes_d = 30;
-    /*store the code lengths of both generated trees in bitlen_lld*/
-    for(i = 0; i != numcodes_ll; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i));
-    for(i = 0; i != numcodes_d; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i));
-
-    /*run-length compress bitlen_lld into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times),
-    17 (3-10 zeroes), 18 (11-138 zeroes)*/
-    for(i = 0; i != (unsigned)bitlen_lld.size; ++i)
-    {
-      unsigned j = 0; /*amount of repetitions*/
-      while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) ++j;
-
-      if(bitlen_lld.data[i] == 0 && j >= 2) /*repeat code for zeroes*/
-      {
-        ++j; /*include the first zero*/
-        if(j <= 10) /*repeat code 17 supports max 10 zeroes*/
-        {
-          uivector_push_back(&bitlen_lld_e, 17);
-          uivector_push_back(&bitlen_lld_e, j - 3);
-        }
-        else /*repeat code 18 supports max 138 zeroes*/
-        {
-          if(j > 138) j = 138;
-          uivector_push_back(&bitlen_lld_e, 18);
-          uivector_push_back(&bitlen_lld_e, j - 11);
-        }
-        i += (j - 1); /*the loop increment accounts for the remaining one*/
-      }
-      else if(j >= 3) /*repeat code for value other than zero*/
-      {
-        size_t k;
-        unsigned num = j / 6, rest = j % 6;
-        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]); /*the value itself, the repeat codes copy it*/
-        for(k = 0; k < num; ++k)
-        {
-          uivector_push_back(&bitlen_lld_e, 16);
-          uivector_push_back(&bitlen_lld_e, 6 - 3);
-        }
-        if(rest >= 3)
-        {
-          uivector_push_back(&bitlen_lld_e, 16);
-          uivector_push_back(&bitlen_lld_e, rest - 3);
-        }
-        else j -= rest; /*fewer than 3 left over: leave them for the next iterations*/
-        i += j;
-      }
-      else /*too short to benefit from repeat code*/
-      {
-        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
-      }
-    }
-
-    /*generate tree_cl, the huffmantree of huffmantrees*/
-
-    if(!uivector_resizev(&frequencies_cl, NUM_CODE_LENGTH_CODES, 0)) ERROR_BREAK(83 /*alloc fail*/);
-    for(i = 0; i != bitlen_lld_e.size; ++i)
-    {
-      ++frequencies_cl.data[bitlen_lld_e.data[i]];
-      /*after a repeat code come the bits that specify the number of repetitions,
-      those don't need to be in the frequencies_cl calculation*/
-      if(bitlen_lld_e.data[i] >= 16) ++i;
-    }
-
-    error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data,
-                                            frequencies_cl.size, frequencies_cl.size, 7);
-    if(error) break;
-
-    if(!uivector_resize(&bitlen_cl, tree_cl.numcodes)) ERROR_BREAK(83 /*alloc fail*/);
-    for(i = 0; i != tree_cl.numcodes; ++i)
-    {
-      /*lengths of code length tree is in the order as specified by deflate*/
-      bitlen_cl.data[i] = HuffmanTree_getLength(&tree_cl, CLCL_ORDER[i]);
-    }
-    while(bitlen_cl.data[bitlen_cl.size - 1] == 0 && bitlen_cl.size > 4)
-    {
-      /*remove zeros at the end, but minimum size must be 4*/
-      if(!uivector_resize(&bitlen_cl, bitlen_cl.size - 1)) ERROR_BREAK(83 /*alloc fail*/);
-    }
-    if(error) break; /*the ERROR_BREAK above only leaves the inner while loop*/
-
-    /*
-    Write everything into the output
-
-    After the BFINAL and BTYPE, the dynamic block consists out of the following:
-    - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN
-    - (HCLEN+4)*3 bits code lengths of code length alphabet
-    - HLIT + 257 code lengths of lit/length alphabet (encoded using the code length
-      alphabet, + possible repetition codes 16, 17, 18)
-    - HDIST + 1 code lengths of distance alphabet (encoded using the code length
-      alphabet, + possible repetition codes 16, 17, 18)
-    - compressed data
-    - 256 (end code)
-    */
-
-    /*Write block type*/
-    addBitToStream(bp, out, BFINAL);
-    addBitToStream(bp, out, 0); /*first bit of BTYPE "dynamic"*/
-    addBitToStream(bp, out, 1); /*second bit of BTYPE "dynamic"*/
-
-    /*write the HLIT, HDIST and HCLEN values*/
-    HLIT = (unsigned)(numcodes_ll - 257);
-    HDIST = (unsigned)(numcodes_d - 1);
-    HCLEN = (unsigned)bitlen_cl.size - 4;
-    /*trim zeroes for HCLEN. HLIT and HDIST were already trimmed at tree creation*/
-    while(!bitlen_cl.data[HCLEN + 4 - 1] && HCLEN > 0) --HCLEN;
-    addBitsToStream(bp, out, HLIT, 5);
-    addBitsToStream(bp, out, HDIST, 5);
-    addBitsToStream(bp, out, HCLEN, 4);
-
-    /*write the code lengths of the code length alphabet*/
-    for(i = 0; i != HCLEN + 4; ++i) addBitsToStream(bp, out, bitlen_cl.data[i], 3);
-
-    /*write the lengths of the lit/len AND the dist alphabet*/
-    for(i = 0; i != bitlen_lld_e.size; ++i)
-    {
-      addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_cl, bitlen_lld_e.data[i]),
-                       HuffmanTree_getLength(&tree_cl, bitlen_lld_e.data[i]));
-      /*extra bits of repeat codes*/
-      if(bitlen_lld_e.data[i] == 16) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 2);
-      else if(bitlen_lld_e.data[i] == 17) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 3);
-      else if(bitlen_lld_e.data[i] == 18) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 7);
-    }
-
-    /*write the compressed data symbols*/
-    writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d);
-    /*error: the length of the end code 256 must be larger than 0*/
-    if(HuffmanTree_getLength(&tree_ll, 256) == 0) ERROR_BREAK(64);
-
-    /*write the end code*/
-    addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256));
-
-    break; /*end of error-while*/
-  }
-
-  /*cleanup*/
-  uivector_cleanup(&lz77_encoded);
-  HuffmanTree_cleanup(&tree_ll);
-  HuffmanTree_cleanup(&tree_d);
-  HuffmanTree_cleanup(&tree_cl);
-  uivector_cleanup(&frequencies_ll);
-  uivector_cleanup(&frequencies_d);
-  uivector_cleanup(&frequencies_cl);
-  uivector_cleanup(&bitlen_lld_e);
-  uivector_cleanup(&bitlen_lld);
-  uivector_cleanup(&bitlen_cl);
-
-  return error;
-}
-
-/*
-Deflate for a block of type "fixed" (BTYPE 01), that is, with the predefined huffman trees.
-out, bp: the compressed stream and the bit position in it, the block is appended there
-hash: hash chains handed on to encodeLZ77
-data, datapos, dataend: the bytes data[datapos] up to but not including data[dataend] form the block
-settings: use_lz77, windowsize, minmatch, nicematch and lazymatching are read from it
-final: written as the BFINAL bit of the block
-Returns 0 on success, else the error code of encodeLZ77. In the error case the block
-header has already been written but data and end code have not.
-*/
-static unsigned deflateFixed(ucvector* out, size_t* bp, Hash* hash,
-                             const unsigned char* data,
-                             size_t datapos, size_t dataend,
-                             const LodePNGCompressSettings* settings, unsigned final)
-{
-  unsigned status = 0;
-  HuffmanTree lit_len_tree; /*tree for literal values and length codes*/
-  HuffmanTree dist_tree; /*tree for distance codes*/
-
-  HuffmanTree_init(&lit_len_tree);
-  HuffmanTree_init(&dist_tree);
-
-  generateFixedLitLenTree(&lit_len_tree);
-  generateFixedDistanceTree(&dist_tree);
-
-  /*block header: BFINAL, then BTYPE with its least significant bit first*/
-  addBitToStream(bp, out, final);
-  addBitToStream(bp, out, 1); /*first bit of BTYPE*/
-  addBitToStream(bp, out, 0); /*second bit of BTYPE*/
-
-  if(!settings->use_lz77)
-  {
-    /*no LZ77, but still will be Huffman compressed: every byte is written as a literal*/
-    size_t idx = datapos;
-    while(idx < dataend)
-    {
-      addHuffmanSymbol(bp, out, HuffmanTree_getCode(&lit_len_tree, data[idx]),
-                       HuffmanTree_getLength(&lit_len_tree, data[idx]));
-      ++idx;
-    }
-  }
-  else
-  {
-    /*LZ77 encoded*/
-    uivector symbols;
-    uivector_init(&symbols);
-    status = encodeLZ77(&symbols, hash, data, datapos, dataend, settings->windowsize,
-                        settings->minmatch, settings->nicematch, settings->lazymatching);
-    if(status == 0) writeLZ77data(bp, out, &symbols, &lit_len_tree, &dist_tree);
-    uivector_cleanup(&symbols);
-  }
-
-  /*add END code*/
-  if(status == 0)
-  {
-    addHuffmanSymbol(bp, out, HuffmanTree_getCode(&lit_len_tree, 256),
-                     HuffmanTree_getLength(&lit_len_tree, 256));
-  }
-
-  /*cleanup*/
-  HuffmanTree_cleanup(&lit_len_tree);
-  HuffmanTree_cleanup(&dist_tree);
-
-  return status;
-}
-
-/*
-Deflates in (insize bytes) and appends the result to out, using the block type
-settings->btype: 0 = stored, 1 = fixed huffman trees, 2 = dynamic huffman trees.
-For btype 1 everything goes in a single block, for btype 2 the input is split in
-blocks of 65536 to 262144 bytes. An empty input gives one empty final block.
-Returns 0 on success, 61 for an unknown btype, 83 if the hash tables could not be
-allocated, or the error code of the block encoder that failed.
-*/
-static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize,
-                                 const LodePNGCompressSettings* settings)
-{
-  unsigned error = 0;
-  size_t i, blocksize, numdeflateblocks;
-  size_t bp = 0; /*the bit pointer*/
-  Hash hash;
-
-  if(settings->btype > 2) return 61;
-  else if(settings->btype == 0) return deflateNoCompression(out, in, insize);
-  else if(settings->btype == 1)
-  {
-    blocksize = insize;
-    /*blocksize is the divisor below: it must not be 0 for an empty input*/
-    if(blocksize == 0) blocksize = 1;
-  }
-  else /*if(settings->btype == 2)*/
-  {
-    /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/
-    blocksize = insize / 8 + 8;
-    if(blocksize < 65536) blocksize = 65536;
-    if(blocksize > 262144) blocksize = 262144;
-  }
-
-  numdeflateblocks = (insize + blocksize - 1) / blocksize;
-  if(numdeflateblocks == 0) numdeflateblocks = 1; /*an empty input still needs one (final) block*/
-
-  error = hash_init(&hash, settings->windowsize);
-  if(error) return error;
-
-  for(i = 0; i != numdeflateblocks && !error; ++i)
-  {
-    unsigned final = (i == numdeflateblocks - 1);
-    size_t start = i * blocksize;
-    size_t end = start + blocksize;
-    if(end > insize) end = insize;
-
-    if(settings->btype == 1) error = deflateFixed(out, &bp, &hash, in, start, end, settings, final);
-    else if(settings->btype == 2) error = deflateDynamic(out, &bp, &hash, in, start, end, settings, final);
-  }
-
-  hash_cleanup(&hash);
-
-  return error;
-}
-
-/*
-Deflates in (insize bytes) with lodepng_deflatev. The buffer passed in through *out and
-*outsize is wrapped in a ucvector; afterwards *out and *outsize are overwritten with the
-data pointer and size of that vector, also when an error is returned.
-Returns the error code of lodepng_deflatev (0 on success).
-*/
-unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
-                         const unsigned char* in, size_t insize,
-                         const LodePNGCompressSettings* settings)
-{
-  ucvector buffer;
-  unsigned result;
-
-  ucvector_init_buffer(&buffer, *out, *outsize);
-  result = lodepng_deflatev(&buffer, in, insize, settings);
-
-  /*hand the (possibly reallocated) storage back to the caller*/
-  *outsize = buffer.size;
-  *out = buffer.data;
-  return result;
-}
-
-/*
-Deflates with the user supplied settings->custom_deflate when that callback is set,
-and with lodepng_deflate otherwise. Returns the error code of whichever one ran.
-*/
-static unsigned deflate(unsigned char** out, size_t* outsize,
-                        const unsigned char* in, size_t insize,
-                        const LodePNGCompressSettings* settings)
-{
-  if(!settings->custom_deflate) return lodepng_deflate(out, outsize, in, insize, settings);
-
-  return settings->custom_deflate(out, outsize, in, insize, settings);
-}
-
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Adler32                                                                  */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*Continues an Adler-32 computation: adler is the running checksum (1 for a fresh one),
-data[0..len-1] are the bytes to add. Returns the updated checksum.*/
-static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len)
-{
-  unsigned low = adler & 0xffff;          /*sum of the bytes*/
-  unsigned high = (adler >> 16) & 0xffff; /*sum of the running low sums*/
-
-  while(len != 0)
-  {
-    /*at least 5550 sums can be done before the sums overflow, saving a lot of modulo divisions*/
-    unsigned batch = (len > 5550) ? 5550 : len;
-    const unsigned char* stop = data + batch;
-    len -= batch;
-    for(; data != stop; ++data)
-    {
-      low += *data;
-      high += low;
-    }
-    low %= 65521;
-    high %= 65521;
-  }
-
-  return (high << 16) | low;
-}
-
-/*Returns the Adler-32 checksum of data[0..len-1], starting from the initial value 1.*/
-static unsigned adler32(const unsigned char* data, unsigned len)
-{
-  return update_adler32(1u, data, len);
-}
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Zlib                                                                   / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-/*
-Decompresses a zlib stream: 2 byte header, deflate data, 4 byte ADLER32 of the
-decompressed data. in/insize is the complete stream, out/outsize are passed on to inflate.
-Returns 0 on success, else an error code: 53 (stream too small), 24 (bad FCHECK),
-25 (unsupported method or window), 26 (preset dictionary), 58 (ADLER32 mismatch),
-or whatever inflate reports.
-*/
-unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                                 size_t insize, const LodePNGDecompressSettings* settings)
-{
-  unsigned error = 0;
-  unsigned CM, CINFO, FDICT;
-
-  if(insize < 2) return 53; /*error, size of zlib data too small*/
-  /*read information from zlib header*/
-  if((in[0] * 256 + in[1]) % 31 != 0)
-  {
-    /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/
-    return 24;
-  }
-
-  CM = in[0] & 15;
-  CINFO = (in[0] >> 4) & 15;
-  /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/
-  FDICT = (in[1] >> 5) & 1;
-  /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/
-
-  if(CM != 8 || CINFO > 7)
-  {
-    /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/
-    return 25;
-  }
-  if(FDICT != 0)
-  {
-    /*error: the specification of PNG says about the zlib stream:
-      "The additional flags shall not specify a preset dictionary."*/
-    return 26;
-  }
-
-  error = inflate(out, outsize, in + 2, insize - 2, settings);
-  if(error) return error;
-
-  if(!settings->ignore_adler32)
-  {
-    unsigned ADLER32, checksum;
-    /*the 4 byte trailer must lie behind the 2 byte header: for a 2 or 3 byte stream
-    in[insize - 4] would index before the start of the buffer, and for 4 or 5 bytes it
-    would overlap the header*/
-    if(insize < 6) return 53; /*error, size of zlib data too small*/
-    ADLER32 = lodepng_read32bitInt(&in[insize - 4]);
-    checksum = adler32(*out, (unsigned)(*outsize));
-    if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/
-  }
-
-  return 0; /*no error*/
-}
-
-/*Decompresses zlib data with the custom_zlib callback when the settings provide one,
-otherwise with the built-in lodepng_zlib_decompress.*/
-static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                                size_t insize, const LodePNGDecompressSettings* settings)
-{
-  if(!settings->custom_zlib) return lodepng_zlib_decompress(out, outsize, in, insize, settings);
-  return settings->custom_zlib(out, outsize, in, insize, settings);
-}
-
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-/*
-Compresses in[0..insize-1] into a zlib stream: 2 byte header (CMF, FLG), the deflate
-data, and the 4 byte ADLER32 checksum of the uncompressed input.
-out/outsize: output buffer, handed to a ucvector. Returns 0 on success or the error
-code reported by deflate.
-*/
-unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                               size_t insize, const LodePNGCompressSettings* settings)
-{
-  /*initially, *out must be NULL and outsize 0, if you just give some random *out
-  that's pointing to a non allocated buffer, this'll crash*/
-  ucvector outv;
-  size_t i;
-  unsigned error;
-  unsigned char* deflatedata = 0;
-  size_t deflatesize = 0;
-
-  /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/
-  unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/
-  unsigned FLEVEL = 0;
-  unsigned FDICT = 0;
-  unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64;
-  unsigned FCHECK = 31 - CMFFLG % 31; /*makes the 16 bit header value a multiple of 31*/
-  CMFFLG += FCHECK;
-
-  /*ucvector-controlled version of the output buffer, for dynamic array*/
-  ucvector_init_buffer(&outv, *out, *outsize);
-
-  ucvector_push_back(&outv, (unsigned char)(CMFFLG >> 8));
-  ucvector_push_back(&outv, (unsigned char)(CMFFLG & 255));
-
-  error = deflate(&deflatedata, &deflatesize, in, insize, settings);
-
-  if(!error)
-  {
-    unsigned ADLER32 = adler32(in, (unsigned)insize);
-    for(i = 0; i != deflatesize; ++i) ucvector_push_back(&outv, deflatedata[i]);
-    lodepng_add32bitInt(&outv, ADLER32);
-  }
-
-  /*release the temporary deflate output on the error path too: deflate may already have
-  allocated it before failing. Freeing a null pointer is done elsewhere in this file as well.*/
-  lodepng_free(deflatedata);
-
-  *out = outv.data;
-  *outsize = outv.size;
-
-  return error;
-}
-
-/*Compresses to zlib format with the custom_zlib callback when the settings provide one,
-otherwise with the built-in lodepng_zlib_compress.*/
-static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                              size_t insize, const LodePNGCompressSettings* settings)
-{
-  if(!settings->custom_zlib) return lodepng_zlib_compress(out, outsize, in, insize, settings);
-  return settings->custom_zlib(out, outsize, in, insize, settings);
-}
-
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#else /*no LODEPNG_COMPILE_ZLIB*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*Variant for builds without the built-in zlib code: only the custom_zlib callback can
-decompress; returns error 87 when none is set.*/
-static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                                size_t insize, const LodePNGDecompressSettings* settings)
-{
-  if(settings->custom_zlib)
-  {
-    return settings->custom_zlib(out, outsize, in, insize, settings);
-  }
-  return 87; /*no custom zlib function provided */
-}
-#endif /*LODEPNG_COMPILE_DECODER*/
-#ifdef LODEPNG_COMPILE_ENCODER
-/*Variant for builds without the built-in zlib code: only the custom_zlib callback can
-compress; returns error 87 when none is set.*/
-static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                              size_t insize, const LodePNGCompressSettings* settings)
-{
-  if(settings->custom_zlib)
-  {
-    return settings->custom_zlib(out, outsize, in, insize, settings);
-  }
-  return 87; /*no custom zlib function provided */
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#endif /*LODEPNG_COMPILE_ZLIB*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-/*this is a good tradeoff between speed and compression ratio*/
-#define DEFAULT_WINDOWSIZE 2048
-
-/*Fills *settings with the default compression settings; no custom callbacks are installed.*/
-void lodepng_compress_settings_init(LodePNGCompressSettings* settings)
-{
-  /*no user supplied compression callbacks or context*/
-  settings->custom_context = 0;
-  settings->custom_deflate = 0;
-  settings->custom_zlib = 0;
-
-  /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/
-  settings->btype = 2;
-
-  /*LZ77 matching parameters*/
-  settings->use_lz77 = 1;
-  settings->windowsize = DEFAULT_WINDOWSIZE;
-  settings->minmatch = 3;
-  settings->nicematch = 128;
-  settings->lazymatching = 1;
-}
-
-const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0};
-
-
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-/*Fills *settings with the default decompression settings: the ADLER32 check stays
-enabled and no custom callbacks are installed.*/
-void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings)
-{
-  settings->custom_context = 0;
-  settings->custom_inflate = 0;
-  settings->custom_zlib = 0;
-
-  settings->ignore_adler32 = 0;
-}
-
-const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0};
-
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* // End of Zlib related code. Begin of PNG related code.                 // */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_PNG
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / CRC32                                                                  / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-
-#ifndef LODEPNG_NO_COMPILE_CRC
-/* CRC polynomial: 0xedb88320 */
-static unsigned lodepng_crc32_table[256] = {
-           0u, 1996959894u, 3993919788u, 2567524794u,  124634137u, 1886057615u, 3915621685u, 2657392035u,
-   249268274u, 2044508324u, 3772115230u, 2547177864u,  162941995u, 2125561021u, 3887607047u, 2428444049u,
-   498536548u, 1789927666u, 4089016648u, 2227061214u,  450548861u, 1843258603u, 4107580753u, 2211677639u,
-   325883990u, 1684777152u, 4251122042u, 2321926636u,  335633487u, 1661365465u, 4195302755u, 2366115317u,
-   997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u,
-   901097722u, 1119000684u, 3686517206u, 2898065728u,  853044451u, 1172266101u, 3705015759u, 2882616665u,
-   651767980u, 1373503546u, 3369554304u, 3218104598u,  565507253u, 1454621731u, 3485111705u, 3099436303u,
-   671266974u, 1594198024u, 3322730930u, 2970347812u,  795835527u, 1483230225u, 3244367275u, 3060149565u,
-  1994146192u,   31158534u, 2563907772u, 4023717930u, 1907459465u,  112637215u, 2680153253u, 3904427059u,
-  2013776290u,  251722036u, 2517215374u, 3775830040u, 2137656763u,  141376813u, 2439277719u, 3865271297u,
-  1802195444u,  476864866u, 2238001368u, 4066508878u, 1812370925u,  453092731u, 2181625025u, 4111451223u,
-  1706088902u,  314042704u, 2344532202u, 4240017532u, 1658658271u,  366619977u, 2362670323u, 4224994405u,
-  1303535960u,  984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u,
-  1131014506u,  879679996u, 2909243462u, 3663771856u, 1141124467u,  855842277u, 2852801631u, 3708648649u,
-  1342533948u,  654459306u, 3188396048u, 3373015174u, 1466479909u,  544179635u, 3110523913u, 3462522015u,
-  1591671054u,  702138776u, 2966460450u, 3352799412u, 1504918807u,  783551873u, 3082640443u, 3233442989u,
-  3988292384u, 2596254646u,   62317068u, 1957810842u, 3939845945u, 2647816111u,   81470997u, 1943803523u,
-  3814918930u, 2489596804u,  225274430u, 2053790376u, 3826175755u, 2466906013u,  167816743u, 2097651377u,
-  4027552580u, 2265490386u,  503444072u, 1762050814u, 4150417245u, 2154129355u,  426522225u, 1852507879u,
-  4275313526u, 2312317920u,  282753626u, 1742555852u, 4189708143u, 2394877945u,  397917763u, 1622183637u,
-  3604390888u, 2714866558u,  953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u,
-  3624741850u, 2936675148u,  906185462u, 1090812512u, 3747672003u, 2825379669u,  829329135u, 1181335161u,
-  3412177804u, 3160834842u,  628085408u, 1382605366u, 3423369109u, 3138078467u,  570562233u, 1426400815u,
-  3317316542u, 2998733608u,  733239954u, 1555261956u, 3268935591u, 3050360625u,  752459403u, 1541320221u,
-  2607071920u, 3965973030u, 1969922972u,   40735498u, 2617837225u, 3943577151u, 1913087877u,   83908371u,
-  2512341634u, 3803740692u, 2075208622u,  213261112u, 2463272603u, 3855990285u, 2094854071u,  198958881u,
-  2262029012u, 4057260610u, 1759359992u,  534414190u, 2176718541u, 4139329115u, 1873836001u,  414664567u,
-  2282248934u, 4279200368u, 1711684554u,  285281116u, 2405801727u, 4167216745u, 1634467795u,  376229701u,
-  2685067896u, 3608007406u, 1308918612u,  956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u,
-  2932959818u, 3654703836u, 1088359270u,  936918000u, 2847714899u, 3736837829u, 1202900863u,  817233897u,
-  3183342108u, 3401237130u, 1404277552u,  615818150u, 3134207493u, 3453421203u, 1423857449u,  601450431u,
-  3009837614u, 3294710456u, 1567103746u,  711928724u, 3020668471u, 3272380065u, 1510334235u,  755167117u
-};
-
-/*Returns the CRC of the bytes data[0..length-1], computed one byte at a time with
-lodepng_crc32_table.*/
-unsigned lodepng_crc32(const unsigned char* data, size_t length)
-{
-  unsigned crc = 0xffffffffu;
-  size_t pos = 0;
-  while(pos < length)
-  {
-    unsigned slot = (crc ^ data[pos]) & 0xff;
-    crc = lodepng_crc32_table[slot] ^ (crc >> 8);
-    ++pos;
-  }
-  return crc ^ 0xffffffffu;
-}
-#else /* !LODEPNG_NO_COMPILE_CRC */
-unsigned lodepng_crc32(const unsigned char* data, size_t length);
-#endif /* !LODEPNG_NO_COMPILE_CRC */
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Reading and writing single bits and bytes from/to stream for LodePNG   / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*Reads the bit at position *bitpointer, where bit 0 of the stream is the most
-significant bit of the first byte, and advances *bitpointer by one.*/
-static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream)
-{
-  size_t pos = *bitpointer;
-  unsigned shift = (unsigned)(7 - (pos & 0x7)); /*MSB of each byte comes first*/
-  *bitpointer = pos + 1;
-  return (unsigned char)((bitstream[pos >> 3] >> shift) & 1);
-}
-
-/*Reads nbits consecutive bits with readBitFromReversedStream; the bit read first ends
-up as the most significant bit of the returned value.*/
-static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits)
-{
-  unsigned value = 0;
-  size_t remaining;
-  for(remaining = nbits; remaining != 0; --remaining)
-  {
-    value = (value << 1) | (unsigned)readBitFromReversedStream(bitpointer, bitstream);
-  }
-  return value;
-}
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*Writes one bit at position *bitpointer (MSB of each byte first) and advances the
-pointer. Only ever sets bits: the target bit must currently be 0 for a correct result.*/
-static void setBitOfReversedStream0(size_t* bitpointer, unsigned char* bitstream, unsigned char bit)
-{
-  size_t pos = *bitpointer;
-  *bitpointer = pos + 1;
-  if(!bit) return; /*the bit is assumed to be 0 already, nothing to write*/
-  bitstream[pos >> 3] |= (bit << (7 - (pos & 0x7)));
-}
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-/*Writes one bit at position *bitpointer (MSB of each byte first) and advances the
-pointer. Works whether the target bit currently is 0 or 1.*/
-static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit)
-{
-  size_t pos = *bitpointer;
-  unsigned char* target = &bitstream[pos >> 3];
-  int mask = 1 << (7 - (pos & 0x7));
-  if(bit != 0) *target |= mask;
-  else *target &= (unsigned char)(~mask);
-  *bitpointer = pos + 1;
-}
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / PNG chunks                                                             / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*Returns the length field of a chunk: the 32-bit integer stored in its first 4 bytes,
-as read by lodepng_read32bitInt.*/
-unsigned lodepng_chunk_length(const unsigned char* chunk)
-{
-  const unsigned char* length_field = chunk;
-  return lodepng_read32bitInt(length_field);
-}
-
-/*Copies the 4 type bytes of the chunk (bytes 4..7) into type and null terminates it.*/
-void lodepng_chunk_type(char type[5], const unsigned char* chunk)
-{
-  const unsigned char* name = chunk + 4;
-  type[0] = (char)name[0];
-  type[1] = (char)name[1];
-  type[2] = (char)name[2];
-  type[3] = (char)name[3];
-  type[4] = 0; /*null termination char*/
-}
-
-/*Returns 1 if type is exactly 4 characters long and equals the chunk's type bytes
-(bytes 4..7), 0 otherwise.*/
-unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type)
-{
-  unsigned pos;
-  if(strlen(type) != 4) return 0;
-  for(pos = 0; pos != 4; ++pos)
-  {
-    if(chunk[4 + pos] != type[pos]) return 0;
-  }
-  return 1;
-}
-
-/*Returns 1 if bit 5 (value 32) of the first type byte is set, 0 otherwise.*/
-unsigned char lodepng_chunk_ancillary(const unsigned char* chunk)
-{
-  return (unsigned char)((chunk[4] >> 5) & 1);
-}
-
-/*Returns 1 if bit 5 (value 32) of the third type byte is set, 0 otherwise.*/
-unsigned char lodepng_chunk_private(const unsigned char* chunk)
-{
-  return (unsigned char)((chunk[6] >> 5) & 1);
-}
-
-/*Returns 1 if bit 5 (value 32) of the fourth type byte is set, 0 otherwise.*/
-unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk)
-{
-  return (unsigned char)((chunk[7] >> 5) & 1);
-}
-
-/*Returns a pointer to the chunk's data, which starts after the 4 length and 4 type bytes.*/
-unsigned char* lodepng_chunk_data(unsigned char* chunk)
-{
-  return chunk + 8;
-}
-
-/*Const variant: returns a pointer to the chunk's data, which starts after the 4 length
-and 4 type bytes.*/
-const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk)
-{
-  return chunk + 8;
-}
-
-/*Compares the CRC stored after the chunk data with the CRC computed over the chunk.
-Returns 0 when they match, 1 when they differ.*/
-unsigned lodepng_chunk_check_crc(const unsigned char* chunk)
-{
-  unsigned length = lodepng_chunk_length(chunk);
-  unsigned crc_offset = length + 8; /*4 length bytes + 4 type bytes + data*/
-  unsigned stored = lodepng_read32bitInt(&chunk[crc_offset]);
-  /*the CRC is taken of the data and the 4 chunk type letters, not the length*/
-  unsigned computed = lodepng_crc32(&chunk[4], length + 4);
-  return (stored == computed) ? 0 : 1;
-}
-
-/*Computes the CRC over the chunk's type and data bytes and stores it in the 4 bytes
-directly after the data.*/
-void lodepng_chunk_generate_crc(unsigned char* chunk)
-{
-  unsigned length = lodepng_chunk_length(chunk);
-  unsigned char* crc_field = chunk + 8 + length;
-  /*the CRC covers the 4 type letters and the data, not the length field*/
-  lodepng_set32bitInt(crc_field, lodepng_crc32(&chunk[4], length + 4));
-}
-
-/*Returns a pointer to the byte after this chunk, i.e. chunk + data length + 12
-(4 length bytes, 4 type bytes, 4 CRC bytes). No bounds checking is done here.*/
-unsigned char* lodepng_chunk_next(unsigned char* chunk)
-{
-  unsigned skip = lodepng_chunk_length(chunk) + 12;
-  return chunk + skip;
-}
-
-/*Const variant: returns a pointer to the byte after this chunk, i.e. chunk + data
-length + 12 (4 length bytes, 4 type bytes, 4 CRC bytes). No bounds checking is done here.*/
-const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk)
-{
-  unsigned skip = lodepng_chunk_length(chunk) + 12;
-  return chunk + skip;
-}
-
-/*
-Appends a complete chunk (length, type, data and CRC as they are in memory) to the
-buffer *out of size *outlength, reallocating it with lodepng_realloc.
-On success *out and *outlength describe the enlarged buffer and 0 is returned.
-Returns 77 on integer overflow of the sizes and 83 when the allocation fails; in both
-cases *out and *outlength are left untouched.
-*/
-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk)
-{
-  size_t i;
-  /*compute the total size in size_t: adding 12 to the 32-bit length in unsigned
-  arithmetic could wrap around and make a huge chunk look tiny*/
-  size_t total_chunk_length = (size_t)lodepng_chunk_length(chunk) + 12u;
-  unsigned char *chunk_start, *new_buffer;
-  size_t new_length = (*outlength) + total_chunk_length;
-  if(total_chunk_length < 12u) return 77; /*integer overflow happened (32-bit size_t)*/
-  if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/
-
-  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
-  if(!new_buffer) return 83; /*alloc fail*/
-  (*out) = new_buffer;
-  (*outlength) = new_length;
-  chunk_start = &(*out)[new_length - total_chunk_length];
-
-  for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i];
-
-  return 0;
-}
-
-/*
-Appends a new chunk to the buffer *out of size *outlength, reallocating it with
-lodepng_realloc. length: number of data bytes, type: the 4 type characters,
-data: the data bytes. The length field and the CRC are generated here.
-Returns 0 on success, 77 on integer overflow of the sizes, 83 when the allocation
-fails; on error *out and *outlength are left untouched.
-*/
-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
-                              const char* type, const unsigned char* data)
-{
-  size_t i;
-  unsigned char *chunk, *new_buffer;
-  /*do the size arithmetic in size_t: "length + 12" in unsigned arithmetic can wrap,
-  which made the overflow check below compare against a too small value*/
-  size_t total_chunk_length = (size_t)length + 12u;
-  size_t new_length = (*outlength) + total_chunk_length;
-  if(total_chunk_length < 12u) return 77; /*integer overflow happened (32-bit size_t)*/
-  if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/
-  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
-  if(!new_buffer) return 83; /*alloc fail*/
-  (*out) = new_buffer;
-  (*outlength) = new_length;
-  chunk = &(*out)[new_length - total_chunk_length];
-
-  /*1: length*/
-  lodepng_set32bitInt(chunk, (unsigned)length);
-
-  /*2: chunk name (4 letters)*/
-  chunk[4] = (unsigned char)type[0];
-  chunk[5] = (unsigned char)type[1];
-  chunk[6] = (unsigned char)type[2];
-  chunk[7] = (unsigned char)type[3];
-
-  /*3: the data*/
-  for(i = 0; i != length; ++i) chunk[8 + i] = data[i];
-
-  /*4: CRC (of the chunkname characters and the data)*/
-  lodepng_chunk_generate_crc(chunk);
-
-  return 0;
-}
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / Color types and such                                                   / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*Checks whether bitdepth bd is allowed for the given color type.
-Returns 0 if the combination is allowed, 37 if the bit depth is not allowed for this
-color type, 31 if the color type itself is not one of 0, 2, 3, 4, 6.*/
-static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) /*bd = bitdepth*/
-{
-  unsigned low_depth = (bd == 1 || bd == 2 || bd == 4);
-  unsigned allowed;
-  switch(colortype)
-  {
-    case 0: allowed = (low_depth || bd == 8 || bd == 16); break; /*grey*/
-    case 3: allowed = (low_depth || bd == 8); break; /*palette*/
-    case 2: /*RGB*/
-    case 4: /*grey + alpha*/
-    case 6: /*RGBA*/
-      allowed = (bd == 8 || bd == 16);
-      break;
-    default: return 31;
-  }
-  return allowed ? 0 : 37; /*0 = allowed color type / bits combination*/
-}
-
-/*Returns the number of channels per pixel for the color type, or 0 if the color type
-is not one of 0, 2, 3, 4, 6.*/
-static unsigned getNumColorChannels(LodePNGColorType colortype)
-{
-  unsigned channels;
-  switch(colortype)
-  {
-    case 0: channels = 1; break; /*grey*/
-    case 2: channels = 3; break; /*RGB*/
-    case 3: channels = 1; break; /*palette*/
-    case 4: channels = 2; break; /*grey + alpha*/
-    case 6: channels = 4; break; /*RGBA*/
-    default: channels = 0; break; /*unexisting color type*/
-  }
-  return channels;
-}
-
-/*Returns the bits per pixel: the number of channels of the color type times the bits
-per channel.*/
-static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth)
-{
-  unsigned channels = getNumColorChannels(colortype);
-  return channels * bitdepth;
-}
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*Initializes *info to 8-bit RGBA without color key and without palette.*/
-void lodepng_color_mode_init(LodePNGColorMode* info)
-{
-  info->colortype = LCT_RGBA;
-  info->bitdepth = 8;
-
-  /*empty palette*/
-  info->palette = 0;
-  info->palettesize = 0;
-
-  /*no color key*/
-  info->key_defined = 0;
-  info->key_r = 0;
-  info->key_g = 0;
-  info->key_b = 0;
-}
-
-/*Releases what the color mode owns; the palette is the only thing cleared here.*/
-void lodepng_color_mode_cleanup(LodePNGColorMode* info)
-{
-  lodepng_palette_clear(info);
-}
-
-/*
-Makes *dest a deep copy of *source. dest is cleaned up first, so it must have been
-initialized before. The palette, if source has one, is copied into a freshly
-allocated 1024 byte buffer (room for 256 RGBA entries).
-Returns 0 on success or 83 when the palette allocation fails; in that case dest is
-left with an empty palette.
-*/
-unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source)
-{
-  size_t i;
-  lodepng_color_mode_cleanup(dest);
-  *dest = *source;
-  if(source->palette)
-  {
-    dest->palette = (unsigned char*)lodepng_malloc(1024);
-    if(!dest->palette && source->palettesize)
-    {
-      /*keep dest consistent: a non-zero palettesize with a null palette would make
-      the functions that iterate over the palette dereference a null pointer*/
-      dest->palettesize = 0;
-      return 83; /*alloc fail*/
-    }
-    for(i = 0; i != source->palettesize * 4; ++i) dest->palette[i] = source->palette[i];
-  }
-  return 0;
-}
-
-/*Returns 1 if both color modes are equal, 0 otherwise. Compared are: color type, bit
-depth, the color key (its r, g, b values only when a key is defined) and the palettes.
-The palettes are always compared: they must have the same size and the same bytes.*/
-static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b)
-{
-  size_t pos, palette_bytes;
-
-  if(a->colortype != b->colortype || a->bitdepth != b->bitdepth) return 0;
-
-  if(a->key_defined != b->key_defined) return 0;
-  if(a->key_defined
-     && (a->key_r != b->key_r || a->key_g != b->key_g || a->key_b != b->key_b)) return 0;
-
-  if(a->palettesize != b->palettesize) return 0;
-  palette_bytes = a->palettesize * 4; /*4 bytes (RGBA) per palette entry*/
-  for(pos = 0; pos != palette_bytes; ++pos)
-  {
-    if(a->palette[pos] != b->palette[pos]) return 0;
-  }
-
-  return 1;
-}
-
-/*Frees the palette, if there is one, and resets the color mode to "no palette".*/
-void lodepng_palette_clear(LodePNGColorMode* info)
-{
-  if(info->palette)
-  {
-    lodepng_free(info->palette);
-  }
-  info->palettesize = 0;
-  info->palette = 0;
-}
-
-/*
-Appends one RGBA color to the palette of *info, allocating the palette buffer on first
-use. Returns 0 on success, 83 when the allocation fails, 38 when the palette already
-holds 256 colors.
-*/
-unsigned lodepng_palette_add(LodePNGColorMode* info,
-                             unsigned char r, unsigned char g, unsigned char b, unsigned char a)
-{
-  unsigned char* data;
-  /*the palette buffer has a fixed size of 1024 bytes = 256 colors; writing a 257th
-  color would run past the end of it.
-  NOTE(review): 38 is assumed to be this library's "palette too big" error code - confirm
-  against the error text table.*/
-  if(info->palettesize >= 256) return 38;
-  if(!info->palette) /*allocate palette if empty*/
-  {
-    /*room for 256 colors with 4 bytes each*/
-    data = (unsigned char*)lodepng_realloc(info->palette, 1024);
-    if(!data) return 83; /*alloc fail*/
-    else info->palette = data;
-  }
-  info->palette[4 * info->palettesize + 0] = r;
-  info->palette[4 * info->palettesize + 1] = g;
-  info->palette[4 * info->palettesize + 2] = b;
-  info->palette[4 * info->palettesize + 3] = a;
-  ++info->palettesize;
-  return 0;
-}
-
-/*Returns the bits per pixel of the color mode, derived from its color type and bit depth.*/
-unsigned lodepng_get_bpp(const LodePNGColorMode* info)
-{
-  unsigned bits_per_pixel = lodepng_get_bpp_lct(info->colortype, info->bitdepth);
-  return bits_per_pixel;
-}
-
-/*Returns the number of channels per pixel of the color mode's color type.*/
-unsigned lodepng_get_channels(const LodePNGColorMode* info)
-{
-  unsigned channels = getNumColorChannels(info->colortype);
-  return channels;
-}
-
-/*Returns 1 if the color type is grey or grey with alpha, 0 otherwise.*/
-unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info)
-{
-  if(info->colortype == LCT_GREY) return 1;
-  if(info->colortype == LCT_GREY_ALPHA) return 1;
-  return 0;
-}
-
-/*Returns 1 if bit 2 (value 4) of the color type is set, which is the case for the
-color types 4 and 6, 0 otherwise.*/
-unsigned lodepng_is_alpha_type(const LodePNGColorMode* info)
-{
-  if(info->colortype & 4) return 1; /*4 or 6*/
-  return 0;
-}
-
-/*Returns 1 if the color type is the palette type, 0 otherwise.*/
-unsigned lodepng_is_palette_type(const LodePNGColorMode* info)
-{
-  return (info->colortype == LCT_PALETTE) ? 1 : 0;
-}
-
-/*Returns 1 if at least one palette entry has an alpha value below 255, 0 otherwise
-(also 0 for an empty palette).*/
-unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info)
-{
-  size_t entry = 0;
-  while(entry != info->palettesize)
-  {
-    /*each entry is 4 bytes, alpha is the last one*/
-    if(info->palette[entry * 4 + 3] != 255) return 1;
-    ++entry;
-  }
-  return 0;
-}
-
-/*Returns 1 if the color mode has a color key, an alpha color type, or a palette
-containing an entry with alpha below 255; 0 otherwise.*/
-unsigned lodepng_can_have_alpha(const LodePNGColorMode* info)
-{
-  if(info->key_defined) return 1;
-  if(lodepng_is_alpha_type(info)) return 1;
-  if(lodepng_has_palette_alpha(info)) return 1;
-  return 0;
-}
-
-/*Returns the number of bytes needed for a raw image of w * h pixels in the given
-color mode, with the total number of bits rounded up to whole bytes.*/
-size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color)
-{
-  size_t bpp = lodepng_get_bpp(color);
-  /*widen before multiplying: "w * h" in unsigned arithmetic wraps around for large
-  images before the result is ever stored in the size_t*/
-  size_t n = (size_t)w * (size_t)h;
-  /*split in whole groups of 8 pixels and the remainder, so that n * bpp itself never
-  has to be computed*/
-  return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8;
-}
-
-/*Returns the number of bytes needed for a raw image of w * h pixels with the given
-color type and bit depth, with the total number of bits rounded up to whole bytes.*/
-size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth)
-{
-  size_t bpp = lodepng_get_bpp_lct(colortype, bitdepth);
-  /*widen before multiplying: "w * h" in unsigned arithmetic wraps around for large
-  images before the result is ever stored in the size_t*/
-  size_t n = (size_t)w * (size_t)h;
-  /*split in whole groups of 8 pixels and the remainder, so that n * bpp itself never
-  has to be computed*/
-  return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8;
-}
-
-
-#ifdef LODEPNG_COMPILE_PNG
-#ifdef LODEPNG_COMPILE_DECODER
-/*in an idat chunk, each scanline is a multiple of 8 bits, unlike the lodepng output buffer.
-Returns h times the size of one scanline rounded up to whole bytes.*/
-static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, const LodePNGColorMode* color)
-{
-  /*will not overflow for any color type if roughly w * h < 268435455*/
-  size_t bpp = lodepng_get_bpp(color);
-  size_t full_groups = (w / 8) * bpp;          /*bytes for whole groups of 8 pixels*/
-  size_t remainder = ((w & 7) * bpp + 7) / 8;  /*bytes for the last 0..7 pixels, rounded up*/
-  return h * (full_groups + remainder);
-}
-#endif /*LODEPNG_COMPILE_DECODER*/
-#endif /*LODEPNG_COMPILE_PNG*/
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-
-/*Sets the 3 unknown chunk buffers of *info to empty: null data pointer, size 0.*/
-static void LodePNGUnknownChunks_init(LodePNGInfo* info)
-{
-  unsigned slot;
-  for(slot = 0; slot != 3; ++slot)
-  {
-    info->unknown_chunks_data[slot] = 0;
-    info->unknown_chunks_size[slot] = 0;
-  }
-}
-
-/*Frees the 3 unknown chunk buffers of *info. The pointers and sizes themselves are
-not reset here.*/
-static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info)
-{
-  unsigned slot = 0;
-  while(slot != 3)
-  {
-    lodepng_free(info->unknown_chunks_data[slot]);
-    ++slot;
-  }
-}
-
-/*Replaces the 3 unknown chunk buffers of dest with newly allocated copies of those of
-src; the old buffers of dest are freed first. Returns 0 on success, 83 when an
-allocation of non-zero size fails.*/
-static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src)
-{
-  unsigned slot;
-
-  LodePNGUnknownChunks_cleanup(dest);
-
-  for(slot = 0; slot != 3; ++slot)
-  {
-    size_t pos = 0;
-    dest->unknown_chunks_size[slot] = src->unknown_chunks_size[slot];
-    dest->unknown_chunks_data[slot] = (unsigned char*)lodepng_malloc(src->unknown_chunks_size[slot]);
-    /*a null result only counts as failure when bytes were actually requested*/
-    if(dest->unknown_chunks_size[slot] && !dest->unknown_chunks_data[slot]) return 83; /*alloc fail*/
-    while(pos < src->unknown_chunks_size[slot])
-    {
-      dest->unknown_chunks_data[slot][pos] = src->unknown_chunks_data[slot][pos];
-      ++pos;
-    }
-  }
-
-  return 0;
-}
-
-/******************************************************************************/
-
-/*Sets the text key/string arrays of *info to empty.*/
-static void LodePNGText_init(LodePNGInfo* info)
-{
-  info->text_keys = NULL;
-  info->text_strings = NULL;
-  info->text_num = 0;
-}
-
-/*Frees every text key and string of *info and then the two arrays holding them.
-text_num and the array pointers themselves are not reset here.*/
-static void LodePNGText_cleanup(LodePNGInfo* info)
-{
-  size_t entry = 0;
-  while(entry != info->text_num)
-  {
-    string_cleanup(&info->text_keys[entry]);
-    string_cleanup(&info->text_strings[entry]);
-    ++entry;
-  }
-  lodepng_free(info->text_keys);
-  lodepng_free(info->text_strings);
-}
-
-/*Rebuilds the text entries of dest as copies of those of source. The text fields of
-dest are overwritten without being freed, so they must not own anything at this point.
-Returns 0 on success or the first error reported by lodepng_add_text.*/
-static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source)
-{
-  size_t entry;
-  dest->text_num = 0;
-  dest->text_keys = 0;
-  dest->text_strings = 0;
-  for(entry = 0; entry != source->text_num; ++entry)
-  {
-    CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[entry], source->text_strings[entry]));
-  }
-  return 0;
-}
-
-/*Removes all text entries of *info and leaves it in the empty state, so that texts
-can be added again and a later cleanup stays safe.*/
-void lodepng_clear_text(LodePNGInfo* info)
-{
-  LodePNGText_cleanup(info);
-  /*the cleanup only frees; without resetting these, text_num and the array pointers
-  would still describe the freed memory (use after free / double free later on)*/
-  info->text_num = 0;
-  info->text_keys = NULL;
-  info->text_strings = NULL;
-}
-
-/*
-Appends a text entry to *info, storing key and str through string_set.
-Returns 0 on success or 83 when growing one of the arrays fails; in that case no
-entry is added and the existing entries stay valid.
-*/
-unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str)
-{
-  char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1)));
-  char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1)));
-
-  /*a successful realloc has already taken over the old array, so its result must be
-  stored even when the other realloc failed. Freeing it instead would leave info
-  pointing at released memory and would lose the existing entries.*/
-  if(new_keys) info->text_keys = new_keys;
-  if(new_strings) info->text_strings = new_strings;
-  if(!new_keys || !new_strings) return 83; /*alloc fail*/
-
-  ++info->text_num;
-
-  string_init(&info->text_keys[info->text_num - 1]);
-  string_set(&info->text_keys[info->text_num - 1], key);
-
-  string_init(&info->text_strings[info->text_num - 1]);
-  string_set(&info->text_strings[info->text_num - 1], str);
-
-  return 0;
-}
-
-/******************************************************************************/
-
-/*Sets the four international text arrays of *info to empty.*/
-static void LodePNGIText_init(LodePNGInfo* info)
-{
-  info->itext_keys = NULL;
-  info->itext_langtags = NULL;
-  info->itext_transkeys = NULL;
-  info->itext_strings = NULL;
-  info->itext_num = 0;
-}
-
-/*Frees every international text entry of *info (key, language tag, translated key,
-string) and then the four arrays holding them. itext_num and the array pointers
-themselves are not reset here.*/
-static void LodePNGIText_cleanup(LodePNGInfo* info)
-{
-  size_t entry = 0;
-  while(entry != info->itext_num)
-  {
-    string_cleanup(&info->itext_keys[entry]);
-    string_cleanup(&info->itext_langtags[entry]);
-    string_cleanup(&info->itext_transkeys[entry]);
-    string_cleanup(&info->itext_strings[entry]);
-    ++entry;
-  }
-  lodepng_free(info->itext_keys);
-  lodepng_free(info->itext_langtags);
-  lodepng_free(info->itext_transkeys);
-  lodepng_free(info->itext_strings);
-}
-
-/*Rebuilds the international text entries of dest as copies of those of source. The
-itext fields of dest are overwritten without being freed, so they must not own
-anything at this point. Returns 0 on success or the first error reported by
-lodepng_add_itext.*/
-static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source)
-{
-  size_t entry;
-  dest->itext_num = 0;
-  dest->itext_keys = 0;
-  dest->itext_langtags = 0;
-  dest->itext_transkeys = 0;
-  dest->itext_strings = 0;
-  for(entry = 0; entry != source->itext_num; ++entry)
-  {
-    CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[entry], source->itext_langtags[entry],
-                                        source->itext_transkeys[entry], source->itext_strings[entry]));
-  }
-  return 0;
-}
-
-/*Removes all international text entries of *info and leaves it in the empty state, so
-that entries can be added again and a later cleanup stays safe.*/
-void lodepng_clear_itext(LodePNGInfo* info)
-{
-  LodePNGIText_cleanup(info);
-  /*the cleanup only frees; without resetting these, itext_num and the array pointers
-  would still describe the freed memory (use after free / double free later on)*/
-  info->itext_num = 0;
-  info->itext_keys = NULL;
-  info->itext_langtags = NULL;
-  info->itext_transkeys = NULL;
-  info->itext_strings = NULL;
-}
-
-/*
-Appends an international text entry to *info, storing key, langtag, transkey and str
-through string_set.
-Returns 0 on success or 83 when growing one of the arrays fails; in that case no
-entry is added and the existing entries stay valid.
-*/
-unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
-                           const char* transkey, const char* str)
-{
-  char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1)));
-  char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1)));
-  char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1)));
-  char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1)));
-
-  /*a successful realloc has already taken over the old array, so its result must be
-  stored even when another realloc failed. Freeing it instead would leave info
-  pointing at released memory and would lose the existing entries.*/
-  if(new_keys) info->itext_keys = new_keys;
-  if(new_langtags) info->itext_langtags = new_langtags;
-  if(new_transkeys) info->itext_transkeys = new_transkeys;
-  if(new_strings) info->itext_strings = new_strings;
-  if(!new_keys || !new_langtags || !new_transkeys || !new_strings) return 83; /*alloc fail*/
-
-  ++info->itext_num;
-
-  string_init(&info->itext_keys[info->itext_num - 1]);
-  string_set(&info->itext_keys[info->itext_num - 1], key);
-
-  string_init(&info->itext_langtags[info->itext_num - 1]);
-  string_set(&info->itext_langtags[info->itext_num - 1], langtag);
-
-  string_init(&info->itext_transkeys[info->itext_num - 1]);
-  string_set(&info->itext_transkeys[info->itext_num - 1], transkey);
-
-  string_init(&info->itext_strings[info->itext_num - 1]);
-  string_set(&info->itext_strings[info->itext_num - 1], str);
-
-  return 0;
-}
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-/*Initializes *info: default color mode, all method fields 0 and, when ancillary chunk
-support is compiled in, no background, texts, time, phys or unknown chunks.*/
-void lodepng_info_init(LodePNGInfo* info)
-{
-  info->compression_method = 0;
-  info->filter_method = 0;
-  info->interlace_method = 0;
-  lodepng_color_mode_init(&info->color);
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  info->background_defined = 0;
-  info->background_r = 0;
-  info->background_g = 0;
-  info->background_b = 0;
-
-  info->time_defined = 0;
-  info->phys_defined = 0;
-
-  LodePNGText_init(info);
-  LodePNGIText_init(info);
-  LodePNGUnknownChunks_init(info);
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-}
-
-/*Frees everything *info owns: the color mode's palette and, when ancillary chunk
-support is compiled in, the texts, international texts and unknown chunk buffers.*/
-void lodepng_info_cleanup(LodePNGInfo* info)
-{
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  LodePNGUnknownChunks_cleanup(info);
-
-  LodePNGIText_cleanup(info);
-  LodePNGText_cleanup(info);
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-  lodepng_color_mode_cleanup(&info->color);
-}
-
-/*
-Makes *dest a deep copy of *source. dest is cleaned up first, so it must have been
-initialized before. Returns 0 on success or the first error code reported by one of
-the copy helpers.
-The order of the statements matters: the struct assignment is a shallow copy, so every
-owned pointer in dest has to be reset before its deep copy is made.
-*/
-unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source)
-{
-  lodepng_info_cleanup(dest);
-  /*shallow copy: from here on the pointers in dest alias those of source until they
-  are replaced below*/
-  *dest = *source;
-  /*drop the aliased palette pointer before lodepng_color_mode_copy cleans up dest->color*/
-  lodepng_color_mode_init(&dest->color);
-  CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color));
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  /*both text copy helpers reset the arrays of dest themselves before adding the entries*/
-  CERROR_TRY_RETURN(LodePNGText_copy(dest, source));
-  CERROR_TRY_RETURN(LodePNGIText_copy(dest, source));
-
-  /*LodePNGUnknownChunks_copy frees the buffers of dest first, so the aliased pointers
-  must be cleared before calling it*/
-  LodePNGUnknownChunks_init(dest);
-  CERROR_TRY_RETURN(LodePNGUnknownChunks_copy(dest, source));
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-  return 0;
-}
-
-/*Exchanges the contents of *a and *b by plain struct assignment; owned pointers
-simply change owner, nothing is allocated or freed.*/
-void lodepng_info_swap(LodePNGInfo* a, LodePNGInfo* b)
-{
-  LodePNGInfo held = *b;
-  *b = *a;
-  *a = held;
-}
-
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*Stores one bit group in a packed byte array, most significant group first.
-index: bitgroup index, bits: bitgroup size (1, 2 or 4), in: bitgroup value,
-out: octet array to add the bits to.
-The first group of a byte overwrites the whole byte, later groups are OR-ed in.*/
-static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in)
-{
-  unsigned last_slot; /*8 / bits - 1: highest group position within a byte*/
-  unsigned slot;      /*position of this group within its byte, 0 = first*/
-  unsigned value;
-  size_t byte_pos = index * bits / 8;
-
-  if(bits == 1) last_slot = 7;
-  else if(bits == 2) last_slot = 3;
-  else last_slot = 1;
-
-  slot = (unsigned)(index & last_slot);
-  value = in & ((1u << bits) - 1u); /*filter out any other bits of the input value*/
-  value <<= bits * (last_slot - slot);
-
-  if(slot != 0) out[byte_pos] |= value;
-  else out[byte_pos] = (unsigned char)value;
-}
-
-typedef struct ColorTree ColorTree;
-
-/*
-One node of a color tree
-This is the data structure used to count the number of unique colors and to get a palette
-index for a color. It's like an octree, but because the alpha channel is used too, each
-node has 16 instead of 8 children.
-A color is stored as a path of 8 levels: level n selects the child from bit n of the
-r, g, b and a values (see color_tree_get and color_tree_add).
-*/
-struct ColorTree
-{
-  ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level*/
-  int index; /*the payload. Only has a meaningful value if this is in the last level*/
-};
-
-/*Puts a node in the empty state: no children and no index (-1).*/
-static void color_tree_init(ColorTree* tree)
-{
-  ColorTree** slot = tree->children;
-  ColorTree** const end = slot + 16;
-  while(slot != end) *slot++ = 0;
-  tree->index = -1;
-}
-
-/*
-Recursively frees all descendants of tree. The node passed in is not freed itself
-and its child pointers are not reset.
-*/
-static void color_tree_cleanup(ColorTree* tree)
-{
-  int slot = 0;
-  while(slot != 16)
-  {
-    ColorTree* child = tree->children[slot++];
-    if(!child) continue;
-    color_tree_cleanup(child);
-    lodepng_free(child);
-  }
-}
-
-/*
-Looks up an RGBA8 color by descending 8 levels, using one bit of every channel
-per level, least significant bit first.
-returns -1 if color not present, its index otherwise
-*/
-static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a)
-{
-  int level;
-  for(level = 0; level != 8; ++level)
-  {
-    /*child slot: bit 3 = red, bit 2 = green, bit 1 = blue, bit 0 = alpha*/
-    int slot = (((r >> level) & 1) << 3) | (((g >> level) & 1) << 2)
-             | (((b >> level) & 1) << 1) | ((a >> level) & 1);
-    ColorTree* next = tree->children[slot];
-    if(!next) return -1;
-    tree = next;
-  }
-  if(!tree) return -1;
-  return tree->index;
-}
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*returns 1 if the color has a non-negative index in the tree, 0 otherwise*/
-static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a)
-{
-  int found = color_tree_get(tree, r, g, b, a);
-  return found < 0 ? 0 : 1;
-}
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-/*
-Inserts an RGBA8 color with the given index, creating the intermediate nodes on the way.
-color is not allowed to already exist.
-Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist")
-If allocating a node fails the insertion is abandoned: the nodes created so far stay
-in the tree with index -1, so color_tree_get keeps reporting the color as absent
-instead of a null pointer being dereferenced.
-*/
-static void color_tree_add(ColorTree* tree,
-                           unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index)
-{
-  int bit;
-  for(bit = 0; bit < 8; ++bit)
-  {
-    int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1);
-    if(!tree->children[i])
-    {
-      ColorTree* node = (ColorTree*)lodepng_malloc(sizeof(ColorTree));
-      if(!node) return; /*out of memory: tree stays consistent, color is simply not added*/
-      color_tree_init(node);
-      tree->children[i] = node;
-    }
-    tree = tree->children[i];
-  }
-  tree->index = (int)index;
-}
-
-/*
-Stores one pixel, given as 8-bit RGBA, at pixel index i of an image of any color type.
-For palette output the index is looked up in tree. For grey output the red channel is
-taken as the grey value. 16-bit outputs get the 8-bit value in both bytes.
-Returns 0 on success, 82 if a palette is required and the color is not in it.
-*/
-static unsigned rgba8ToPixel(unsigned char* out, size_t i,
-                             const LodePNGColorMode* mode, ColorTree* tree /*for palette*/,
-                             unsigned char r, unsigned char g, unsigned char b, unsigned char a)
-{
-  switch(mode->colortype)
-  {
-    case LCT_GREY:
-    {
-      unsigned char grey = r; /*not the average ((unsigned short)r + g + b) / 3*/
-      if(mode->bitdepth == 8) out[i] = grey;
-      else if(mode->bitdepth == 16)
-      {
-        out[i * 2 + 1] = grey;
-        out[i * 2 + 0] = grey;
-      }
-      else
-      {
-        /*keep only the most significant bits of grey*/
-        grey = (grey >> (8 - mode->bitdepth)) & ((1 << mode->bitdepth) - 1);
-        addColorBits(out, i, mode->bitdepth, grey);
-      }
-      break;
-    }
-    case LCT_RGB:
-    {
-      if(mode->bitdepth == 8)
-      {
-        unsigned char* px = out + i * 3;
-        px[0] = r;
-        px[1] = g;
-        px[2] = b;
-      }
-      else
-      {
-        unsigned char* px = out + i * 6;
-        px[0] = px[1] = r;
-        px[2] = px[3] = g;
-        px[4] = px[5] = b;
-      }
-      break;
-    }
-    case LCT_PALETTE:
-    {
-      int palette_index = color_tree_get(tree, r, g, b, a);
-      if(palette_index < 0) return 82; /*color not in palette*/
-      if(mode->bitdepth == 8) out[i] = palette_index;
-      else addColorBits(out, i, mode->bitdepth, (unsigned)palette_index);
-      break;
-    }
-    case LCT_GREY_ALPHA:
-    {
-      unsigned char grey = r; /*not the average ((unsigned short)r + g + b) / 3*/
-      if(mode->bitdepth == 8)
-      {
-        unsigned char* px = out + i * 2;
-        px[0] = grey;
-        px[1] = a;
-      }
-      else if(mode->bitdepth == 16)
-      {
-        unsigned char* px = out + i * 4;
-        px[0] = px[1] = grey;
-        px[2] = px[3] = a;
-      }
-      break;
-    }
-    case LCT_RGBA:
-    {
-      if(mode->bitdepth == 8)
-      {
-        unsigned char* px = out + i * 4;
-        px[0] = r;
-        px[1] = g;
-        px[2] = b;
-        px[3] = a;
-      }
-      else
-      {
-        unsigned char* px = out + i * 8;
-        px[0] = px[1] = r;
-        px[2] = px[3] = g;
-        px[4] = px[5] = b;
-        px[6] = px[7] = a;
-      }
-      break;
-    }
-    default: break; /*other color types: nothing is written*/
-  }
-
-  return 0; /*no error*/
-}
-
-/*
-Stores one pixel, given as 16-bit RGBA, at pixel index i of a 16-bit-per-channel image.
-Every channel is written big endian (high byte first). For the grey types the red
-channel is taken as the grey value. Palette output is not handled here.
-*/
-static void rgba16ToPixel(unsigned char* out, size_t i,
-                         const LodePNGColorMode* mode,
-                         unsigned short r, unsigned short g, unsigned short b, unsigned short a)
-{
-  switch(mode->colortype)
-  {
-    case LCT_GREY:
-    {
-      unsigned char* px = out + i * 2;
-      px[0] = (r >> 8) & 255;
-      px[1] = r & 255;
-      break;
-    }
-    case LCT_RGB:
-    {
-      unsigned char* px = out + i * 6;
-      px[0] = (r >> 8) & 255;
-      px[1] = r & 255;
-      px[2] = (g >> 8) & 255;
-      px[3] = g & 255;
-      px[4] = (b >> 8) & 255;
-      px[5] = b & 255;
-      break;
-    }
-    case LCT_GREY_ALPHA:
-    {
-      unsigned char* px = out + i * 4;
-      px[0] = (r >> 8) & 255;
-      px[1] = r & 255;
-      px[2] = (a >> 8) & 255;
-      px[3] = a & 255;
-      break;
-    }
-    case LCT_RGBA:
-    {
-      unsigned char* px = out + i * 8;
-      px[0] = (r >> 8) & 255;
-      px[1] = r & 255;
-      px[2] = (g >> 8) & 255;
-      px[3] = g & 255;
-      px[4] = (b >> 8) & 255;
-      px[5] = b & 255;
-      px[6] = (a >> 8) & 255;
-      px[7] = a & 255;
-      break;
-    }
-    default: break; /*other color types: nothing is written*/
-  }
-}
-
-/*
-Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.
-16-bit channels are reduced to their high byte. Grey values below 8 bits are scaled to 0-255.
-A pixel equal to the color key (if defined) gets alpha 0, otherwise types without alpha get 255.
-*/
-static void getPixelColorRGBA8(unsigned char* r, unsigned char* g,
-                               unsigned char* b, unsigned char* a,
-                               const unsigned char* in, size_t i,
-                               const LodePNGColorMode* mode)
-{
-  switch(mode->colortype)
-  {
-    case LCT_GREY:
-    {
-      if(mode->bitdepth == 8)
-      {
-        *r = *g = *b = in[i];
-        *a = (mode->key_defined && *r == mode->key_r) ? 0 : 255;
-      }
-      else if(mode->bitdepth == 16)
-      {
-        const unsigned char* px = in + i * 2;
-        *r = *g = *b = px[0];
-        *a = (mode->key_defined && 256U * px[0] + px[1] == mode->key_r) ? 0 : 255;
-      }
-      else
-      {
-        unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
-        size_t bitpos = i * mode->bitdepth;
-        unsigned value = readBitsFromReversedStream(&bitpos, in, mode->bitdepth);
-        *r = *g = *b = (value * 255) / highest;
-        *a = (mode->key_defined && value == mode->key_r) ? 0 : 255;
-      }
-      break;
-    }
-    case LCT_RGB:
-    {
-      if(mode->bitdepth == 8)
-      {
-        const unsigned char* px = in + i * 3;
-        *r = px[0];
-        *g = px[1];
-        *b = px[2];
-        *a = (mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) ? 0 : 255;
-      }
-      else
-      {
-        const unsigned char* px = in + i * 6;
-        *r = px[0];
-        *g = px[2];
-        *b = px[4];
-        *a = (mode->key_defined && 256U * px[0] + px[1] == mode->key_r
-              && 256U * px[2] + px[3] == mode->key_g
-              && 256U * px[4] + px[5] == mode->key_b) ? 0 : 255;
-      }
-      break;
-    }
-    case LCT_PALETTE:
-    {
-      unsigned index;
-      if(mode->bitdepth == 8) index = in[i];
-      else
-      {
-        size_t bitpos = i * mode->bitdepth;
-        index = readBitsFromReversedStream(&bitpos, in, mode->bitdepth);
-      }
-
-      if(index < mode->palettesize)
-      {
-        const unsigned char* entry = mode->palette + index * 4;
-        *r = entry[0];
-        *g = entry[1];
-        *b = entry[2];
-        *a = entry[3];
-      }
-      else
-      {
-        /*This is an error according to the PNG spec, but common PNG decoders make it black instead.
-        Done here too, slightly faster due to no error handling needed.*/
-        *r = *g = *b = 0;
-        *a = 255;
-      }
-      break;
-    }
-    case LCT_GREY_ALPHA:
-    {
-      if(mode->bitdepth == 8)
-      {
-        *r = *g = *b = in[i * 2 + 0];
-        *a = in[i * 2 + 1];
-      }
-      else
-      {
-        *r = *g = *b = in[i * 4 + 0];
-        *a = in[i * 4 + 2];
-      }
-      break;
-    }
-    case LCT_RGBA:
-    {
-      /*8 bit: 4 bytes per pixel; otherwise 8 bytes per pixel, of which the high bytes are taken*/
-      if(mode->bitdepth == 8)
-      {
-        const unsigned char* px = in + i * 4;
-        *r = px[0];
-        *g = px[1];
-        *b = px[2];
-        *a = px[3];
-      }
-      else
-      {
-        const unsigned char* px = in + i * 8;
-        *r = px[0];
-        *g = px[2];
-        *b = px[4];
-        *a = px[6];
-      }
-      break;
-    }
-    default: break; /*other color types: outputs are left untouched*/
-  }
-}
-
-/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color
-mode test cases, optimized to convert the colors much faster, when converting
-to RGBA or RGB with 8 bit per channel. buffer must be RGBA or RGB output with
-enough memory, if has_alpha is true the output is RGBA. mode has the color mode
-of the input buffer.
-buffer is advanced by num_channels (4 or 3) per pixel. For bit depths below 8 the
-bit position j runs continuously over the whole input, so pixels are read as one
-packed stream.*/
-static void getPixelColorsRGBA8(unsigned char* buffer, size_t numpixels,
-                                unsigned has_alpha, const unsigned char* in,
-                                const LodePNGColorMode* mode)
-{
-  unsigned num_channels = has_alpha ? 4 : 3;
-  size_t i;
-  if(mode->colortype == LCT_GREY)
-  {
-    if(mode->bitdepth == 8)
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = buffer[1] = buffer[2] = in[i];
-        if(has_alpha) buffer[3] = mode->key_defined && in[i] == mode->key_r ? 0 : 255;
-      }
-    }
-    else if(mode->bitdepth == 16)
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        /*only the high byte is kept as color; the full 16-bit value is compared with the key*/
-        buffer[0] = buffer[1] = buffer[2] = in[i * 2];
-        if(has_alpha) buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255;
-      }
-    }
-    else
-    {
-      unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
-      size_t j = 0;
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
-        /*scale the low bit depth value to the 0-255 range*/
-        buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest;
-        if(has_alpha) buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255;
-      }
-    }
-  }
-  else if(mode->colortype == LCT_RGB)
-  {
-    if(mode->bitdepth == 8)
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = in[i * 3 + 0];
-        buffer[1] = in[i * 3 + 1];
-        buffer[2] = in[i * 3 + 2];
-        if(has_alpha) buffer[3] = mode->key_defined && buffer[0] == mode->key_r
-           && buffer[1]== mode->key_g && buffer[2] == mode->key_b ? 0 : 255;
-      }
-    }
-    else
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        /*6 bytes per input pixel; the high byte of every channel is taken*/
-        buffer[0] = in[i * 6 + 0];
-        buffer[1] = in[i * 6 + 2];
-        buffer[2] = in[i * 6 + 4];
-        if(has_alpha) buffer[3] = mode->key_defined
-           && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
-           && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
-           && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255;
-      }
-    }
-  }
-  else if(mode->colortype == LCT_PALETTE)
-  {
-    unsigned index;
-    size_t j = 0;
-    for(i = 0; i != numpixels; ++i, buffer += num_channels)
-    {
-      if(mode->bitdepth == 8) index = in[i];
-      else index = readBitsFromReversedStream(&j, in, mode->bitdepth);
-
-      if(index >= mode->palettesize)
-      {
-        /*This is an error according to the PNG spec, but most PNG decoders make it black instead.
-        Done here too, slightly faster due to no error handling needed.*/
-        buffer[0] = buffer[1] = buffer[2] = 0;
-        if(has_alpha) buffer[3] = 255;
-      }
-      else
-      {
-        /*palette entries are 4 bytes each: r, g, b, a*/
-        buffer[0] = mode->palette[index * 4 + 0];
-        buffer[1] = mode->palette[index * 4 + 1];
-        buffer[2] = mode->palette[index * 4 + 2];
-        if(has_alpha) buffer[3] = mode->palette[index * 4 + 3];
-      }
-    }
-  }
-  else if(mode->colortype == LCT_GREY_ALPHA)
-  {
-    if(mode->bitdepth == 8)
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0];
-        if(has_alpha) buffer[3] = in[i * 2 + 1];
-      }
-    }
-    else
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0];
-        if(has_alpha) buffer[3] = in[i * 4 + 2];
-      }
-    }
-  }
-  else if(mode->colortype == LCT_RGBA)
-  {
-    if(mode->bitdepth == 8)
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = in[i * 4 + 0];
-        buffer[1] = in[i * 4 + 1];
-        buffer[2] = in[i * 4 + 2];
-        if(has_alpha) buffer[3] = in[i * 4 + 3];
-      }
-    }
-    else
-    {
-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
-      {
-        buffer[0] = in[i * 8 + 0];
-        buffer[1] = in[i * 8 + 2];
-        buffer[2] = in[i * 8 + 4];
-        if(has_alpha) buffer[3] = in[i * 8 + 6];
-      }
-    }
-  }
-}
-
-/*
-Get RGBA16 color of pixel with index i (y * width + x) from the raw image with
-given color type, but the given color type must be 16-bit itself.
-Channels are read big endian. A pixel equal to the color key (if defined) gets alpha 0,
-otherwise types without alpha get 65535. Palette input is not handled here.
-*/
-static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a,
-                                const unsigned char* in, size_t i, const LodePNGColorMode* mode)
-{
-  switch(mode->colortype)
-  {
-    case LCT_GREY:
-    {
-      const unsigned char* px = in + i * 2;
-      *r = *g = *b = 256u * px[0] + px[1];
-      *a = (mode->key_defined && 256U * px[0] + px[1] == mode->key_r) ? 0 : 65535;
-      break;
-    }
-    case LCT_RGB:
-    {
-      const unsigned char* px = in + i * 6;
-      *r = 256u * px[0] + px[1];
-      *g = 256u * px[2] + px[3];
-      *b = 256u * px[4] + px[5];
-      *a = (mode->key_defined
-            && 256u * px[0] + px[1] == mode->key_r
-            && 256u * px[2] + px[3] == mode->key_g
-            && 256u * px[4] + px[5] == mode->key_b) ? 0 : 65535;
-      break;
-    }
-    case LCT_GREY_ALPHA:
-    {
-      const unsigned char* px = in + i * 4;
-      *r = *g = *b = 256u * px[0] + px[1];
-      *a = 256u * px[2] + px[3];
-      break;
-    }
-    case LCT_RGBA:
-    {
-      const unsigned char* px = in + i * 8;
-      *r = 256u * px[0] + px[1];
-      *g = 256u * px[2] + px[3];
-      *b = 256u * px[4] + px[5];
-      *a = 256u * px[6] + px[7];
-      break;
-    }
-    default: break; /*other color types: outputs are left untouched*/
-  }
-}
-
-/*
-Converts raw pixel data "in" with color mode mode_in into "out" with color mode mode_out.
-w, h: image dimensions in pixels. The caller provides an out buffer large enough for
-the converted image.
-Equal modes are copied byte for byte. For palette output a color tree is built from the
-output palette (or from the input palette when the output palette is empty) and used to
-look up the index of every pixel.
-Returns 0 on success, or the error reported by rgba8ToPixel (82: color not in palette).
-*/
-unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
-                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
-                         unsigned w, unsigned h)
-{
-  size_t i;
-  ColorTree tree;
-  /*widen before multiplying so that w * h cannot wrap around in unsigned arithmetic*/
-  size_t numpixels = (size_t)w * (size_t)h;
-  unsigned error = 0;
-
-  if(lodepng_color_mode_equal(mode_out, mode_in))
-  {
-    size_t numbytes = lodepng_get_raw_size(w, h, mode_in);
-    for(i = 0; i != numbytes; ++i) out[i] = in[i];
-    return 0;
-  }
-
-  if(mode_out->colortype == LCT_PALETTE)
-  {
-    size_t palettesize = mode_out->palettesize;
-    const unsigned char* palette = mode_out->palette;
-    size_t palsize = (size_t)1 << mode_out->bitdepth;
-    /*if the user specified output palette but did not give the values, assume
-    they want the values of the input color type (assuming that one is palette).
-    Note that we never create a new palette ourselves.*/
-    if(palettesize == 0)
-    {
-      palettesize = mode_in->palettesize;
-      palette = mode_in->palette;
-    }
-    /*only as many entries as the bit depth can index are usable*/
-    if(palettesize < palsize) palsize = palettesize;
-    color_tree_init(&tree);
-    for(i = 0; i != palsize; ++i)
-    {
-      const unsigned char* p = &palette[i * 4];
-      color_tree_add(&tree, p[0], p[1], p[2], p[3], (unsigned int)(i));
-    }
-  }
-
-  if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16)
-  {
-    for(i = 0; i != numpixels; ++i)
-    {
-      unsigned short r = 0, g = 0, b = 0, a = 0;
-      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
-      rgba16ToPixel(out, i, mode_out, r, g, b, a);
-    }
-  }
-  else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA)
-  {
-    getPixelColorsRGBA8(out, numpixels, 1, in, mode_in);
-  }
-  else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB)
-  {
-    getPixelColorsRGBA8(out, numpixels, 0, in, mode_in);
-  }
-  else
-  {
-    unsigned char r = 0, g = 0, b = 0, a = 0;
-    for(i = 0; i != numpixels; ++i)
-    {
-      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
-      /*do not return from here: the color tree below still has to be released*/
-      error = rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a);
-      if(error) break;
-    }
-  }
-
-  if(mode_out->colortype == LCT_PALETTE)
-  {
-    color_tree_cleanup(&tree);
-  }
-
-  return error; /*0 = no error*/
-}
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-/*Resets a color profile to its start values: not colored, no key, no alpha, key 0,0,0, 0 colors, 1 bit.*/
-void lodepng_color_profile_init(LodePNGColorProfile* profile)
-{
-  profile->bits = 1;
-  profile->numcolors = 0;
-
-  profile->key_b = 0;
-  profile->key_g = 0;
-  profile->key_r = 0;
-
-  profile->alpha = 0;
-  profile->key = 0;
-  profile->colored = 0;
-}
-
-/*function used for debug purposes with C++*/
-/*void printColorProfile(LodePNGColorProfile* p)
-{
-  std::cout << "colored: " << (int)p->colored << ", ";
-  std::cout << "key: " << (int)p->key << ", ";
-  std::cout << "key_r: " << (int)p->key_r << ", ";
-  std::cout << "key_g: " << (int)p->key_g << ", ";
-  std::cout << "key_b: " << (int)p->key_b << ", ";
-  std::cout << "alpha: " << (int)p->alpha << ", ";
-  std::cout << "numcolors: " << (int)p->numcolors << ", ";
-  std::cout << "bits: " << (int)p->bits << std::endl;
-}*/
-
-/*
-Returns how many bits needed to represent given value (max 8 bit)
-1 for 0 and 255, 2 for the other multiples of 85, 4 for the other multiples of 17, else 8.
-*/
-static unsigned getValueRequiredBits(unsigned char value)
-{
-  /*The scaling of 2-bit and 4-bit values uses multiples of 85 and 17*/
-  if(value % 17 != 0) return 8;
-  if(value == 0 || value == 255) return 1;
-  if(value % 85 == 0) return 2;
-  return 4;
-}
-
-/*
-Scans the image and records in profile which color features it actually uses:
-whether it is colored, whether it needs a color key or an alpha channel, how many bits
-per channel are needed, and (for inputs that are not truly 16-bit) the unique colors,
-of which the first 256 are stored in the profile's palette.
-profile must already have been inited with mode.
-It's ok to set some parameters of profile to done already.
-in: raw pixels in color mode "mode", w and h: dimensions in pixels.
-On return the profile's key_r, key_g and key_b are always in 16-bit form.
-Returns the value of the local error variable, which nothing in this function sets to non-zero.
-*/
-unsigned lodepng_get_color_profile(LodePNGColorProfile* profile,
-                                   const unsigned char* in, unsigned w, unsigned h,
-                                   const LodePNGColorMode* mode)
-{
-  unsigned error = 0;
-  size_t i;
-  ColorTree tree;
-  /*widen before multiplying so that w * h cannot wrap around in unsigned arithmetic*/
-  size_t numpixels = (size_t)w * (size_t)h;
-
-  /*each *_done flag means that property can no longer change, so checking it can be skipped*/
-  unsigned colored_done = lodepng_is_greyscale_type(mode) ? 1 : 0;
-  unsigned alpha_done = lodepng_can_have_alpha(mode) ? 0 : 1;
-  unsigned numcolors_done = 0;
-  unsigned bpp = lodepng_get_bpp(mode);
-  unsigned bits_done = bpp == 1 ? 1 : 0;
-  unsigned maxnumcolors = 257;
-  unsigned sixteen = 0;
-  if(bpp <= 8) maxnumcolors = bpp == 1 ? 2 : (bpp == 2 ? 4 : (bpp == 4 ? 16 : 256));
-
-  color_tree_init(&tree);
-
-  /*Check if the 16-bit input is truly 16-bit*/
-  if(mode->bitdepth == 16)
-  {
-    unsigned short r, g, b, a;
-    for(i = 0; i != numpixels; ++i)
-    {
-      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
-      if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) ||
-         (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/
-      {
-        sixteen = 1;
-        break;
-      }
-    }
-  }
-
-  if(sixteen)
-  {
-    unsigned short r = 0, g = 0, b = 0, a = 0;
-    profile->bits = 16;
-    bits_done = numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/
-
-    for(i = 0; i != numpixels; ++i)
-    {
-      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
-
-      if(!colored_done && (r != g || r != b))
-      {
-        profile->colored = 1;
-        colored_done = 1;
-      }
-
-      if(!alpha_done)
-      {
-        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
-        if(a != 65535 && (a != 0 || (profile->key && !matchkey)))
-        {
-          /*translucent pixel, or a second fully transparent color: a real alpha channel is needed*/
-          profile->alpha = 1;
-          alpha_done = 1;
-          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
-        }
-        else if(a == 0 && !profile->alpha && !profile->key)
-        {
-          /*first fully transparent pixel: remember its color as color key candidate*/
-          profile->key = 1;
-          profile->key_r = r;
-          profile->key_g = g;
-          profile->key_b = b;
-        }
-        else if(a == 65535 && profile->key && matchkey)
-        {
-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
-          profile->alpha = 1;
-          alpha_done = 1;
-        }
-      }
-      if(alpha_done && numcolors_done && colored_done && bits_done) break;
-    }
-
-    /*pixels seen before the key was chosen were not compared with it: check all of them again*/
-    if(profile->key && !profile->alpha)
-    {
-      for(i = 0; i != numpixels; ++i)
-      {
-        getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
-        if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b)
-        {
-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
-          profile->alpha = 1;
-          alpha_done = 1;
-          break; /*result is final, no need to look at the remaining pixels*/
-        }
-      }
-    }
-  }
-  else /* < 16-bit */
-  {
-    unsigned char r = 0, g = 0, b = 0, a = 0;
-    for(i = 0; i != numpixels; ++i)
-    {
-      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode);
-
-      if(!bits_done && profile->bits < 8)
-      {
-        /*only r is checked, < 8 bits is only relevant for greyscale*/
-        unsigned bits = getValueRequiredBits(r);
-        if(bits > profile->bits) profile->bits = bits;
-      }
-      bits_done = (profile->bits >= bpp);
-
-      if(!colored_done && (r != g || r != b))
-      {
-        profile->colored = 1;
-        colored_done = 1;
-        if(profile->bits < 8) profile->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/
-      }
-
-      if(!alpha_done)
-      {
-        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
-        if(a != 255 && (a != 0 || (profile->key && !matchkey)))
-        {
-          /*translucent pixel, or a second fully transparent color: a real alpha channel is needed*/
-          profile->alpha = 1;
-          alpha_done = 1;
-          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
-        }
-        else if(a == 0 && !profile->alpha && !profile->key)
-        {
-          /*first fully transparent pixel: remember its color as color key candidate*/
-          profile->key = 1;
-          profile->key_r = r;
-          profile->key_g = g;
-          profile->key_b = b;
-        }
-        else if(a == 255 && profile->key && matchkey)
-        {
-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
-          profile->alpha = 1;
-          alpha_done = 1;
-          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
-        }
-      }
-
-      if(!numcolors_done)
-      {
-        if(!color_tree_has(&tree, r, g, b, a))
-        {
-          color_tree_add(&tree, r, g, b, a, profile->numcolors);
-          /*only the first 256 unique colors are stored, numcolors itself keeps counting*/
-          if(profile->numcolors < 256)
-          {
-            unsigned char* p = profile->palette;
-            unsigned n = profile->numcolors;
-            p[n * 4 + 0] = r;
-            p[n * 4 + 1] = g;
-            p[n * 4 + 2] = b;
-            p[n * 4 + 3] = a;
-          }
-          ++profile->numcolors;
-          numcolors_done = profile->numcolors >= maxnumcolors;
-        }
-      }
-
-      if(alpha_done && numcolors_done && colored_done && bits_done) break;
-    }
-
-    /*pixels seen before the key was chosen were not compared with it: check all of them again*/
-    if(profile->key && !profile->alpha)
-    {
-      for(i = 0; i != numpixels; ++i)
-      {
-        getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode);
-        if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b)
-        {
-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
-          profile->alpha = 1;
-          alpha_done = 1;
-          break; /*result is final, no need to look at the remaining pixels*/
-        }
-      }
-    }
-
-    /*make the profile's key always 16-bit for consistency - repeat each byte twice*/
-    profile->key_r += (profile->key_r << 8);
-    profile->key_g += (profile->key_g << 8);
-    profile->key_b += (profile->key_b << 8);
-  }
-
-  color_tree_cleanup(&tree);
-  return error;
-}
-
-/*Automatically chooses color type that gives smallest amount of bits in the
-output image, e.g. grey if there are only greyscale pixels, palette if there
-are less than 256 colors, ...
-Updates values of mode with a potentially smaller color model. mode_out should
-contain the user chosen color model, but will be overwritten with the new chosen one.
-image: raw pixels in color mode mode_in, w and h: dimensions in pixels.
-Returns 0 on success, otherwise the first error reported while profiling the image,
-filling the palette or copying the input color mode.*/
-unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out,
-                                   const unsigned char* image, unsigned w, unsigned h,
-                                   const LodePNGColorMode* mode_in)
-{
-  LodePNGColorProfile prof;
-  unsigned error = 0;
-  unsigned i, n, palettebits, grey_ok, palette_ok;
-  /*widen before multiplying so that w * h cannot wrap around in unsigned arithmetic*/
-  size_t numpixels = (size_t)w * (size_t)h;
-
-  lodepng_color_profile_init(&prof);
-  error = lodepng_get_color_profile(&prof, image, w, h, mode_in);
-  if(error) return error;
-  mode_out->key_defined = 0;
-
-  if(prof.key && numpixels <= 16)
-  {
-    prof.alpha = 1; /*too few pixels to justify tRNS chunk overhead*/
-    if(prof.bits < 8) prof.bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
-  }
-  grey_ok = !prof.colored && !prof.alpha; /*grey without alpha, with potentially low bits*/
-  n = prof.numcolors;
-  palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8));
-  palette_ok = n <= 256 && ((size_t)n * 2 < numpixels) && prof.bits <= 8;
-  if(numpixels < (size_t)n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/
-  if(grey_ok && prof.bits <= palettebits) palette_ok = 0; /*grey is less overhead*/
-
-  if(palette_ok)
-  {
-    unsigned char* p = prof.palette;
-    lodepng_palette_clear(mode_out); /*remove potential earlier palette*/
-    for(i = 0; i != prof.numcolors; ++i)
-    {
-      error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]);
-      if(error) break;
-    }
-
-    mode_out->colortype = LCT_PALETTE;
-    mode_out->bitdepth = palettebits;
-
-    if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize
-        && mode_in->bitdepth == mode_out->bitdepth)
-    {
-      /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/
-      unsigned copy_error;
-      lodepng_color_mode_cleanup(mode_out);
-      copy_error = lodepng_color_mode_copy(mode_out, mode_in);
-      if(!error) error = copy_error; /*report a failed copy, but keep an earlier error if there was one*/
-    }
-  }
-  else /*8-bit or 16-bit per channel*/
-  {
-    mode_out->bitdepth = prof.bits;
-    mode_out->colortype = prof.alpha ? (prof.colored ? LCT_RGBA : LCT_GREY_ALPHA)
-                                     : (prof.colored ? LCT_RGB : LCT_GREY);
-
-    if(prof.key && !prof.alpha)
-    {
-      unsigned mask = (1u << mode_out->bitdepth) - 1u; /*profile always uses 16-bit, mask converts it*/
-      mode_out->key_r = prof.key_r & mask;
-      mode_out->key_g = prof.key_g & mask;
-      mode_out->key_b = prof.key_b & mask;
-      mode_out->key_defined = 1;
-    }
-  }
-
-  return error;
-}
-
-#endif /* #ifdef LODEPNG_COMPILE_ENCODER */
-
-/*
-Paeth predictor, used by PNG filter type 4
-Returns whichever of a, b and c is closest to a + b - c; ties are resolved in
-favour of a, then b, then c.
-The parameters are of type short, but should come from unsigned chars, the shorts
-are only needed to make the paeth calculation correct.
-*/
-static unsigned char paethPredictor(short a, short b, short c)
-{
-  /*distance of each candidate to the estimate p = a + b - c*/
-  const short dist_a = (short)abs(b - c);
-  const short dist_b = (short)abs(a - c);
-  const short dist_c = (short)abs(a + b - c - c);
-
-  if(dist_c < dist_a && dist_c < dist_b) return (unsigned char)c;
-  return (unsigned char)(dist_b < dist_a ? b : a);
-}
-
-/*shared values used by multiple Adam7 related functions*/
-/*one entry per Adam7 pass (7 passes): pass i starts at pixel (ADAM7_IX[i], ADAM7_IY[i])
-and advances by ADAM7_DX[i] horizontally and ADAM7_DY[i] vertically*/
-
-static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values*/
-static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/
-static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/
-static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/
-
-/*
-Computes the dimensions and byte offsets of the 7 Adam7 reduced images.
-passw: output containing the width of the 7 passes
-passh: output containing the height of the 7 passes
-filter_passstart: output containing the index of the start and end of each
- reduced image with filter bytes
-padded_passstart: output containing the index of the start and end of each
- reduced image when without filter bytes but with padded scanlines
-passstart: output containing the index of the start and end of each reduced
- image without padding between scanlines, but still padding between the images
-w, h: width and height of non-interlaced image
-bpp: bits per pixel
-"padded" is only relevant if bpp is less than 8 and a scanline or image does not
- end at a full byte
-*/
-static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8],
-                                size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp)
-{
-  /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/
-  unsigned pass;
-
-  filter_passstart[0] = 0;
-  padded_passstart[0] = 0;
-  passstart[0] = 0;
-
-  for(pass = 0; pass != 7; ++pass)
-  {
-    /*width and height in pixels of this pass; an empty pass is 0 in both directions*/
-    unsigned pw = (w + ADAM7_DX[pass] - ADAM7_IX[pass] - 1) / ADAM7_DX[pass];
-    unsigned ph = (h + ADAM7_DY[pass] - ADAM7_IY[pass] - 1) / ADAM7_DY[pass];
-    unsigned linebytes; /*bytes per scanline, bits padded to fill a full byte at the end*/
-    if(pw == 0) ph = 0;
-    if(ph == 0) pw = 0;
-    passw[pass] = pw;
-    passh[pass] = ph;
-
-    linebytes = (pw * bpp + 7) / 8;
-
-    /*an empty pass takes 0 bytes, not 1 (no filtertype-byte)*/
-    filter_passstart[pass + 1] = filter_passstart[pass] + ((pw && ph) ? ph * (1 + linebytes) : 0);
-    padded_passstart[pass + 1] = padded_passstart[pass] + ph * linebytes;
-    /*only padded at end of reduced image*/
-    passstart[pass + 1] = passstart[pass] + (ph * pw * bpp + 7) / 8;
-  }
-}
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / PNG Decoder                                                            / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*read the information from the header and store it in the LodePNGInfo. return value is error
-w, h: outputs, the image dimensions read from the IHDR chunk
-state: info_png is reset and filled in; state->error receives the returned error code
-in, insize: the PNG data; only the first 33 bytes (signature + IHDR chunk) are looked at
-The checks run in a fixed order, so the first failing one decides the error code.*/
-unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state,
-                         const unsigned char* in, size_t insize)
-{
-  LodePNGInfo* info = &state->info_png;
-  if(insize == 0 || in == 0)
-  {
-    CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/
-  }
-  if(insize < 33)
-  {
-    CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/
-  }
-
-  /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/
-  lodepng_info_cleanup(info);
-  lodepng_info_init(info);
-
-  if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71
-     || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10)
-  {
-    CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/
-  }
-  /*the first chunk starts at byte 8, right after the signature*/
-  if(lodepng_chunk_length(in + 8) != 13)
-  {
-    CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/
-  }
-  if(!lodepng_chunk_type_equals(in + 8, "IHDR"))
-  {
-    CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/
-  }
-
-  /*read the values given in the header*/
-  *w = lodepng_read32bitInt(&in[16]);
-  *h = lodepng_read32bitInt(&in[20]);
-  info->color.bitdepth = in[24];
-  info->color.colortype = (LodePNGColorType)in[25];
-  info->compression_method = in[26];
-  info->filter_method = in[27];
-  info->interlace_method = in[28];
-
-  if(*w == 0 || *h == 0)
-  {
-    CERROR_RETURN_ERROR(state->error, 93); /*error: width or height is 0*/
-  }
-
-  if(!state->decoder.ignore_crc)
-  {
-    /*the stored CRC follows the 13 data bytes; it is checked against the 17 bytes starting at in[12]*/
-    unsigned CRC = lodepng_read32bitInt(&in[29]);
-    unsigned checksum = lodepng_crc32(&in[12], 17);
-    if(CRC != checksum)
-    {
-      CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/
-    }
-  }
-
-  /*error: only compression method 0 is allowed in the specification*/
-  if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32);
-  /*error: only filter method 0 is allowed in the specification*/
-  if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33);
-  /*error: only interlace methods 0 and 1 exist in the specification*/
-  if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34);
-
-  /*finally check that the color type and bit depth form a valid combination*/
-  state->error = checkColorValidity(info->color.colortype, info->color.bitdepth);
-  return state->error;
-}
-
-static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon,
-                                 size_t bytewidth, unsigned char filterType, size_t length)
-{
-  /*
-  For PNG filter method 0
-  unfilter a PNG image scanline by scanline. when the pixels are smaller than 1 byte,
-  the filter works byte per byte (bytewidth = 1)
-  precon is the previous unfiltered scanline, recon the result, scanline the current one
-  the incoming scanlines do NOT include the filtertype byte, that one is given in the parameter filterType instead
-  recon and scanline MAY be the same memory address! precon must be disjoint.
-  precon may be 0 (no previous scanline): the code then behaves as if that line held only zero bytes.
-  length is the number of bytes in the scanline.
-  return value is 0 on success, 36 if filterType is not one of 0..4.
-  */
-
-  size_t i;
-  switch(filterType)
-  {
-    case 0: /*plain copy*/
-      for(i = 0; i != length; ++i) recon[i] = scanline[i];
-      break;
-    case 1: /*add the reconstructed byte that lies bytewidth positions to the left*/
-      for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
-      for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth];
-      break;
-    case 2: /*add the byte at the same position in the previous line*/
-      if(precon)
-      {
-        for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i];
-      }
-      else
-      {
-        for(i = 0; i != length; ++i) recon[i] = scanline[i];
-      }
-      break;
-    case 3: /*add half of the sum of the left byte and the byte above (left counts as 0 for the first pixel)*/
-      if(precon)
-      {
-        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1);
-        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1);
-      }
-      else
-      {
-        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
-        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1);
-      }
-      break;
-    case 4: /*add the paethPredictor of the left, above and upper-left bytes*/
-      if(precon)
-      {
-        for(i = 0; i != bytewidth; ++i)
-        {
-          recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/
-        }
-        for(i = bytewidth; i < length; ++i)
-        {
-          recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth]));
-        }
-      }
-      else
-      {
-        for(i = 0; i != bytewidth; ++i)
-        {
-          recon[i] = scanline[i];
-        }
-        for(i = bytewidth; i < length; ++i)
-        {
-          /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/
-          recon[i] = (scanline[i] + recon[i - bytewidth]);
-        }
-      }
-      break;
-    default: return 36; /*error: unexisting filter type given*/
-  }
-  return 0;
-}
-
-static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
-{
-  /*
-  For PNG filter method 0
-  this function unfilters a single image (e.g. without interlacing this is called once, with Adam7 seven times)
-  out must have enough bytes allocated already, in must have the scanlines + 1 filtertype byte per scanline
-  w and h are image dimensions or dimensions of reduced image, bpp is bits per pixel
-  in and out are allowed to be the same memory address (but aren't the same size since in has the extra filter bytes)
-  return value is 0 on success, or the error returned by unfilterScanline for a bad filter type byte
-  */
-
-  unsigned y;
-  unsigned char* prevline = 0; /*stays 0 for the first scanline: there is no line above it*/
-
-  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
-  size_t bytewidth = (bpp + 7) / 8;
-  /*the product is computed in size_t: w * bpp does not fit in an unsigned for very wide images
-  with many bits per pixel, and a wrapped line size would make every index below wrong*/
-  size_t linebytes = ((size_t)w * (size_t)bpp + 7) / 8;
-
-  for(y = 0; y < h; ++y)
-  {
-    size_t outindex = linebytes * y;
-    size_t inindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
-    unsigned char filterType = in[inindex];
-
-    CERROR_TRY_RETURN(unfilterScanline(&out[outindex], &in[inindex + 1], prevline, bytewidth, filterType, linebytes));
-
-    /*the line just reconstructed is the "previous line" of the next iteration*/
-    prevline = &out[outindex];
-  }
-
-  return 0;
-}
-
-/*
-in: Adam7 interlaced image, with no padding bits between scanlines, but between
- reduced images so that each reduced image starts at a byte.
-out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h
-bpp: bits per pixel
-out has the following size in bits: w * h * bpp.
-in is possibly bigger due to padding bits between reduced images.
-out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation
-(because that's likely a little bit faster)
-NOTE: comments about padding bits are only relevant if bpp < 8
-The size and start offset of each of the 7 reduced images come from Adam7_getpassvalues; the position
-of a pixel in the full image is computed from the ADAM7_IX/IY (origin) and ADAM7_DX/DY (step) tables.
-*/
-static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
-{
-  unsigned passw[7], passh[7];
-  size_t filter_passstart[8], padded_passstart[8], passstart[8];
-  unsigned i;
-
-  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
-
-  if(bpp >= 8) /*whole bytes per pixel: pixels can be copied byte by byte*/
-  {
-    for(i = 0; i != 7; ++i)
-    {
-      unsigned x, y, b;
-      size_t bytewidth = bpp / 8;
-      for(y = 0; y < passh[i]; ++y)
-      for(x = 0; x < passw[i]; ++x)
-      {
-        size_t pixelinstart = passstart[i] + (y * passw[i] + x) * bytewidth;
-        size_t pixeloutstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth;
-        for(b = 0; b < bytewidth; ++b)
-        {
-          out[pixeloutstart + b] = in[pixelinstart + b];
-        }
-      }
-    }
-  }
-  else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/
-  {
-    for(i = 0; i != 7; ++i)
-    {
-      unsigned x, y, b;
-      unsigned ilinebits = bpp * passw[i];
-      unsigned olinebits = bpp * w;
-      size_t obp, ibp; /*bit pointers (for out and in buffer)*/
-      for(y = 0; y < passh[i]; ++y)
-      for(x = 0; x < passw[i]; ++x)
-      {
-        ibp = (8 * passstart[i]) + (y * ilinebits + x * bpp);
-        obp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp;
-        for(b = 0; b < bpp; ++b)
-        {
-          unsigned char bit = readBitFromReversedStream(&ibp, in);
-          /*note that this function assumes the out buffer is completely 0, use setBitOfReversedStream otherwise*/
-          setBitOfReversedStream0(&obp, out, bit);
-        }
-      }
-    }
-  }
-}
-
-static void removePaddingBits(unsigned char* out, const unsigned char* in,
-                              size_t olinebits, size_t ilinebits, unsigned h)
-{
-  /*
-  Scanlines whose bit count is not a multiple of 8 still carry padding bits after unfiltering.
-  This routine copies h lines of olinebits bits each from in to out and skips the
-  (ilinebits - olinebits) padding bits that follow every input line, so that the output is one
-  continuous bit stream as needed by the Adam7 code, the color convert code and the user.
-  in and out are allowed to be the same buffer, in may also be higher but still overlapping; in must
-  have >= ilinebits*h bits, out must have >= olinebits*h bits, olinebits must be <= ilinebits
-  also used to move bits after earlier such operations happened, e.g. in a sequence of reduced images from Adam7
-  only useful if (ilinebits - olinebits) is a value in the range 1..7
-  */
-  const size_t padding = ilinebits - olinebits;
-  size_t readpos = 0, writepos = 0; /*bit positions in the in and out buffer*/
-  unsigned line;
-  for(line = 0; line != h; ++line)
-  {
-    size_t left;
-    for(left = olinebits; left != 0; --left)
-    {
-      unsigned char bit = readBitFromReversedStream(&readpos, in);
-      setBitOfReversedStream(&writepos, out, bit);
-    }
-    /*jump over the padding bits at the end of this input line*/
-    readpos += padding;
-  }
-}
-
-/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from
-the IDAT chunks (with filter index bytes and possible padding bits)
-w and h are the image dimensions, info_png supplies the color mode and the interlace method
-return value is error (31 if the color mode yields 0 bits per pixel, or an error from unfilter)*/
-static unsigned postProcessScanlines(unsigned char* out, unsigned char* in,
-                                     unsigned w, unsigned h, const LodePNGInfo* info_png)
-{
-  /*
-  This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype.
-  Steps:
-  *) if no Adam7: 1) unfilter 2) remove padding bits (= possible extra bits per scanline if bpp < 8)
-  *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace
-  NOTE: the in buffer will be overwritten with intermediate data!
-  */
-  unsigned bpp = lodepng_get_bpp(&info_png->color);
-  if(bpp == 0) return 31; /*error: invalid colortype*/
-
-  if(info_png->interlace_method == 0)
-  {
-    /*true when a scanline does not end on a byte boundary, i.e. when it has padding bits*/
-    if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8)
-    {
-      CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp)); /*unfilter in place, then strip padding into out*/
-      removePaddingBits(out, in, w * bpp, ((w * bpp + 7) / 8) * 8, h);
-    }
-    /*we can immediately filter into the out buffer, no other steps needed*/
-    else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp));
-  }
-  else /*interlace_method is 1 (Adam7)*/
-  {
-    unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8];
-    unsigned i;
-
-    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
-
-    for(i = 0; i != 7; ++i)
-    {
-      CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp));
-      /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline,
-      move bytes instead of bits or move not at all*/
-      if(bpp < 8)
-      {
-        /*remove padding bits in scanlines; after this there still may be padding
-        bits between the different reduced images: each reduced image still starts nicely at a byte*/
-        removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], passw[i] * bpp,
-                          ((passw[i] * bpp + 7) / 8) * 8, passh[i]);
-      }
-    }
-
-    /*all 7 reduced images are unfiltered now: weave their pixels into the final image*/
-    Adam7_deinterlace(out, in, w, h, bpp);
-  }
-
-  return 0;
-}
-
-/*
-palette chunk (PLTE): replaces the palette of color by the RGB triplets found in data.
-Every entry is stored as 4 bytes (RGBA) with alpha set to 255.
-chunkLength is the amount of data bytes; bytes that do not form a complete triplet are ignored.
-Returns 0 on success, 38 if the chunk holds more than 256 colors (color is left untouched in
-that case), 83 if the allocation failed.
-*/
-static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength)
-{
-  size_t numcolors = chunkLength / 3;
-  size_t pos = 0, i;
-
-  /*validate the size before touching the existing palette or allocating anything: the chunk
-  length comes straight from the file and may be huge*/
-  if(numcolors > 256) return 38; /*error: palette too big*/
-
-  if(color->palette) lodepng_free(color->palette);
-  color->palettesize = numcolors;
-  color->palette = (unsigned char*)lodepng_malloc(4 * numcolors);
-  if(!color->palette && numcolors)
-  {
-    color->palettesize = 0;
-    return 83; /*alloc fail*/
-  }
-
-  for(i = 0; i != numcolors; ++i)
-  {
-    color->palette[4 * i + 0] = data[pos++]; /*R*/
-    color->palette[4 * i + 1] = data[pos++]; /*G*/
-    color->palette[4 * i + 2] = data[pos++]; /*B*/
-    color->palette[4 * i + 3] = 255; /*alpha*/
-  }
-
-  return 0; /* OK */
-}
-
-/*
-transparency chunk (tRNS). What the data means depends on the color type:
-palette: one alpha value per palette entry, written into the 4th byte of each entry
-greyscale: one 16-bit value that becomes the color key (r, g and b get the same value)
-RGB: three 16-bit values that become the color key
-Returns 0 on success or an error code (38, 30, 41, or 42 for any other color type).
-*/
-static unsigned readChunk_tRNS(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength)
-{
-  size_t k;
-
-  switch(color->colortype)
-  {
-    case LCT_PALETTE:
-      /*error: more alpha values given than there are palette entries*/
-      if(chunkLength > color->palettesize) return 38;
-      for(k = 0; k != chunkLength; ++k) color->palette[4 * k + 3] = data[k];
-      return 0; /* OK */
-
-    case LCT_GREY:
-      /*error: this chunk must be 2 bytes for greyscale image*/
-      if(chunkLength != 2) return 30;
-      color->key_defined = 1;
-      color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1];
-      return 0; /* OK */
-
-    case LCT_RGB:
-      /*error: this chunk must be 6 bytes for RGB image*/
-      if(chunkLength != 6) return 41;
-      color->key_defined = 1;
-      color->key_r = 256u * data[0] + data[1];
-      color->key_g = 256u * data[2] + data[3];
-      color->key_b = 256u * data[4] + data[5];
-      return 0; /* OK */
-
-    default:
-      return 42; /*error: tRNS chunk not allowed for other color models*/
-  }
-}
-
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-/*
-background color chunk (bKGD). The expected size depends on the color type:
-1 byte (a palette index) for indexed color, one 16-bit value for greyscale with or without alpha,
-three 16-bit values for RGB with or without alpha. For any other color type the chunk is ignored.
-Returns 0 on success, or 43, 44 or 45 if the chunk does not have the expected size.
-*/
-static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
-{
-  switch(info->color.colortype)
-  {
-    case LCT_PALETTE:
-      /*error: this chunk must be 1 byte for indexed color image*/
-      if(chunkLength != 1) return 43;
-      info->background_r = info->background_g = info->background_b = data[0];
-      break;
-
-    case LCT_GREY:
-    case LCT_GREY_ALPHA:
-      /*error: this chunk must be 2 bytes for greyscale image*/
-      if(chunkLength != 2) return 44;
-      info->background_r = info->background_g = info->background_b = 256u * data[0] + data[1];
-      break;
-
-    case LCT_RGB:
-    case LCT_RGBA:
-      /*error: this chunk must be 6 bytes for RGB image*/
-      if(chunkLength != 6) return 45;
-      info->background_r = 256u * data[0] + data[1];
-      info->background_g = 256u * data[2] + data[3];
-      info->background_b = 256u * data[4] + data[5];
-      break;
-
-    default:
-      return 0; /*nothing is stored for other color types*/
-  }
-
-  info->background_defined = 1;
-  return 0; /* OK */
-}
-
-/*text chunk (tEXt): the data is split at the first 0 byte into a keyword (1 to 79 bytes) and a text
-string. Both are copied into newly allocated, null terminated strings and handed to lodepng_add_text;
-the local copies are freed again before returning.
-return value is 0 or an error code (89: bad keyword length, 83: alloc fail, or the result of lodepng_add_text)*/
-static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
-{
-  unsigned error = 0;
-  char *key = 0, *str = 0;
-  unsigned i;
-
-  while(!error) /*not really a while loop, only used to break on error*/
-  {
-    unsigned length, string2_begin;
-
-    length = 0;
-    while(length < chunkLength && data[length] != 0) ++length;
-    /*even though it's not allowed by the standard, no error is thrown if
-    there's no null termination char, if the text is empty*/
-    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
-
-    key = (char*)lodepng_malloc(length + 1);
-    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    key[length] = 0;
-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
-
-    string2_begin = length + 1; /*skip keyword null terminator*/
-
-    /*the text is whatever follows the terminator; it is empty if the chunk ends before that*/
-    length = chunkLength < string2_begin ? 0 : chunkLength - string2_begin;
-    str = (char*)lodepng_malloc(length + 1);
-    if(!str) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    str[length] = 0;
-    for(i = 0; i != length; ++i) str[i] = (char)data[string2_begin + i];
-
-    error = lodepng_add_text(info, key, str);
-
-    break;
-  }
-
-  lodepng_free(key);
-  lodepng_free(str);
-
-  return error;
-}
-
-/*compressed text chunk (zTXt): the data is a keyword (1 to 79 bytes), a 0 terminator, a compression
-method byte that must be 0 and a zlib stream. The stream is decompressed with zlib_decompress using
-zlibsettings, gets a terminating 0 appended and is stored with the keyword through lodepng_add_text.
-return value is 0 or an error code (75, 89, 83, 72, a zlib_decompress error or the result of lodepng_add_text)*/
-static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
-                               const unsigned char* data, size_t chunkLength)
-{
-  unsigned error = 0;
-  unsigned i;
-
-  unsigned length, string2_begin;
-  char *key = 0;
-  ucvector decoded;
-
-  ucvector_init(&decoded);
-
-  while(!error) /*not really a while loop, only used to break on error*/
-  {
-    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
-    if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
-    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
-
-    key = (char*)lodepng_malloc(length + 1);
-    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    key[length] = 0;
-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
-
-    if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
-
-    string2_begin = length + 2; /*skip the keyword terminator and the compression method byte*/
-    if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
-
-    length = chunkLength - string2_begin;
-    /*will fail if zlib error, e.g. if length is too small*/
-    error = zlib_decompress(&decoded.data, &decoded.size,
-                            (unsigned char*)(&data[string2_begin]),
-                            length, zlibsettings);
-    if(error) break;
-    ucvector_push_back(&decoded, 0); /*null terminate so the data can be passed on as a C string*/
-
-    error = lodepng_add_text(info, key, (char*)decoded.data);
-
-    break;
-  }
-
-  lodepng_free(key);
-  ucvector_cleanup(&decoded);
-
-  return error;
-}
-
-/*international text chunk (iTXt): the data is a keyword (1 to 79 bytes), a 0 terminator, a compression
-flag byte, a compression method byte that must be 0, a 0 terminated language tag, a 0 terminated
-translated keyword and finally the text, which is decompressed with zlib_decompress when the
-compression flag is non-zero. All four strings are stored through lodepng_add_itext; the local
-copies are freed again before returning.
-return value is 0 or an error code (30, 75, 89, 83, 72, a zlib_decompress error or the result of lodepng_add_itext)*/
-static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
-                               const unsigned char* data, size_t chunkLength)
-{
-  unsigned error = 0;
-  unsigned i;
-
-  unsigned length, begin, compressed;
-  char *key = 0, *langtag = 0, *transkey = 0;
-  ucvector decoded;
-  ucvector_init(&decoded);
-
-  while(!error) /*not really a while loop, only used to break on error*/
-  {
-    /*Quick check if the chunk length isn't too small. Even without check
-    it'd still fail with other error checks below if it's too short. This just gives a different error code.*/
-    if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/
-
-    /*read the key*/
-    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
-    if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/
-    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
-
-    key = (char*)lodepng_malloc(length + 1);
-    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    key[length] = 0;
-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
-
-    /*read the compression method*/
-    compressed = data[length + 1];
-    if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
-
-    /*even though it's not allowed by the standard, no error is thrown if
-    there's no null termination char, if the text is empty for the next 3 texts*/
-
-    /*read the langtag*/
-    begin = length + 3;
-    length = 0;
-    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
-
-    langtag = (char*)lodepng_malloc(length + 1);
-    if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    langtag[length] = 0;
-    for(i = 0; i != length; ++i) langtag[i] = (char)data[begin + i];
-
-    /*read the transkey*/
-    begin += length + 1;
-    length = 0;
-    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
-
-    transkey = (char*)lodepng_malloc(length + 1);
-    if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/
-
-    transkey[length] = 0;
-    for(i = 0; i != length; ++i) transkey[i] = (char)data[begin + i];
-
-    /*read the actual text*/
-    begin += length + 1;
-
-    length = chunkLength < begin ? 0 : chunkLength - begin;
-
-    if(compressed)
-    {
-      /*will fail if zlib error, e.g. if length is too small*/
-      error = zlib_decompress(&decoded.data, &decoded.size,
-                              (unsigned char*)(&data[begin]),
-                              length, zlibsettings);
-      if(error) break;
-      if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size;
-      ucvector_push_back(&decoded, 0); /*null terminate so the data can be passed on as a C string*/
-    }
-    else
-    {
-      /*uncompressed text: copy it and null terminate the copy*/
-      if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(error, 83 /*alloc fail*/);
-
-      decoded.data[length] = 0;
-      for(i = 0; i != length; ++i) decoded.data[i] = data[begin + i];
-    }
-
-    error = lodepng_add_itext(info, key, langtag, transkey, (char*)decoded.data);
-
-    break;
-  }
-
-  lodepng_free(key);
-  lodepng_free(langtag);
-  lodepng_free(transkey);
-  ucvector_cleanup(&decoded);
-
-  return error;
-}
-
-/*
-time chunk (tIME): exactly 7 data bytes, a 16-bit year followed by one byte each for month,
-day, hour, minute and second. Fills info->time and sets info->time_defined.
-Returns 0 on success, 73 if the chunk does not have 7 data bytes.
-*/
-static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
-{
-  if(chunkLength == 7)
-  {
-    info->time.second = data[6];
-    info->time.minute = data[5];
-    info->time.hour = data[4];
-    info->time.day = data[3];
-    info->time.month = data[2];
-    /*the year is stored most significant byte first*/
-    info->time.year = ((unsigned)data[0] << 8) | data[1];
-    info->time_defined = 1;
-    return 0; /* OK */
-  }
-
-  return 73; /*invalid tIME chunk size*/
-}
-
-/*
-physical pixel dimensions chunk (pHYs): exactly 9 data bytes, two 32-bit values (x and y, most
-significant byte first) and a unit byte. Fills info->phys_x, phys_y and phys_unit and sets
-info->phys_defined. Returns 0 on success, 74 if the chunk does not have 9 data bytes.
-*/
-static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
-{
-  unsigned x, y;
-
-  if(chunkLength != 9) return 74; /*invalid pHYs chunk size*/
-
-  x = ((unsigned)data[0] << 24) | ((unsigned)data[1] << 16) | ((unsigned)data[2] << 8) | data[3];
-  y = ((unsigned)data[4] << 24) | ((unsigned)data[5] << 16) | ((unsigned)data[6] << 8) | data[7];
-
-  info->phys_defined = 1;
-  info->phys_x = x;
-  info->phys_y = y;
-  info->phys_unit = data[8];
-
-  return 0; /* OK */
-}
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-/*read a PNG, the result will be in the same color type as the PNG (hence "generic")
-out: receives a newly allocated buffer with the raw pixels; set to 0 first so that it has a defined
-value if an error happens before the buffer is allocated
-w, h: receive the image dimensions read from the header
-state: decoder settings are read from it, info_png is filled in, the result code goes to state->error
-in, insize: the complete PNG file in memory
-The function reads the header with lodepng_inspect, walks over all chunks up to IEND (collecting the
-IDAT data and handling the other known chunks), decompresses the collected data and finally turns
-the scanlines into pixels with postProcessScanlines.*/
-static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h,
-                          LodePNGState* state,
-                          const unsigned char* in, size_t insize)
-{
-  unsigned char IEND = 0;
-  const unsigned char* chunk;
-  size_t i;
-  ucvector idat; /*the data from idat chunks*/
-  ucvector scanlines;
-  size_t predict;
-  size_t numpixels;
-  size_t outsize = 0;
-
-  /*set for a chunk of a type this decoder does not implement*/
-  unsigned unknown = 0;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-  /*provide some proper output values if error will happen*/
-  *out = 0;
-
-  state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/
-  if(state->error) return;
-
-  numpixels = *w * *h;
-
-  /*multiplication overflow*/
-  if(*h != 0 && numpixels / *h != *w) CERROR_RETURN(state->error, 92);
-  /*multiplication overflow possible further below. Allows up to 2^31-1 pixel
-  bytes with 16-bit RGBA, the rest is room for filter bytes.*/
-  if(numpixels > 268435455) CERROR_RETURN(state->error, 92);
-
-  ucvector_init(&idat);
-  chunk = &in[33]; /*first byte of the first chunk after the header*/
-
-  /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk.
-  IDAT data is put at the start of the in buffer*/
-  while(!IEND && !state->error)
-  {
-    unsigned chunkLength;
-    const unsigned char* data; /*the data in the chunk*/
-
-    /*error: size of the in buffer too small to contain next chunk*/
-    if((size_t)((chunk - in) + 12) > insize || chunk < in) CERROR_BREAK(state->error, 30);
-
-    /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/
-    chunkLength = lodepng_chunk_length(chunk);
-    /*error: chunk length larger than the max PNG chunk size*/
-    if(chunkLength > 2147483647) CERROR_BREAK(state->error, 63);
-
-    if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in)
-    {
-      CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/
-    }
-
-    data = lodepng_chunk_data_const(chunk);
-
-    /*the flag describes the current chunk only. Without this reset a single unknown chunk would
-    switch off the CRC check below for every known chunk that follows it, including IDAT*/
-    unknown = 0;
-
-    /*IDAT chunk, containing compressed image data*/
-    if(lodepng_chunk_type_equals(chunk, "IDAT"))
-    {
-      size_t oldsize = idat.size;
-      if(!ucvector_resize(&idat, oldsize + chunkLength)) CERROR_BREAK(state->error, 83 /*alloc fail*/);
-      for(i = 0; i != chunkLength; ++i) idat.data[oldsize + i] = data[i];
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-      critical_pos = 3;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    }
-    /*IEND chunk*/
-    else if(lodepng_chunk_type_equals(chunk, "IEND"))
-    {
-      IEND = 1;
-    }
-    /*palette chunk (PLTE)*/
-    else if(lodepng_chunk_type_equals(chunk, "PLTE"))
-    {
-      state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength);
-      if(state->error) break;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-      critical_pos = 2;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    }
-    /*palette transparency chunk (tRNS)*/
-    else if(lodepng_chunk_type_equals(chunk, "tRNS"))
-    {
-      state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength);
-      if(state->error) break;
-    }
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-    /*background color chunk (bKGD)*/
-    else if(lodepng_chunk_type_equals(chunk, "bKGD"))
-    {
-      state->error = readChunk_bKGD(&state->info_png, data, chunkLength);
-      if(state->error) break;
-    }
-    /*text chunk (tEXt)*/
-    else if(lodepng_chunk_type_equals(chunk, "tEXt"))
-    {
-      if(state->decoder.read_text_chunks)
-      {
-        state->error = readChunk_tEXt(&state->info_png, data, chunkLength);
-        if(state->error) break;
-      }
-    }
-    /*compressed text chunk (zTXt)*/
-    else if(lodepng_chunk_type_equals(chunk, "zTXt"))
-    {
-      if(state->decoder.read_text_chunks)
-      {
-        state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
-        if(state->error) break;
-      }
-    }
-    /*international text chunk (iTXt)*/
-    else if(lodepng_chunk_type_equals(chunk, "iTXt"))
-    {
-      if(state->decoder.read_text_chunks)
-      {
-        state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
-        if(state->error) break;
-      }
-    }
-    /*time chunk (tIME)*/
-    else if(lodepng_chunk_type_equals(chunk, "tIME"))
-    {
-      state->error = readChunk_tIME(&state->info_png, data, chunkLength);
-      if(state->error) break;
-    }
-    /*physical pixel dimensions chunk (pHYs)*/
-    else if(lodepng_chunk_type_equals(chunk, "pHYs"))
-    {
-      state->error = readChunk_pHYs(&state->info_png, data, chunkLength);
-      if(state->error) break;
-    }
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    else /*it's not an implemented chunk type, so ignore it: skip over the data*/
-    {
-      /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/
-      if(!lodepng_chunk_ancillary(chunk)) CERROR_BREAK(state->error, 69);
-
-      unknown = 1;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-      if(state->decoder.remember_unknown_chunks)
-      {
-        /*critical_pos selects one of three lists, so the position of the chunk relative to PLTE and IDAT is kept*/
-        state->error = lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1],
-                                            &state->info_png.unknown_chunks_size[critical_pos - 1], chunk);
-        if(state->error) break;
-      }
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    }
-
-    if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/
-    {
-      if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/
-    }
-
-    if(!IEND) chunk = lodepng_chunk_next_const(chunk);
-  }
-
-  ucvector_init(&scanlines);
-  /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation.
-  If the decompressed size does not match the prediction, the image must be corrupt.*/
-  if(state->info_png.interlace_method == 0)
-  {
-    /*The extra *h is added because these are the filter bytes every scanline starts with*/
-    predict = lodepng_get_raw_size_idat(*w, *h, &state->info_png.color) + *h;
-  }
-  else
-  {
-    /*Adam-7 interlaced: predicted size is the sum of the 7 sub-images sizes*/
-    const LodePNGColorMode* color = &state->info_png.color;
-    predict = 0;
-    predict += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3);
-    if(*w > 4) predict += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3);
-    predict += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, color) + ((*h + 3) >> 3);
-    if(*w > 2) predict += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, color) + ((*h + 3) >> 2);
-    predict += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, color) + ((*h + 1) >> 2);
-    if(*w > 1) predict += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, color) + ((*h + 1) >> 1);
-    predict += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, color) + ((*h + 0) >> 1);
-  }
-  if(!state->error && !ucvector_reserve(&scanlines, predict)) state->error = 83; /*alloc fail*/
-  if(!state->error)
-  {
-    state->error = zlib_decompress(&scanlines.data, &scanlines.size, idat.data,
-                                   idat.size, &state->decoder.zlibsettings);
-    if(!state->error && scanlines.size != predict) state->error = 91; /*decompressed size doesn't match prediction*/
-  }
-  ucvector_cleanup(&idat);
-
-  if(!state->error)
-  {
-    outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color);
-    *out = (unsigned char*)lodepng_malloc(outsize);
-    if(!*out) state->error = 83; /*alloc fail*/
-  }
-  if(!state->error)
-  {
-    /*the output must start out as all zeroes: Adam7_deinterlace relies on that when bpp < 8*/
-    for(i = 0; i < outsize; i++) (*out)[i] = 0;
-    state->error = postProcessScanlines(*out, scanlines.data, *w, *h, &state->info_png);
-  }
-  ucvector_cleanup(&scanlines);
-}
-
-unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
-                        LodePNGState* state,
-                        const unsigned char* in, size_t insize)
-{ /*
-  Decodes the PNG in the buffer in (insize bytes) with decodeGeneric and, if state->decoder.color_convert
-  is set and the color mode state->info_raw differs from that of the PNG, converts the pixels to
-  info_raw with lodepng_convert. *out receives the allocated pixel buffer, *w and *h the dimensions.
-  The return value is the error code (0 = success).
-  */
-  *out = 0;
-  decodeGeneric(out, w, h, state, in, insize);
-  if(state->error) return state->error;
-  if(!state->decoder.color_convert || lodepng_color_mode_equal(&state->info_raw, &state->info_png.color))
-  {
-    /*same color type, no copying or converting of data needed*/
-    /*store the info_png color settings on the info_raw so that the info_raw still reflects what colortype
-    the raw image has to the end user*/
-    if(!state->decoder.color_convert)
-    {
-      state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color);
-      if(state->error) return state->error;
-    }
-  }
-  else
-  {
-    /*color conversion needed; sort of copy of the data*/
-    unsigned char* data = *out;
-    size_t outsize;
-
-    /*TODO: check if this works according to the statement in the documentation: "The converter can convert
-    from greyscale input color type, to 8-bit greyscale or greyscale with alpha"*/
-    if(!(state->info_raw.colortype == LCT_RGB || state->info_raw.colortype == LCT_RGBA)
-       && !(state->info_raw.bitdepth == 8))
-    {
-      /*NOTE(review): this return does not store 56 in state->error and leaves *out pointing at the
-      unconverted buffer from decodeGeneric - confirm that callers handle this*/
-      return 56; /*unsupported color mode conversion*/
-    }
-
-    outsize = lodepng_get_raw_size(*w, *h, &state->info_raw);
-    *out = (unsigned char*)lodepng_malloc(outsize);
-    if(!(*out))
-    {
-      state->error = 83; /*alloc fail*/
-    }
-    else state->error = lodepng_convert(*out, data, &state->info_raw,
-                                        &state->info_png.color, *w, *h);
-    lodepng_free(data); /*the buffer in the PNG's own color mode is no longer needed*/
-  }
-  return state->error;
-}
-
-/*
-Decodes a PNG from memory to raw pixels of the given colortype and bitdepth, using a temporary
-LodePNGState with default settings. Returns the error code of lodepng_decode (0 = success).
-*/
-unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in,
-                               size_t insize, LodePNGColorType colortype, unsigned bitdepth)
-{
-  LodePNGState tempstate;
-  unsigned result;
-
-  lodepng_state_init(&tempstate);
-  /*the desired output format is the only setting that differs from the defaults*/
-  tempstate.info_raw.bitdepth = bitdepth;
-  tempstate.info_raw.colortype = colortype;
-
-  result = lodepng_decode(out, w, h, &tempstate, in, insize);
-
-  lodepng_state_cleanup(&tempstate);
-  return result;
-}
-
-unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize)
-{ /*convenience wrapper: decode from memory with color type LCT_RGBA and bit depth 8*/
-  return lodepng_decode_memory(out, w, h, in, insize, LCT_RGBA, 8);
-}
-
-unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize)
-{ /*convenience wrapper: decode from memory with color type LCT_RGB and bit depth 8*/
-  return lodepng_decode_memory(out, w, h, in, insize, LCT_RGB, 8);
-}
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Loads the file with the given name with lodepng_load_file and decodes its contents with
-lodepng_decode_memory to the given colortype and bitdepth. The file buffer is freed before returning.
-Returns the error code of the first step that failed, or 0.
-*/
-unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename,
-                             LodePNGColorType colortype, unsigned bitdepth)
-{
-  unsigned char* filedata = 0;
-  size_t filesize;
-  unsigned result = lodepng_load_file(&filedata, &filesize, filename);
-
-  if(result == 0)
-  {
-    result = lodepng_decode_memory(out, w, h, filedata, filesize, colortype, bitdepth);
-  }
-
-  lodepng_free(filedata);
-  return result;
-}
-
-unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename)
-{ /*convenience wrapper: decode a file with color type LCT_RGBA and bit depth 8*/
-  return lodepng_decode_file(out, w, h, filename, LCT_RGBA, 8);
-}
-
-unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename)
-{ /*convenience wrapper: decode a file with color type LCT_RGB and bit depth 8*/
-  return lodepng_decode_file(out, w, h, filename, LCT_RGB, 8);
-}
-#endif /*LODEPNG_COMPILE_DISK*/
-
-void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings)
-{ /*fills in the default decoder settings*/
-  settings->color_convert = 1; /*lodepng_decode converts to info_raw when this is set*/
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  settings->read_text_chunks = 1; /*tEXt, zTXt and iTXt chunks are only parsed when this is set*/
-  settings->remember_unknown_chunks = 0; /*unknown chunks are only stored in info_png when this is set*/
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-  settings->ignore_crc = 0; /*0: chunk CRCs are checked while decoding*/
-  lodepng_decompress_settings_init(&settings->zlibsettings);
-}
-
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
-
-void lodepng_state_init(LodePNGState* state)
-{ /*initializes every part of state: decoder and/or encoder settings (depending on what is compiled
-  in), the raw color mode and the PNG info*/
-#ifdef LODEPNG_COMPILE_DECODER
-  lodepng_decoder_settings_init(&state->decoder);
-#endif /*LODEPNG_COMPILE_DECODER*/
-#ifdef LODEPNG_COMPILE_ENCODER
-  lodepng_encoder_settings_init(&state->encoder);
-#endif /*LODEPNG_COMPILE_ENCODER*/
-  lodepng_color_mode_init(&state->info_raw);
-  lodepng_info_init(&state->info_png);
-  state->error = 1; /*starts out non-zero; functions such as lodepng_inspect overwrite it with their result*/
-}
-
-/*Runs the cleanup routines of the two info members of *state (info_raw and info_png).
-The settings members and the error field are not touched.*/
-void lodepng_state_cleanup(LodePNGState* state)
-{
-  LodePNGColorMode* raw = &state->info_raw;
-  LodePNGInfo* png = &state->info_png;
-
-  lodepng_color_mode_cleanup(raw);
-  lodepng_info_cleanup(png);
-}
-
-/*Makes *dest a copy of *source.
-dest is cleaned up first, then overwritten with a plain struct copy. Its two info members are
-re-initialised and filled again through their dedicated copy functions (presumably so that dest
-does not end up sharing heap pointers with source).
-dest->error receives the result of the copy functions; if the color mode copy fails, the info
-copy is not attempted.*/
-void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source)
-{
-  lodepng_state_cleanup(dest);
-  *dest = *source;
-
-  lodepng_color_mode_init(&dest->info_raw);
-  lodepng_info_init(&dest->info_png);
-
-  dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw);
-  if(!dest->error)
-  {
-    dest->error = lodepng_info_copy(&dest->info_png, &source->info_png);
-  }
-}
-
-#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* / PNG Encoder                                                            / */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-/*Appends one chunk to the output vector by way of lodepng_chunk_create.
-chunkName must be a string of 4 characters; data/length are the chunk payload (length is
-narrowed to unsigned before being passed on).
-Returns 0 on success; a failure of lodepng_chunk_create is propagated by CERROR_TRY_RETURN.*/
-static unsigned addChunk(ucvector* out, const char* chunkName, const unsigned char* data, size_t length)
-{
-  unsigned chunkLength = (unsigned)length;
-  CERROR_TRY_RETURN(lodepng_chunk_create(&out->data, &out->size, chunkLength, chunkName, data));
-  /*data and size were updated directly, so bring allocsize back in line with them*/
-  out->allocsize = out->size;
-  return 0;
-}
-
-/*Appends the 8 byte PNG signature (the magic bytes) to out.*/
-static void writeSignature(ucvector* out)
-{
-  static const unsigned char signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
-  size_t i;
-  for(i = 0; i != 8; ++i) ucvector_push_back(out, signature[i]);
-}
-
-/*Builds the IHDR payload in a temporary vector and appends it to out as an "IHDR" chunk.
-Payload layout, in order: width and height (32 bit each), bit depth, color type,
-compression method (always 0), filter method (always 0), interlace method.
-Returns the result of addChunk.*/
-static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h,
-                              LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method)
-{
-  unsigned result;
-  ucvector ihdr;
-  ucvector_init(&ihdr);
-
-  lodepng_add32bitInt(&ihdr, w);
-  lodepng_add32bitInt(&ihdr, h);
-  ucvector_push_back(&ihdr, (unsigned char)bitdepth);
-  ucvector_push_back(&ihdr, (unsigned char)colortype);
-  ucvector_push_back(&ihdr, 0); /*compression method*/
-  ucvector_push_back(&ihdr, 0); /*filter method*/
-  ucvector_push_back(&ihdr, interlace_method);
-
-  result = addChunk(out, "IHDR", ihdr.data, ihdr.size);
-  ucvector_cleanup(&ihdr);
-
-  return result;
-}
-
-/*Appends a "PLTE" chunk holding the palette of info.
-info->palette is read as 4 bytes per entry; the first three bytes of each entry are written
-and the fourth (alpha) is left out. Returns the result of addChunk.*/
-static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info)
-{
-  unsigned result;
-  size_t entry;
-  ucvector plte;
-  ucvector_init(&plte);
-
-  for(entry = 0; entry != info->palettesize; ++entry)
-  {
-    const unsigned char* rgba = &info->palette[4 * entry];
-    ucvector_push_back(&plte, rgba[0]);
-    ucvector_push_back(&plte, rgba[1]);
-    ucvector_push_back(&plte, rgba[2]);
-  }
-
-  result = addChunk(out, "PLTE", plte.data, plte.size);
-  ucvector_cleanup(&plte);
-
-  return result;
-}
-
-/*Appends a "tRNS" chunk describing the transparency of info.
-- LCT_PALETTE: one alpha byte per palette entry, except that the trailing run of entries
-  whose alpha is 255 is not written.
-- LCT_GREY with key_defined: key_r as two bytes, high byte first.
-- LCT_RGB with key_defined: key_r, key_g, key_b, two bytes each, high byte first.
-For every other case the chunk is written with an empty payload.
-Returns the result of addChunk.*/
-static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info)
-{
-  unsigned result;
-  size_t i;
-  ucvector trns;
-  ucvector_init(&trns);
-
-  switch(info->colortype)
-  {
-    case LCT_PALETTE:
-    {
-      /*drop the fully opaque tail of the palette*/
-      size_t count = info->palettesize;
-      while(count != 0 && info->palette[4 * (count - 1) + 3] == 255) --count;
-      /*what remains contributes only its alpha byte*/
-      for(i = 0; i != count; ++i) ucvector_push_back(&trns, info->palette[4 * i + 3]);
-      break;
-    }
-    case LCT_GREY:
-      if(info->key_defined)
-      {
-        ucvector_push_back(&trns, (unsigned char)(info->key_r >> 8));
-        ucvector_push_back(&trns, (unsigned char)(info->key_r & 255));
-      }
-      break;
-    case LCT_RGB:
-      if(info->key_defined)
-      {
-        ucvector_push_back(&trns, (unsigned char)(info->key_r >> 8));
-        ucvector_push_back(&trns, (unsigned char)(info->key_r & 255));
-        ucvector_push_back(&trns, (unsigned char)(info->key_g >> 8));
-        ucvector_push_back(&trns, (unsigned char)(info->key_g & 255));
-        ucvector_push_back(&trns, (unsigned char)(info->key_b >> 8));
-        ucvector_push_back(&trns, (unsigned char)(info->key_b & 255));
-      }
-      break;
-    default:
-      break;
-  }
-
-  result = addChunk(out, "tRNS", trns.data, trns.size);
-  ucvector_cleanup(&trns);
-
-  return result;
-}
-
-/*Compresses data (datasize bytes) with zlib_compress using zlibsettings and, if that worked,
-appends the compressed bytes to out as an "IDAT" chunk.
-Returns the compressor's error code, or else the result of addChunk.*/
-static unsigned addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize,
-                              LodePNGCompressSettings* zlibsettings)
-{
-  unsigned error;
-  ucvector compressed;
-  ucvector_init(&compressed);
-
-  error = zlib_compress(&compressed.data, &compressed.size, data, datasize, zlibsettings);
-  if(error == 0)
-  {
-    error = addChunk(out, "IDAT", compressed.data, compressed.size);
-  }
-
-  ucvector_cleanup(&compressed);
-  return error;
-}
-
-/*Appends an "IEND" chunk with an empty payload to out; returns the result of addChunk.*/
-static unsigned addChunk_IEND(ucvector* out)
-{
-  return addChunk(out, "IEND", 0, 0);
-}
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-
-/*Appends a "tEXt" chunk to out. The payload is the keyword, a 0 byte, then textstring
-(without its terminator).
-keyword must be 1 to 79 characters long, otherwise nothing is appended and 89 is returned.
-Otherwise returns the result of addChunk.*/
-static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring)
-{
-  unsigned error = 0;
-  size_t i;
-  ucvector text;
-  ucvector_init(&text);
-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)keyword[i]);
-  if(i < 1 || i > 79)
-  {
-    /*the keyword bytes are already in the vector: release them before bailing out*/
-    ucvector_cleanup(&text);
-    return 89; /*error: invalid keyword size*/
-  }
-  ucvector_push_back(&text, 0); /*0 termination char*/
-  for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)textstring[i]);
-  error = addChunk(out, "tEXt", text.data, text.size);
-  ucvector_cleanup(&text);
-
-  return error;
-}
-
-/*Appends a "zTXt" chunk to out. The payload is the keyword, a 0 byte, the compression method
-byte (0), then textstring compressed with zlib_compress using zlibsettings.
-keyword must be 1 to 79 characters long, otherwise nothing is appended and 89 is returned.
-Otherwise returns the compressor's error code, or else the result of addChunk.*/
-static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring,
-                              LodePNGCompressSettings* zlibsettings)
-{
-  unsigned error = 0;
-  ucvector data, compressed;
-  size_t i, textsize = strlen(textstring);
-
-  ucvector_init(&data);
-  ucvector_init(&compressed);
-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
-  if(i < 1 || i > 79)
-  {
-    /*the keyword bytes are already in the vector: release them before bailing out*/
-    ucvector_cleanup(&compressed);
-    ucvector_cleanup(&data);
-    return 89; /*error: invalid keyword size*/
-  }
-  ucvector_push_back(&data, 0); /*0 termination char*/
-  ucvector_push_back(&data, 0); /*compression method: 0*/
-
-  error = zlib_compress(&compressed.data, &compressed.size,
-                        (unsigned char*)textstring, textsize, zlibsettings);
-  if(!error)
-  {
-    for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]);
-    error = addChunk(out, "zTXt", data.data, data.size);
-  }
-
-  ucvector_cleanup(&compressed);
-  ucvector_cleanup(&data);
-  return error;
-}
-
-/*Appends an "iTXt" chunk to out. Payload, in order: keyword, 0 byte, compression flag
-(1 if compressed is nonzero, else 0), compression method byte (0), langtag, 0 byte, transkey,
-0 byte, and finally textstring, either verbatim or compressed with zlib_compress.
-keyword must be 1 to 79 characters long, otherwise nothing is appended and 89 is returned.
-Otherwise returns the compressor's error code (compressed case), or else the result of addChunk.*/
-static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* keyword, const char* langtag,
-                              const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings)
-{
-  unsigned error = 0;
-  ucvector data;
-  size_t i, textsize = strlen(textstring);
-
-  ucvector_init(&data);
-
-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
-  if(i < 1 || i > 79)
-  {
-    /*the keyword bytes are already in the vector: release them before bailing out*/
-    ucvector_cleanup(&data);
-    return 89; /*error: invalid keyword size*/
-  }
-  ucvector_push_back(&data, 0); /*null termination char*/
-  ucvector_push_back(&data, compressed ? 1 : 0); /*compression flag*/
-  ucvector_push_back(&data, 0); /*compression method*/
-  for(i = 0; langtag[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)langtag[i]);
-  ucvector_push_back(&data, 0); /*null termination char*/
-  for(i = 0; transkey[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)transkey[i]);
-  ucvector_push_back(&data, 0); /*null termination char*/
-
-  if(compressed)
-  {
-    ucvector compressed_data;
-    ucvector_init(&compressed_data);
-    error = zlib_compress(&compressed_data.data, &compressed_data.size,
-                          (unsigned char*)textstring, textsize, zlibsettings);
-    if(!error)
-    {
-      for(i = 0; i != compressed_data.size; ++i) ucvector_push_back(&data, compressed_data.data[i]);
-    }
-    ucvector_cleanup(&compressed_data);
-  }
-  else /*not compressed*/
-  {
-    for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)textstring[i]);
-  }
-
-  if(!error) error = addChunk(out, "iTXt", data.data, data.size);
-  ucvector_cleanup(&data);
-  return error;
-}
-
-/*Appends a "bKGD" chunk built from the background fields of info.
-- grey or grey+alpha: background_r as two bytes, high byte first
-- RGB or RGBA: background_r, background_g, background_b, two bytes each, high byte first
-- palette: the low byte of background_r (the palette index)
-Any other color type gives an empty payload. Returns the result of addChunk.*/
-static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info)
-{
-  unsigned result;
-  ucvector bkgd;
-  ucvector_init(&bkgd);
-
-  switch(info->color.colortype)
-  {
-    case LCT_GREY:
-    case LCT_GREY_ALPHA:
-      ucvector_push_back(&bkgd, (unsigned char)(info->background_r >> 8));
-      ucvector_push_back(&bkgd, (unsigned char)(info->background_r & 255));
-      break;
-    case LCT_RGB:
-    case LCT_RGBA:
-      ucvector_push_back(&bkgd, (unsigned char)(info->background_r >> 8));
-      ucvector_push_back(&bkgd, (unsigned char)(info->background_r & 255));
-      ucvector_push_back(&bkgd, (unsigned char)(info->background_g >> 8));
-      ucvector_push_back(&bkgd, (unsigned char)(info->background_g & 255));
-      ucvector_push_back(&bkgd, (unsigned char)(info->background_b >> 8));
-      ucvector_push_back(&bkgd, (unsigned char)(info->background_b & 255));
-      break;
-    case LCT_PALETTE:
-      ucvector_push_back(&bkgd, (unsigned char)(info->background_r & 255)); /*palette index*/
-      break;
-    default:
-      break;
-  }
-
-  result = addChunk(out, "bKGD", bkgd.data, bkgd.size);
-  ucvector_cleanup(&bkgd);
-
-  return result;
-}
-
-/*Appends a "tIME" chunk to out. The 7 byte payload is: year (two bytes, high byte first),
-month, day, hour, minute, second.
-Returns 83 if the temporary payload buffer cannot be allocated, else the result of addChunk.*/
-static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time)
-{
-  unsigned result;
-  unsigned char* cursor;
-  unsigned char* payload = (unsigned char*)lodepng_malloc(7);
-  if(!payload) return 83; /*alloc fail*/
-
-  cursor = payload;
-  *cursor++ = (unsigned char)(time->year >> 8);
-  *cursor++ = (unsigned char)(time->year & 255);
-  *cursor++ = (unsigned char)time->month;
-  *cursor++ = (unsigned char)time->day;
-  *cursor++ = (unsigned char)time->hour;
-  *cursor++ = (unsigned char)time->minute;
-  *cursor++ = (unsigned char)time->second;
-
-  result = addChunk(out, "tIME", payload, 7);
-  lodepng_free(payload);
-  return result;
-}
-
-/*Appends a "pHYs" chunk to out. Payload: phys_x and phys_y (32 bit each) followed by the
-phys_unit byte, all taken from info. Returns the result of addChunk.*/
-static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info)
-{
-  unsigned result;
-  ucvector phys;
-  ucvector_init(&phys);
-
-  lodepng_add32bitInt(&phys, info->phys_x);
-  lodepng_add32bitInt(&phys, info->phys_y);
-  ucvector_push_back(&phys, info->phys_unit);
-
-  result = addChunk(out, "pHYs", phys.data, phys.size);
-  ucvector_cleanup(&phys);
-
-  return result;
-}
-
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-/*Applies one PNG row filter to a single scanline.
-out: receives length filtered bytes
-scanline: the unfiltered row
-prevline: the unfiltered row above it, or 0 if there is none (it then counts as all zeroes)
-length: size of the row in bytes
-bytewidth: distance in bytes to the corresponding byte of the pixel to the left
-filterType: 0 None, 1 Sub, 2 Up, 3 Average, 4 Paeth; any other value leaves out untouched*/
-static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline,
-                           size_t length, size_t bytewidth, unsigned char filterType)
-{
-  size_t pos;
-  switch(filterType)
-  {
-    case 0: /*None: plain copy*/
-      for(pos = 0; pos != length; ++pos) out[pos] = scanline[pos];
-      break;
-
-    case 1: /*Sub: the first pixel has no left neighbour and is copied*/
-      for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos];
-      for(pos = bytewidth; pos < length; ++pos) out[pos] = scanline[pos] - scanline[pos - bytewidth];
-      break;
-
-    case 2: /*Up*/
-      if(!prevline)
-      {
-        for(pos = 0; pos != length; ++pos) out[pos] = scanline[pos];
-      }
-      else
-      {
-        for(pos = 0; pos != length; ++pos) out[pos] = scanline[pos] - prevline[pos];
-      }
-      break;
-
-    case 3: /*Average*/
-      if(!prevline)
-      {
-        for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos];
-        for(pos = bytewidth; pos < length; ++pos) out[pos] = scanline[pos] - (scanline[pos - bytewidth] >> 1);
-      }
-      else
-      {
-        for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos] - (prevline[pos] >> 1);
-        for(pos = bytewidth; pos < length; ++pos)
-        {
-          out[pos] = scanline[pos] - ((scanline[pos - bytewidth] + prevline[pos]) >> 1);
-        }
-      }
-      break;
-
-    case 4: /*Paeth*/
-      if(!prevline)
-      {
-        for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos];
-        /*paethPredictor(scanline[pos - bytewidth], 0, 0) is always scanline[pos - bytewidth]*/
-        for(pos = bytewidth; pos < length; ++pos) out[pos] = (scanline[pos] - scanline[pos - bytewidth]);
-      }
-      else
-      {
-        /*paethPredictor(0, prevline[pos], 0) is always prevline[pos]*/
-        for(pos = 0; pos != bytewidth; ++pos) out[pos] = (scanline[pos] - prevline[pos]);
-        for(pos = bytewidth; pos < length; ++pos)
-        {
-          out[pos] = (scanline[pos] - paethPredictor(scanline[pos - bytewidth], prevline[pos], prevline[pos - bytewidth]));
-        }
-      }
-      break;
-
-    default: /*unexisting filter type given*/
-      return;
-  }
-}
-
-/* log2 approximation. A slight bit faster than std::log.
-The input is first scaled down (by 16 while above 32, then by 2 while above 2), adding 4
-respectively 1 to the result for each step; what is left is fed to a cubic polynomial. */
-static float flog2(float f)
-{
-  float whole = 0;
-  for(; f > 32; f /= 16) whole += 4;
-  for(; f > 2; f /= 2) ++whole;
-  return whole + 1.442695f * (f * f * f / 3 - 3 * f * f / 2 + 3 * f - 1.83333f);
-}
-
-/*Allocates the five scratch rows (one per PNG filter type) used by the adaptive filter strategies.
-Returns 0 on success. If an allocation fails, the rows obtained so far are freed again and
-83 is returned, so the caller has nothing to release in that case.*/
-static unsigned filterAllocAttempts(unsigned char* attempt[5], size_t linebytes)
-{
-  unsigned type;
-  for(type = 0; type != 5; ++type)
-  {
-    attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
-    if(!attempt[type])
-    {
-      while(type != 0) lodepng_free(attempt[--type]);
-      return 83; /*alloc fail*/
-    }
-  }
-  return 0;
-}
-
-/*Frees the five scratch rows obtained from a successful filterAllocAttempts call.*/
-static void filterFreeAttempts(unsigned char* attempt[5])
-{
-  unsigned type;
-  for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
-}
-
-/*
-Filters h scanlines of in (PNG filter method 0) into out.
-in: h rows of linebytes = (w * bpp + 7) / 8 bytes each
-out: receives, for every row, one filter type byte followed by the linebytes filtered bytes,
-     so it must have room for h * (1 + linebytes) bytes
-info: color mode of the rows, used for bpp, colortype and bitdepth
-settings: supplies filter_strategy, filter_palette_zero, predefined_filters and zlibsettings
-Returns 0 on success, 31 if bpp is 0, 83 if a scratch row cannot be allocated,
-88 if the filter strategy is unknown.
-*/
-static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h,
-                       const LodePNGColorMode* info, const LodePNGEncoderSettings* settings)
-{
-  unsigned bpp = lodepng_get_bpp(info);
-  /*the width of a scanline in bytes, not including the filter type*/
-  size_t linebytes = (w * bpp + 7) / 8;
-  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
-  size_t bytewidth = (bpp + 7) / 8;
-  const unsigned char* prevline = 0;
-  unsigned x, y;
-  unsigned error = 0;
-  LodePNGFilterStrategy strategy = settings->filter_strategy;
-
-  /*
-  There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard:
-   *  If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e.
-      use fixed filtering, with the filter None).
-   * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is
-     not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply
-     all five filters and select the filter that produces the smallest sum of absolute values per row.
-  This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true.
-
-  If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed,
-  but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum
-  heuristic is used.
-  */
-  if(settings->filter_palette_zero &&
-     (info->colortype == LCT_PALETTE || info->bitdepth < 8)) strategy = LFS_ZERO;
-
-  if(bpp == 0) return 31; /*error: invalid color type*/
-
-  if(strategy == LFS_ZERO)
-  {
-    for(y = 0; y != h; ++y)
-    {
-      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
-      size_t inindex = linebytes * y;
-      out[outindex] = 0; /*filter type byte*/
-      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, 0);
-      prevline = &in[inindex];
-    }
-  }
-  else if(strategy == LFS_MINSUM)
-  {
-    /*adaptive filtering*/
-    size_t sum[5];
-    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
-    size_t smallest = 0;
-    unsigned char type, bestType = 0;
-
-    error = filterAllocAttempts(attempt, linebytes);
-    if(!error)
-    {
-      for(y = 0; y != h; ++y)
-      {
-        /*try the 5 filter types*/
-        for(type = 0; type != 5; ++type)
-        {
-          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
-
-          /*calculate the sum of the result*/
-          sum[type] = 0;
-          if(type == 0)
-          {
-            for(x = 0; x != linebytes; ++x) sum[type] += (unsigned char)(attempt[type][x]);
-          }
-          else
-          {
-            for(x = 0; x != linebytes; ++x)
-            {
-              /*For differences, each byte should be treated as signed, values above 127 are negative
-              (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there.
-              This means filtertype 0 is almost never chosen, but that is justified.*/
-              unsigned char s = attempt[type][x];
-              sum[type] += s < 128 ? s : (255U - s);
-            }
-          }
-
-          /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
-          if(type == 0 || sum[type] < smallest)
-          {
-            bestType = type;
-            smallest = sum[type];
-          }
-        }
-
-        prevline = &in[y * linebytes];
-
-        /*now fill the out values*/
-        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
-        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
-      }
-
-      filterFreeAttempts(attempt);
-    }
-  }
-  else if(strategy == LFS_ENTROPY)
-  {
-    float sum[5];
-    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
-    float smallest = 0;
-    unsigned type, bestType = 0;
-    unsigned count[256];
-
-    error = filterAllocAttempts(attempt, linebytes);
-    if(!error)
-    {
-      for(y = 0; y != h; ++y)
-      {
-        /*try the 5 filter types*/
-        for(type = 0; type != 5; ++type)
-        {
-          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
-          for(x = 0; x != 256; ++x) count[x] = 0;
-          for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]];
-          ++count[type]; /*the filter type itself is part of the scanline*/
-          sum[type] = 0;
-          for(x = 0; x != 256; ++x)
-          {
-            float p = count[x] / (float)(linebytes + 1);
-            sum[type] += count[x] == 0 ? 0 : flog2(1 / p) * p;
-          }
-          /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
-          if(type == 0 || sum[type] < smallest)
-          {
-            bestType = type;
-            smallest = sum[type];
-          }
-        }
-
-        prevline = &in[y * linebytes];
-
-        /*now fill the out values*/
-        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
-        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
-      }
-
-      filterFreeAttempts(attempt);
-    }
-  }
-  else if(strategy == LFS_PREDEFINED)
-  {
-    for(y = 0; y != h; ++y)
-    {
-      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
-      size_t inindex = linebytes * y;
-      unsigned char type = settings->predefined_filters[y];
-      out[outindex] = type; /*filter type byte*/
-      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type);
-      prevline = &in[inindex];
-    }
-  }
-  else if(strategy == LFS_BRUTE_FORCE)
-  {
-    /*brute force filter chooser.
-    deflate the scanline after every filter attempt to see which one deflates best.
-    This is very slow and gives only slightly smaller, sometimes even larger, result*/
-    size_t size[5];
-    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
-    size_t smallest = 0;
-    unsigned type = 0, bestType = 0;
-    unsigned char* dummy;
-    LodePNGCompressSettings zlibsettings = settings->zlibsettings;
-    /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose,
-    to simulate the true case where the tree is the same for the whole image. Sometimes it gives
-    better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare
-    cases better compression. It does make this a bit less slow, so it's worth doing this.*/
-    zlibsettings.btype = 1;
-    /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG
-    images only, so disable it*/
-    zlibsettings.custom_zlib = 0;
-    zlibsettings.custom_deflate = 0;
-
-    error = filterAllocAttempts(attempt, linebytes);
-    if(!error)
-    {
-      for(y = 0; y != h; ++y) /*try the 5 filter types*/
-      {
-        for(type = 0; type != 5; ++type)
-        {
-          size_t testsize = linebytes;
-          /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/
-
-          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
-          size[type] = 0;
-          dummy = 0;
-          /*only the compressed size matters here; the compressed bytes are thrown away at once*/
-          zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings);
-          lodepng_free(dummy);
-          /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/
-          if(type == 0 || size[type] < smallest)
-          {
-            bestType = type;
-            smallest = size[type];
-          }
-        }
-        prevline = &in[y * linebytes];
-        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
-        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
-      }
-
-      filterFreeAttempts(attempt);
-    }
-  }
-  else return 88; /* unknown filter strategy */
-
-  return error;
-}
-
-/*The opposite of the removePaddingBits function.
-Copies h rows of ilinebits bits each from in to out, and appends (olinebits - ilinebits)
-zero bits after every row. olinebits must be >= ilinebits.*/
-static void addPaddingBits(unsigned char* out, const unsigned char* in,
-                           size_t olinebits, size_t ilinebits, unsigned h)
-{
-  size_t padbits = olinebits - ilinebits;
-  size_t outpos = 0, inpos = 0; /*bit positions in out and in*/
-  unsigned row;
-
-  for(row = 0; row != h; ++row)
-  {
-    size_t n;
-    for(n = 0; n < ilinebits; ++n)
-    {
-      setBitOfReversedStream(&outpos, out, readBitFromReversedStream(&inpos, in));
-    }
-    /*the padding bits are written as zeroes rather than skipped, to avoid
-    "Use of uninitialised value of size ###" warning from valgrind*/
-    for(n = 0; n != padbits; ++n) setBitOfReversedStream(&outpos, out, 0);
-  }
-}
-
-/*
-Re-orders the pixels of a non-interlaced image into the seven passes of PNG's Adam7 interlacing.
-in: non-interlaced image with size w*h. It has no padding bits at all, so its size in bits
- is w * h * bpp.
-out: the same pixels, grouped per reduced image (pass). There are no padding bits between the
- scanlines of a reduced image, but each reduced image starts at a byte (passstart[i]), so out
- is possibly bigger than in.
-bpp: bits per pixel
-NOTE: comments about padding bits are only relevant if bpp < 8
-*/
-static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
-{
-  unsigned passw[7], passh[7];
-  size_t filter_passstart[8], padded_passstart[8], passstart[8];
-  unsigned pass;
-
-  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
-
-  for(pass = 0; pass != 7; ++pass)
-  {
-    unsigned x, y, b;
-    if(bpp >= 8)
-    {
-      /*whole bytes per pixel: plain byte copies*/
-      size_t bytewidth = bpp / 8;
-      for(y = 0; y < passh[pass]; ++y)
-      {
-        for(x = 0; x < passw[pass]; ++x)
-        {
-          size_t src = ((ADAM7_IY[pass] + y * ADAM7_DY[pass]) * w + ADAM7_IX[pass] + x * ADAM7_DX[pass]) * bytewidth;
-          size_t dst = passstart[pass] + (y * passw[pass] + x) * bytewidth;
-          for(b = 0; b < bytewidth; ++b) out[dst + b] = in[src + b];
-        }
-      }
-    }
-    else
-    {
-      /*bpp < 8: pixels are not byte aligned, so they are moved bit by bit*/
-      unsigned ilinebits = bpp * passw[pass];
-      unsigned olinebits = bpp * w;
-      size_t obp, ibp; /*bit pointers (for out and in buffer)*/
-      for(y = 0; y < passh[pass]; ++y)
-      {
-        for(x = 0; x < passw[pass]; ++x)
-        {
-          ibp = (ADAM7_IY[pass] + y * ADAM7_DY[pass]) * olinebits + (ADAM7_IX[pass] + x * ADAM7_DX[pass]) * bpp;
-          obp = (8 * passstart[pass]) + (y * ilinebits + x * bpp);
-          for(b = 0; b < bpp; ++b)
-          {
-            unsigned char bit = readBitFromReversedStream(&ibp, in);
-            setBitOfReversedStream(&obp, out, bit);
-          }
-        }
-      }
-    }
-  }
-}
-
-/*Converts the pure 2D image with the PNG's colortype into filtered-padded-interlaced data,
-i.e. the uncompressed IDAT chunk data. in must contain the full image.
-*out is allocated here with lodepng_malloc and *outsize receives its size; this function does
-not free it, not even when an error is returned (presumably that is left to the caller).
-Steps:
-*) if no Adam7: 1) add padding bits (= possible extra bits per scanline if bpp < 8) 2) filter
-*) if Adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter
-Return value is the error: 0 on success, 83 if an allocation fails, or whatever filter reports.*/
-static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in,
-                                    unsigned w, unsigned h,
-                                    const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings)
-{
-  unsigned bpp = lodepng_get_bpp(&info_png->color);
-  unsigned error = 0;
-
-  if(info_png->interlace_method != 0) /*interlace_method is 1 (Adam7)*/
-  {
-    unsigned passw[7], passh[7];
-    size_t filter_passstart[8], padded_passstart[8], passstart[8];
-    unsigned char* adam7;
-
-    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
-
-    *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/
-    *out = (unsigned char*)lodepng_malloc(*outsize);
-    if(!(*out)) error = 83; /*alloc fail*/
-
-    adam7 = (unsigned char*)lodepng_malloc(passstart[7]);
-    if(!adam7 && passstart[7]) error = 83; /*alloc fail*/
-
-    if(!error)
-    {
-      unsigned pass;
-
-      Adam7_interlace(adam7, in, w, h, bpp);
-      for(pass = 0; pass != 7; ++pass)
-      {
-        if(bpp >= 8)
-        {
-          /*no padding bits possible: filter the reduced image straight from the interlaced buffer*/
-          error = filter(&(*out)[filter_passstart[pass]], &adam7[padded_passstart[pass]],
-                         passw[pass], passh[pass], &info_png->color, settings);
-        }
-        else
-        {
-          unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[pass + 1] - padded_passstart[pass]);
-          if(!padded) ERROR_BREAK(83); /*alloc fail*/
-          addPaddingBits(padded, &adam7[passstart[pass]],
-                         ((passw[pass] * bpp + 7) / 8) * 8, passw[pass] * bpp, passh[pass]);
-          error = filter(&(*out)[filter_passstart[pass]], padded,
-                         passw[pass], passh[pass], &info_png->color, settings);
-          lodepng_free(padded);
-        }
-
-        if(error) break;
-      }
-    }
-
-    lodepng_free(adam7);
-  }
-  else /*no interlacing*/
-  {
-    unsigned linebytes = (w * bpp + 7) / 8; /*bytes per scanline, padding bits included*/
-
-    *outsize = h + (h * linebytes); /*image size plus an extra byte per scanline + possible padding bits*/
-    *out = (unsigned char*)lodepng_malloc(*outsize);
-    if(!(*out) && (*outsize)) error = 83; /*alloc fail*/
-
-    if(!error)
-    {
-      if(bpp < 8 && w * bpp != linebytes * 8)
-      {
-        /*non multiple of 8 bits per scanline, padding bits needed per scanline*/
-        unsigned char* padded = (unsigned char*)lodepng_malloc(h * linebytes);
-        if(!padded) error = 83; /*alloc fail*/
-        if(!error)
-        {
-          addPaddingBits(padded, in, linebytes * 8, w * bpp, h);
-          error = filter(*out, padded, w, h, &info_png->color, settings);
-        }
-        lodepng_free(padded);
-      }
-      else
-      {
-        /*we can immediately filter into the out buffer, no other steps needed*/
-        error = filter(*out, in, w, h, &info_png->color, settings);
-      }
-    }
-  }
-
-  return error;
-}
-
-/*
-palette must have 4 * palettesize bytes allocated, and given in format RGBARGBARGBARGBA...
-returns 0 if the palette is opaque (every alpha is 255; also for an empty palette),
-returns 1 if the palette has a single color with alpha 0 and no opaque color has the same
-RGB value ==> color key
-returns 2 if the palette is semi-translucent: any alpha other than 0 or 255, more than one
-color with alpha 0, or an opaque color sharing the RGB value of the alpha 0 color.
-*/
-static unsigned getPaletteTranslucency(const unsigned char* palette, size_t palettesize)
-{
-  size_t i;
-  size_t keyindex = palettesize; /*palettesize means: no color with alpha 0 seen yet*/
-
-  /*first pass: classify the alpha values and locate the one possible key color*/
-  for(i = 0; i != palettesize; ++i)
-  {
-    unsigned char alpha = palette[4 * i + 3];
-    if(alpha == 255) continue;
-    if(alpha != 0 || keyindex != palettesize) return 2;
-    keyindex = i;
-  }
-  if(keyindex == palettesize) return 0;
-
-  /*second pass: when key, no opaque RGB may have key's RGB. The key entry itself is skipped,
-  it must not be compared against itself.*/
-  for(i = 0; i != palettesize; ++i)
-  {
-    if(i == keyindex) continue;
-    if(palette[4 * i + 0] == palette[4 * keyindex + 0]
-    && palette[4 * i + 1] == palette[4 * keyindex + 1]
-    && palette[4 * i + 2] == palette[4 * keyindex + 2]) return 2;
-  }
-  return 1;
-}
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-/*Appends the chunks stored back to back in data (datasize bytes in total) to out, one
-lodepng_chunk_append call per chunk, stepping from chunk to chunk with lodepng_chunk_next.
-Returns 0 when all chunks were appended; a failing append is propagated by CERROR_TRY_RETURN.*/
-static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize)
-{
-  unsigned char* chunk;
-  for(chunk = data; (size_t)(chunk - data) < datasize; chunk = lodepng_chunk_next(chunk))
-  {
-    CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, chunk));
-    out->allocsize = out->size; /*fix the allocsize again*/
-  }
-  return 0;
-}
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-unsigned lodepng_encode(unsigned char** out, size_t* outsize,
-                        const unsigned char* image, unsigned w, unsigned h,
-                        LodePNGState* state)
-{
-  LodePNGInfo info;
-  ucvector outv;
-  unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/
-  size_t datasize = 0;
-
-  /*provide some proper output values if error will happen*/
-  *out = 0;
-  *outsize = 0;
-  state->error = 0;
-
-  lodepng_info_init(&info);
-  lodepng_info_copy(&info, &state->info_png);
-
-  if((info.color.colortype == LCT_PALETTE || state->encoder.force_palette)
-      && (info.color.palettesize == 0 || info.color.palettesize > 256))
-  {
-    state->error = 68; /*invalid palette size, it is only allowed to be 1-256*/
-    return state->error;
-  }
-
-  if(state->encoder.auto_convert)
-  {
-    state->error = lodepng_auto_choose_color(&info.color, image, w, h, &state->info_raw);
-  }
-  if(state->error) return state->error;
-
-  if(state->encoder.zlibsettings.btype > 2)
-  {
-    CERROR_RETURN_ERROR(state->error, 61); /*error: unexisting btype*/
-  }
-  if(state->info_png.interlace_method > 1)
-  {
-    CERROR_RETURN_ERROR(state->error, 71); /*error: unexisting interlace mode*/
-  }
-
-  state->error = checkColorValidity(info.color.colortype, info.color.bitdepth);
-  if(state->error) return state->error; /*error: unexisting color type given*/
-  state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth);
-  if(state->error) return state->error; /*error: unexisting color type given*/
-
-  if(!lodepng_color_mode_equal(&state->info_raw, &info.color))
-  {
-    unsigned char* converted;
-    size_t size = (w * h * (size_t)lodepng_get_bpp(&info.color) + 7) / 8;
-
-    converted = (unsigned char*)lodepng_malloc(size);
-    if(!converted && size) state->error = 83; /*alloc fail*/
-    if(!state->error)
-    {
-      state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h);
-    }
-    if(!state->error) preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder);
-    lodepng_free(converted);
-  }
-  else preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder);
-
-  ucvector_init(&outv);
-  while(!state->error) /*while only executed once, to break on error*/
-  {
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-    size_t i;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    /*write signature and chunks*/
-    writeSignature(&outv);
-    /*IHDR*/
-    addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method);
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-    /*unknown chunks between IHDR and PLTE*/
-    if(info.unknown_chunks_data[0])
-    {
-      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], info.unknown_chunks_size[0]);
-      if(state->error) break;
-    }
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    /*PLTE*/
-    if(info.color.colortype == LCT_PALETTE)
-    {
-      addChunk_PLTE(&outv, &info.color);
-    }
-    if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA))
-    {
-      addChunk_PLTE(&outv, &info.color);
-    }
-    /*tRNS*/
-    if(info.color.colortype == LCT_PALETTE && getPaletteTranslucency(info.color.palette, info.color.palettesize) != 0)
-    {
-      addChunk_tRNS(&outv, &info.color);
-    }
-    if((info.color.colortype == LCT_GREY || info.color.colortype == LCT_RGB) && info.color.key_defined)
-    {
-      addChunk_tRNS(&outv, &info.color);
-    }
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-    /*bKGD (must come between PLTE and the IDAT chunks)*/
-    if(info.background_defined) addChunk_bKGD(&outv, &info);
-    /*pHYs (must come before the IDAT chunks)*/
-    if(info.phys_defined) addChunk_pHYs(&outv, &info);
-
-    /*unknown chunks between PLTE and IDAT*/
-    if(info.unknown_chunks_data[1])
-    {
-      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]);
-      if(state->error) break;
-    }
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    /*IDAT (multiple IDAT chunks must be consecutive)*/
-    state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings);
-    if(state->error) break;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-    /*tIME*/
-    if(info.time_defined) addChunk_tIME(&outv, &info.time);
-    /*tEXt and/or zTXt*/
-    for(i = 0; i != info.text_num; ++i)
-    {
-      if(strlen(info.text_keys[i]) > 79)
-      {
-        state->error = 66; /*text chunk too large*/
-        break;
-      }
-      if(strlen(info.text_keys[i]) < 1)
-      {
-        state->error = 67; /*text chunk too small*/
-        break;
-      }
-      if(state->encoder.text_compression)
-      {
-        addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings);
-      }
-      else
-      {
-        addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]);
-      }
-    }
-    /*LodePNG version id in text chunk*/
-    if(state->encoder.add_id)
-    {
-      unsigned alread_added_id_text = 0;
-      for(i = 0; i != info.text_num; ++i)
-      {
-        if(!strcmp(info.text_keys[i], "LodePNG"))
-        {
-          alread_added_id_text = 1;
-          break;
-        }
-      }
-      if(alread_added_id_text == 0)
-      {
-        addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/
-      }
-    }
-    /*iTXt*/
-    for(i = 0; i != info.itext_num; ++i)
-    {
-      if(strlen(info.itext_keys[i]) > 79)
-      {
-        state->error = 66; /*text chunk too large*/
-        break;
-      }
-      if(strlen(info.itext_keys[i]) < 1)
-      {
-        state->error = 67; /*text chunk too small*/
-        break;
-      }
-      addChunk_iTXt(&outv, state->encoder.text_compression,
-                    info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i],
-                    &state->encoder.zlibsettings);
-    }
-
-    /*unknown chunks between IDAT and IEND*/
-    if(info.unknown_chunks_data[2])
-    {
-      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]);
-      if(state->error) break;
-    }
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    addChunk_IEND(&outv);
-
-    break; /*this isn't really a while loop; no error happened so break out now!*/
-  }
-
-  lodepng_info_cleanup(&info);
-  lodepng_free(data);
-  /*instead of cleaning the vector up, give it to the output*/
-  *out = outv.data;
-  *outsize = outv.size;
-
-  return state->error;
-}
-
-/*
-Encodes raw pixels to a PNG in memory using a temporary LodePNGState. The given colortype
-and bitdepth are stored both in the state's raw color mode and in its PNG color mode, and
-the error code that lodepng_encode leaves in the state is returned.
-*/
-unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image,
-                               unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth)
-{
-  LodePNGState state;
-  unsigned result;
-
-  lodepng_state_init(&state);
-  /*raw input format and PNG color mode both start from the caller's format*/
-  state.info_png.color.colortype = state.info_raw.colortype = colortype;
-  state.info_png.color.bitdepth = state.info_raw.bitdepth = bitdepth;
-
-  lodepng_encode(out, outsize, image, w, h, &state);
-  result = state.error; /*read the error before the state is cleaned up*/
-  lodepng_state_cleanup(&state);
-  return result;
-}
-
-/*Encodes a raw image given as 8-bit-per-channel RGBA by forwarding to lodepng_encode_memory.*/
-unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h)
-{
-  const unsigned error = lodepng_encode_memory(out, outsize, image, w, h, LCT_RGBA, 8);
-  return error;
-}
-
-/*Encodes a raw image given as 8-bit-per-channel RGB by forwarding to lodepng_encode_memory.*/
-unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h)
-{
-  const unsigned error = lodepng_encode_memory(out, outsize, image, w, h, LCT_RGB, 8);
-  return error;
-}
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Encodes raw pixels with lodepng_encode_memory and, only if that reports no error, writes
-the encoded bytes to filename with lodepng_save_file. The temporary buffer is always
-released. Returns the first nonzero error code encountered, or 0.
-*/
-unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h,
-                             LodePNGColorType colortype, unsigned bitdepth)
-{
-  /*start from a null/empty buffer so the lodepng_free below never sees an indeterminate
-  pointer, whatever the encoder does with its output parameters*/
-  unsigned char* buffer = 0;
-  size_t buffersize = 0;
-  unsigned error = lodepng_encode_memory(&buffer, &buffersize, image, w, h, colortype, bitdepth);
-  if(!error) error = lodepng_save_file(buffer, buffersize, filename);
-  lodepng_free(buffer);
-  return error;
-}
-
-/*Writes a raw 8-bit-per-channel RGBA image to a PNG file by forwarding to lodepng_encode_file.*/
-unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h)
-{
-  const unsigned error = lodepng_encode_file(filename, image, w, h, LCT_RGBA, 8);
-  return error;
-}
-
-/*Writes a raw 8-bit-per-channel RGB image to a PNG file by forwarding to lodepng_encode_file.*/
-unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h)
-{
-  const unsigned error = lodepng_encode_file(filename, image, w, h, LCT_RGB, 8);
-  return error;
-}
-#endif /*LODEPNG_COMPILE_DISK*/
-
-/*
-Fills a LodePNGEncoderSettings with its defaults: the nested zlib settings are initialized
-by lodepng_compress_settings_init and every other field is assigned a fixed value below.
-*/
-void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings)
-{
-  lodepng_compress_settings_init(&settings->zlibsettings);
-
-  /*color handling*/
-  settings->auto_convert = 1;
-  settings->force_palette = 0;
-
-  /*scanline filtering*/
-  settings->filter_strategy = LFS_MINSUM;
-  settings->filter_palette_zero = 1;
-  settings->predefined_filters = 0;
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  /*text chunk handling*/
-  settings->text_compression = 1;
-  settings->add_id = 0;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-}
-
-#endif /*LODEPNG_COMPILE_ENCODER*/
-#endif /*LODEPNG_COMPILE_PNG*/
-
-#ifdef LODEPNG_COMPILE_ERROR_TEXT
-/*
-This returns the description of a numerical error code in English. This is also
-the documentation of all the error codes.
-*/
-const char* lodepng_error_text(unsigned code)
-{
-  /*Codes that have no case below (2-9, 12, 46, 47, 65, 70, 85 and anything above 94)
-  reach the final return and yield "unknown error code".*/
-  switch(code)
-  {
-    case 0: return "no error, everything went ok";
-    case 1: return "nothing done yet"; /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/
-    case 10: return "end of input memory reached without huffman end code"; /*while huffman decoding*/
-    case 11: return "error in code tree made it jump outside of huffman tree"; /*while huffman decoding*/
-    case 13: return "problem while processing dynamic deflate block";
-    case 14: return "problem while processing dynamic deflate block";
-    case 15: return "problem while processing dynamic deflate block";
-    case 16: return "unexisting code while processing dynamic deflate block";
-    case 17: return "end of out buffer memory reached while inflating";
-    case 18: return "invalid distance code while inflating";
-    case 19: return "end of out buffer memory reached while inflating";
-    case 20: return "invalid deflate block BTYPE encountered while decoding";
-    case 21: return "NLEN is not ones complement of LEN in a deflate block";
-     /*end of out buffer memory reached while inflating:
-     This can happen if the inflated deflate data is longer than the amount of bytes required to fill up
-     all the pixels of the image, given the color depth and image dimensions. Something that doesn't
-     happen in a normal, well encoded, PNG image.*/
-    case 22: return "end of out buffer memory reached while inflating";
-    case 23: return "end of in buffer memory reached while inflating";
-    case 24: return "invalid FCHECK in zlib header";
-    case 25: return "invalid compression method in zlib header";
-    case 26: return "FDICT encountered in zlib header while it's not used for PNG";
-    case 27: return "PNG file is smaller than a PNG header";
-    /*Checks the magic file header, the first 8 bytes of the PNG file*/
-    case 28: return "incorrect PNG signature, it's no PNG or corrupted";
-    case 29: return "first chunk is not the header chunk";
-    case 30: return "chunk length too large, chunk broken off at end of file";
-    case 31: return "illegal PNG color type or bpp";
-    case 32: return "illegal PNG compression method";
-    case 33: return "illegal PNG filter method";
-    case 34: return "illegal PNG interlace method";
-    case 35: return "chunk length of a chunk is too large or the chunk too small";
-    case 36: return "illegal PNG filter type encountered";
-    case 37: return "illegal bit depth for this color type given";
-    case 38: return "the palette is too big"; /*more than 256 colors*/
-    case 39: return "more palette alpha values given in tRNS chunk than there are colors in the palette";
-    case 40: return "tRNS chunk has wrong size for greyscale image";
-    case 41: return "tRNS chunk has wrong size for RGB image";
-    case 42: return "tRNS chunk appeared while it was not allowed for this color type";
-    case 43: return "bKGD chunk has wrong size for palette image";
-    case 44: return "bKGD chunk has wrong size for greyscale image";
-    case 45: return "bKGD chunk has wrong size for RGB image";
-    case 48: return "empty input buffer given to decoder. Maybe caused by non-existing file?";
-    case 49: return "jumped past memory while generating dynamic huffman tree";
-    case 50: return "jumped past memory while generating dynamic huffman tree";
-    case 51: return "jumped past memory while inflating huffman block";
-    case 52: return "jumped past memory while inflating";
-    case 53: return "size of zlib data too small";
-    case 54: return "repeat symbol in tree while there was no value symbol yet";
-    /*jumped past tree while generating huffman tree, this could be when the
-    tree will have more leaves than symbols after generating it out of the
-    given lengths. They call this an oversubscribed dynamic bit lengths tree in zlib.*/
-    case 55: return "jumped past tree while generating huffman tree";
-    case 56: return "given output image colortype or bitdepth not supported for color conversion";
-    case 57: return "invalid CRC encountered (checking CRC can be disabled)";
-    case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)";
-    case 59: return "requested color conversion not supported";
-    case 60: return "invalid window size given in the settings of the encoder (must be 0-32768)";
-    case 61: return "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)";
-    /*LodePNG leaves the choice of RGB to greyscale conversion formula to the user.*/
-    case 62: return "conversion from color to greyscale not supported";
-    case 63: return "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk"; /*(2^31-1)*/
-    /*this would result in the inability of a deflated block to ever contain an end code. It must be at least 1.*/
-    case 64: return "the length of the END symbol 256 in the Huffman tree is 0";
-    case 66: return "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes";
-    case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte";
-    case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors";
-    case 69: return "unknown chunk type with 'critical' flag encountered by the decoder";
-    case 71: return "unexisting interlace mode given to encoder (must be 0 or 1)";
-    case 72: return "while decoding, unexisting compression method encountering in zTXt or iTXt chunk (it must be 0)";
-    case 73: return "invalid tIME chunk size";
-    case 74: return "invalid pHYs chunk size";
-    /*length could be wrong, or data chopped off*/
-    case 75: return "no null termination char found while decoding text chunk";
-    case 76: return "iTXt chunk too short to contain required bytes";
-    case 77: return "integer overflow in buffer size";
-    case 78: return "failed to open file for reading"; /*file doesn't exist or couldn't be opened for reading*/
-    case 79: return "failed to open file for writing";
-    case 80: return "tried creating a tree of 0 symbols";
-    case 81: return "lazy matching at pos 0 is impossible";
-    case 82: return "color conversion to palette requested while a color isn't in palette";
-    case 83: return "memory allocation failed";
-    case 84: return "given image too small to contain all pixels to be encoded";
-    case 86: return "impossible offset in lz77 encoding (internal bug)";
-    case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined";
-    case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy";
-    case 89: return "text chunk keyword too short or long: must have size 1-79";
-    /*the windowsize in the LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/
-    case 90: return "windowsize must be a power of two";
-    case 91: return "invalid decompressed idat size";
-    case 92: return "too many pixels, not supported";
-    case 93: return "zero width or height is invalid";
-    case 94: return "header chunk must have a size of 13 bytes";
-  }
-  /*reached for every code that has no case above*/
-  return "unknown error code";
-}
-#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
-
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* // C++ Wrapper                                                          // */
-/* ////////////////////////////////////////////////////////////////////////// */
-/* ////////////////////////////////////////////////////////////////////////// */
-
-#ifdef LODEPNG_COMPILE_CPP
-namespace lodepng
-{
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Resizes buffer to the size reported by lodepng_filesize and fills it with
-lodepng_buffer_file. Returns 78 when the reported size is negative and 0 for an empty file.
-*/
-unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename)
-{
-  const char* path = filename.c_str();
-  const long filesize = lodepng_filesize(path);
-  if(filesize < 0) return 78;
-
-  const size_t count = (size_t)filesize;
-  buffer.resize(count);
-  if(count == 0) return 0; /*nothing to read, and &buffer[0] must not be taken on an empty vector*/
-  return lodepng_buffer_file(&buffer[0], count, path);
-}
-
-/*
-Writes the given buffer to the file through lodepng_save_file, overwriting the file rather
-than appending to it. An empty vector is passed on as a null pointer with size 0.
-*/
-unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename)
-{
-  const unsigned char* bytes = 0;
-  if(!buffer.empty()) bytes = &buffer[0];
-  return lodepng_save_file(bytes, buffer.size(), filename.c_str());
-}
-#endif /* LODEPNG_COMPILE_DISK */
-
-#ifdef LODEPNG_COMPILE_ZLIB
-#ifdef LODEPNG_COMPILE_DECODER
-/*
-Runs zlib_decompress on the input and appends whatever buffer it produced to out, then
-frees that buffer. The error code of zlib_decompress is returned unchanged.
-*/
-unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
-                    const LodePNGDecompressSettings& settings)
-{
-  unsigned char* inflated = 0;
-  size_t inflatedsize = 0;
-  const unsigned error = zlib_decompress(&inflated, &inflatedsize, in, insize, &settings);
-  if(!inflated) return error; /*nothing was allocated, so nothing to append or free*/
-
-  out.insert(out.end(), inflated, inflated + inflatedsize);
-  lodepng_free(inflated);
-  return error;
-}
-
-/*Vector overload: forwards to the pointer overload, passing a null pointer for an empty input.*/
-unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
-                    const LodePNGDecompressSettings& settings)
-{
-  const unsigned char* data = 0;
-  if(!in.empty()) data = &in[0];
-  return decompress(out, data, in.size(), settings);
-}
-#endif /* LODEPNG_COMPILE_DECODER */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*
-Runs zlib_compress on the input and appends whatever buffer it produced to out, then frees
-that buffer. The error code of zlib_compress is returned unchanged.
-*/
-unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
-                  const LodePNGCompressSettings& settings)
-{
-  unsigned char* deflated = 0;
-  size_t deflatedsize = 0;
-  const unsigned error = zlib_compress(&deflated, &deflatedsize, in, insize, &settings);
-  if(!deflated) return error; /*nothing was allocated, so nothing to append or free*/
-
-  out.insert(out.end(), deflated, deflated + deflatedsize);
-  lodepng_free(deflated);
-  return error;
-}
-
-/*Vector overload: forwards to the pointer overload, passing a null pointer for an empty input.*/
-unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
-                  const LodePNGCompressSettings& settings)
-{
-  const unsigned char* data = 0;
-  if(!in.empty()) data = &in[0];
-  return compress(out, data, in.size(), settings);
-}
-#endif /* LODEPNG_COMPILE_ENCODER */
-#endif /* LODEPNG_COMPILE_ZLIB */
-
-
-#ifdef LODEPNG_COMPILE_PNG
-
-/*Default constructor: initializes the state with lodepng_state_init.*/
-State::State()
-{
-  lodepng_state_init(this);
-}
-
-/*Copy constructor: initializes this state first, then copies other into it with lodepng_state_copy.*/
-State::State(const State& other)
-{
-  lodepng_state_init(this);
-  lodepng_state_copy(this, &other);
-}
-
-/*Destructor: releases the state with lodepng_state_cleanup.*/
-State::~State()
-{
-  lodepng_state_cleanup(this);
-}
-
-/*
-Copy assignment: copies other into this state with lodepng_state_copy.
-Assigning a state to itself is a no-op, so the copy routine is never handed the same
-object as both destination and source.
-*/
-State& State::operator=(const State& other)
-{
-  if(this != &other) lodepng_state_copy(this, &other);
-  return *this;
-}
-
-#ifdef LODEPNG_COMPILE_DECODER
-
-/*
-Decodes a PNG held in memory to raw pixels of the requested colortype/bitdepth and appends
-them to out; w and h are filled in by lodepng_decode_memory. Returns the LodePNG error
-code. out is only appended to when a buffer was produced and no error was reported.
-*/
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const unsigned char* in,
-                size_t insize, LodePNGColorType colortype, unsigned bitdepth)
-{
-  /*null-initialized so the unconditional free below is always given a defined pointer*/
-  unsigned char* buffer = 0;
-  unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth);
-  if(buffer && !error)
-  {
-    /*the State is only used to describe the raw format for the size computation*/
-    State state;
-    state.info_raw.colortype = colortype;
-    state.info_raw.bitdepth = bitdepth;
-    size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw);
-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-  }
-  /*free on the error path as well, so a buffer returned together with an error is not leaked*/
-  lodepng_free(buffer);
-  return error;
-}
-
-/*
-Vector overload: forwards to the pointer overload, passing a null pointer for an empty
-input vector.
-*/
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                const std::vector<unsigned char>& in, LodePNGColorType colortype, unsigned bitdepth)
-{
-  /*the callee takes the size as size_t; casting it to unsigned first would truncate large
-  inputs on platforms where size_t is wider than unsigned*/
-  return decode(out, w, h, in.empty() ? 0 : &in[0], in.size(), colortype, bitdepth);
-}
-
-/*
-Decodes a PNG held in memory using the settings in state and appends the raw pixels to out.
-The pixel count in bytes comes from lodepng_get_raw_size for state.info_raw. The temporary
-buffer is freed whether or not decoding succeeded.
-*/
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                State& state,
-                const unsigned char* in, size_t insize)
-{
-  unsigned char* pixels = NULL;
-  const unsigned error = lodepng_decode(&pixels, &w, &h, &state, in, insize);
-  if(!error && pixels)
-  {
-    const size_t count = lodepng_get_raw_size(w, h, &state.info_raw);
-    out.insert(out.end(), pixels, pixels + count);
-  }
-  lodepng_free(pixels);
-  return error;
-}
-
-/*Vector overload of the State-based decode: passes a null pointer for an empty input vector.*/
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                State& state,
-                const std::vector<unsigned char>& in)
-{
-  const unsigned char* data = 0;
-  if(!in.empty()) data = &in[0];
-  return decode(out, w, h, state, data, in.size());
-}
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Loads the named file with load_file and decodes its contents with the vector overload.
-A load error is returned as-is without attempting to decode.
-*/
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const std::string& filename,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  std::vector<unsigned char> png;
-  const unsigned loaderror = load_file(png, filename);
-  return loaderror ? loaderror : decode(out, w, h, png, colortype, bitdepth);
-}
-#endif /* LODEPNG_COMPILE_DISK */
-#endif /* LODEPNG_COMPILE_DECODER */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*
-Encodes raw pixels of the given colortype/bitdepth with lodepng_encode_memory and appends
-the resulting PNG bytes to out. Returns the LodePNG error code.
-*/
-unsigned encode(std::vector<unsigned char>& out, const unsigned char* in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  /*null/zero-initialized so the if(buffer) test below never reads an indeterminate value*/
-  unsigned char* buffer = 0;
-  size_t buffersize = 0;
-  unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth);
-  if(buffer)
-  {
-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-    lodepng_free(buffer);
-  }
-  return error;
-}
-
-/*
-Vector overload: returns 84 when the input vector holds fewer bytes than
-lodepng_get_raw_size_lct reports for w, h and the given format; otherwise forwards to the
-pointer overload.
-*/
-unsigned encode(std::vector<unsigned char>& out,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  if(in.size() < lodepng_get_raw_size_lct(w, h, colortype, bitdepth)) return 84;
-
-  const unsigned char* pixels = 0;
-  if(!in.empty()) pixels = &in[0];
-  return encode(out, pixels, w, h, colortype, bitdepth);
-}
-
-/*
-Encodes raw pixels with lodepng_encode using the settings in state and appends the
-resulting PNG bytes to out. Returns the LodePNG error code.
-*/
-unsigned encode(std::vector<unsigned char>& out,
-                const unsigned char* in, unsigned w, unsigned h,
-                State& state)
-{
-  /*null/zero-initialized so the if(buffer) test below never reads an indeterminate value*/
-  unsigned char* buffer = 0;
-  size_t buffersize = 0;
-  unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state);
-  if(buffer)
-  {
-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-    lodepng_free(buffer);
-  }
-  return error;
-}
-
-/*
-Vector overload of the State-based encode: returns 84 when the input vector holds fewer
-bytes than lodepng_get_raw_size reports for w, h and state.info_raw; otherwise forwards to
-the pointer overload.
-*/
-unsigned encode(std::vector<unsigned char>& out,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                State& state)
-{
-  if(in.size() < lodepng_get_raw_size(w, h, &state.info_raw)) return 84;
-
-  const unsigned char* pixels = 0;
-  if(!in.empty()) pixels = &in[0];
-  return encode(out, pixels, w, h, state);
-}
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Encodes raw pixels into a temporary vector and, only if that succeeds, writes it to
-filename with save_file. Returns the first nonzero error code, or 0.
-*/
-unsigned encode(const std::string& filename,
-                const unsigned char* in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  std::vector<unsigned char> png;
-  const unsigned error = encode(png, in, w, h, colortype, bitdepth);
-  if(error) return error;
-  return save_file(png, filename);
-}
-
-/*
-Vector overload of the file encode: returns 84 when the input vector holds fewer bytes than
-lodepng_get_raw_size_lct reports for w, h and the given format; otherwise forwards to the
-pointer overload.
-*/
-unsigned encode(const std::string& filename,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  if(in.size() < lodepng_get_raw_size_lct(w, h, colortype, bitdepth)) return 84;
-
-  const unsigned char* pixels = 0;
-  if(!in.empty()) pixels = &in[0];
-  return encode(filename, pixels, w, h, colortype, bitdepth);
-}
-#endif /* LODEPNG_COMPILE_DISK */
-#endif /* LODEPNG_COMPILE_ENCODER */
-#endif /* LODEPNG_COMPILE_PNG */
-} /* namespace lodepng */
-#endif /*LODEPNG_COMPILE_CPP*/

+ 0 - 1759
3rdparty/lodepng/lodepng.h

@@ -1,1759 +0,0 @@
-/*
-LodePNG version 20160501
-
-Copyright (c) 2005-2016 Lode Vandevenne
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-    1. The origin of this software must not be misrepresented; you must not
-    claim that you wrote the original software. If you use this software
-    in a product, an acknowledgment in the product documentation would be
-    appreciated but is not required.
-
-    2. Altered source versions must be plainly marked as such, and must not be
-    misrepresented as being the original software.
-
-    3. This notice may not be removed or altered from any source
-    distribution.
-*/
-
-#ifndef LODEPNG_H
-#define LODEPNG_H
-
-#include <string.h> /*for size_t*/
-
-extern const char* LODEPNG_VERSION_STRING;
-
-/*
-The following #defines are used to create code sections. They can be disabled
-to disable code sections, which can give faster compile time and smaller binary.
-The "NO_COMPILE" defines are designed to be used to pass as defines to the
-compiler command to disable them without modifying this header, e.g.
--DLODEPNG_NO_COMPILE_ZLIB for gcc.
-In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to
-allow implementing a custom lodepng_crc32.
-*/
-/*deflate & zlib. If disabled, you must specify alternative zlib functions in
-the custom_zlib field of the compress and decompress settings*/
-#ifndef LODEPNG_NO_COMPILE_ZLIB
-#define LODEPNG_COMPILE_ZLIB
-#endif
-/*png encoder and png decoder*/
-#ifndef LODEPNG_NO_COMPILE_PNG
-#define LODEPNG_COMPILE_PNG
-#endif
-/*deflate&zlib decoder and png decoder*/
-#ifndef LODEPNG_NO_COMPILE_DECODER
-#define LODEPNG_COMPILE_DECODER
-#endif
-/*deflate&zlib encoder and png encoder*/
-#ifndef LODEPNG_NO_COMPILE_ENCODER
-#define LODEPNG_COMPILE_ENCODER
-#endif
-/*the optional built in harddisk file loading and saving functions*/
-#ifndef LODEPNG_NO_COMPILE_DISK
-#define LODEPNG_COMPILE_DISK
-#endif
-/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/
-#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS
-#define LODEPNG_COMPILE_ANCILLARY_CHUNKS
-#endif
-/*ability to convert error numerical codes to English text string*/
-#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT
-#define LODEPNG_COMPILE_ERROR_TEXT
-#endif
-/*Compile the default allocators (C's free, malloc and realloc). If you disable this,
-you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your
-source files with custom allocators.*/
-#ifndef LODEPNG_NO_COMPILE_ALLOCATORS
-#define LODEPNG_COMPILE_ALLOCATORS
-#endif
-/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/
-#ifdef __cplusplus
-#ifndef LODEPNG_NO_COMPILE_CPP
-#define LODEPNG_COMPILE_CPP
-#endif
-#endif
-
-#ifdef LODEPNG_COMPILE_CPP
-#include <vector>
-#include <string>
-#endif /*LODEPNG_COMPILE_CPP*/
-
-#ifdef LODEPNG_COMPILE_PNG
-/*The PNG color types (also used for raw).*/
-/*
-Color type identifiers. The numeric values are not contiguous (1 and 5 are unused), so
-they should not be used as dense array indices or iterated as a range.
-NOTE(review): the values appear to be the color type codes of the PNG IHDR chunk - confirm
-against the PNG specification before relying on that.
-*/
-typedef enum LodePNGColorType
-{
-  LCT_GREY = 0, /*greyscale: 1,2,4,8,16 bit*/
-  LCT_RGB = 2, /*RGB: 8,16 bit*/
-  LCT_PALETTE = 3, /*palette: 1,2,4,8 bit*/
-  LCT_GREY_ALPHA = 4, /*greyscale with alpha: 8,16 bit*/
-  LCT_RGBA = 6 /*RGB with alpha: 8,16 bit*/
-} LodePNGColorType;
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*
-Converts PNG data in memory to raw pixel data.
-out: Output parameter. Pointer to buffer that will contain the raw pixel data.
-     After decoding, its size is w * h * (bytes per pixel) bytes larger than
-     initially. Bytes per pixel depends on colortype and bitdepth.
-     Must be freed after usage with free(*out).
-     Note: for 16-bit per channel colors, uses big endian format like PNG does.
-w: Output parameter. Pointer to width of pixel data.
-h: Output parameter. Pointer to height of pixel data.
-in: Memory buffer with the PNG file.
-insize: size of the in buffer.
-colortype: the desired color type for the raw output image. See explanation on PNG color types.
-bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types.
-Return value: LodePNG error code (0 means no error).
-*/
-unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h,
-                               const unsigned char* in, size_t insize,
-                               LodePNGColorType colortype, unsigned bitdepth);
-
-/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/
-unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h,
-                          const unsigned char* in, size_t insize);
-
-/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/
-unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h,
-                          const unsigned char* in, size_t insize);
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Load PNG from disk, from file with given name.
-Same as the other decode functions, but instead takes a filename as input.
-*/
-unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h,
-                             const char* filename,
-                             LodePNGColorType colortype, unsigned bitdepth);
-
-/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/
-unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h,
-                               const char* filename);
-
-/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/
-unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h,
-                               const char* filename);
-#endif /*LODEPNG_COMPILE_DISK*/
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*
-Converts raw pixel data into a PNG image in memory. The colortype and bitdepth
-  of the output PNG image cannot be chosen, they are automatically determined
-  by the colortype, bitdepth and content of the input pixel data.
-  Note: for 16-bit per channel colors, needs big endian format like PNG does.
-out: Output parameter. Pointer to buffer that will contain the PNG image data.
-     Must be freed after usage with free(*out).
-outsize: Output parameter. Pointer to the size in bytes of the out buffer.
-image: The raw pixel data to encode. The size of this buffer should be
-       w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth.
-w: width of the raw pixel data in pixels.
-h: height of the raw pixel data in pixels.
-colortype: the color type of the raw input image. See explanation on PNG color types.
-bitdepth: the bit depth of the raw input image. See explanation on PNG color types.
-Return value: LodePNG error code (0 means no error).
-*/
-unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize,
-                               const unsigned char* image, unsigned w, unsigned h,
-                               LodePNGColorType colortype, unsigned bitdepth);
-
-/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/
-unsigned lodepng_encode32(unsigned char** out, size_t* outsize,
-                          const unsigned char* image, unsigned w, unsigned h);
-
-/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/
-unsigned lodepng_encode24(unsigned char** out, size_t* outsize,
-                          const unsigned char* image, unsigned w, unsigned h);
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Converts raw pixel data into a PNG file on disk.
-Same as the other encode functions, but instead takes a filename as output.
-NOTE: This overwrites existing files without warning!
-*/
-unsigned lodepng_encode_file(const char* filename,
-                             const unsigned char* image, unsigned w, unsigned h,
-                             LodePNGColorType colortype, unsigned bitdepth);
-
-/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/
-unsigned lodepng_encode32_file(const char* filename,
-                               const unsigned char* image, unsigned w, unsigned h);
-
-/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/
-unsigned lodepng_encode24_file(const char* filename,
-                               const unsigned char* image, unsigned w, unsigned h);
-#endif /*LODEPNG_COMPILE_DISK*/
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-
-#ifdef LODEPNG_COMPILE_CPP
-namespace lodepng
-{
-#ifdef LODEPNG_COMPILE_DECODER
-/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype
-is the format to output the pixels to. Default is RGBA 8-bit per channel.*/
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                const unsigned char* in, size_t insize,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                const std::vector<unsigned char>& in,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Converts PNG file from disk to raw pixel data in memory.
-Same as the other decode functions, but instead takes a filename as input.
-*/
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                const std::string& filename,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-#endif /* LODEPNG_COMPILE_DISK */
-#endif /* LODEPNG_COMPILE_DECODER */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype
-is that of the raw input data. The output PNG color type will be auto chosen.*/
-unsigned encode(std::vector<unsigned char>& out,
-                const unsigned char* in, unsigned w, unsigned h,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-unsigned encode(std::vector<unsigned char>& out,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Converts 32-bit RGBA raw pixel data into a PNG file on disk.
-Same as the other encode functions, but instead takes a filename as output.
-NOTE: This overwrites existing files without warning!
-*/
-unsigned encode(const std::string& filename,
-                const unsigned char* in, unsigned w, unsigned h,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-unsigned encode(const std::string& filename,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
-#endif /* LODEPNG_COMPILE_DISK */
-#endif /* LODEPNG_COMPILE_ENCODER */
-} /* namespace lodepng */
-#endif /*LODEPNG_COMPILE_CPP*/
-#endif /*LODEPNG_COMPILE_PNG*/
-
-#ifdef LODEPNG_COMPILE_ERROR_TEXT
-/*Returns an English description of the numerical error code.*/
-const char* lodepng_error_text(unsigned code);
-#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*Settings for zlib decompression*/
-typedef struct LodePNGDecompressSettings LodePNGDecompressSettings;
-struct LodePNGDecompressSettings
-{
-  unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/
-
-  /*use custom zlib decoder instead of built in one (default: null)*/
-  unsigned (*custom_zlib)(unsigned char**, size_t*,
-                          const unsigned char*, size_t,
-                          const LodePNGDecompressSettings*);
-  /*use custom inflate decoder instead of built in one (default: null)
-  if custom_zlib is used, custom_inflate is ignored since only the built in
-  zlib function will call custom_inflate*/
-  unsigned (*custom_inflate)(unsigned char**, size_t*,
-                             const unsigned char*, size_t,
-                             const LodePNGDecompressSettings*);
-
-  const void* custom_context; /*optional custom settings for custom functions*/
-};
-
-extern const LodePNGDecompressSettings lodepng_default_decompress_settings;
-void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings);
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*
-Settings for zlib compression. Tweaking these settings tweaks the balance
-between speed and compression ratio.
-*/
-typedef struct LodePNGCompressSettings LodePNGCompressSettings;
-struct LodePNGCompressSettings /*deflate = compress*/
-{
-  /*LZ77 related settings*/
-  unsigned btype; /*the block type for LZ (0, 1, 2 or 3, see zlib standard). Should be 2 for proper compression.*/
-  unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/
-  unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/
-  unsigned minmatch; /*minimum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0*/
-  unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/
-  unsigned lazymatching; /*use lazy matching: better compression but a bit slower. Default: true*/
-
-  /*use custom zlib encoder instead of built in one (default: null)*/
-  unsigned (*custom_zlib)(unsigned char**, size_t*,
-                          const unsigned char*, size_t,
-                          const LodePNGCompressSettings*);
-  /*use custom deflate encoder instead of built in one (default: null)
-  if custom_zlib is used, custom_deflate is ignored since only the built in
-  zlib function will call custom_deflate*/
-  unsigned (*custom_deflate)(unsigned char**, size_t*,
-                             const unsigned char*, size_t,
-                             const LodePNGCompressSettings*);
-
-  const void* custom_context; /*optional custom settings for custom functions*/
-};
-
-extern const LodePNGCompressSettings lodepng_default_compress_settings;
-void lodepng_compress_settings_init(LodePNGCompressSettings* settings);
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#ifdef LODEPNG_COMPILE_PNG
-/*
-Color mode of an image. Contains all information required to decode the pixel
-bits to RGBA colors. This information is the same as used in the PNG file
-format, and is used both for PNG and raw image data in LodePNG.
-*/
-typedef struct LodePNGColorMode
-{
-  /*header (IHDR)*/
-  LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/
-  unsigned bitdepth;  /*bits per sample, see PNG standard or documentation further in this header file*/
-
-  /*
-  palette (PLTE and tRNS)
-
-  Dynamically allocated with the colors of the palette, including alpha.
-  When encoding a PNG, to store your colors in the palette of the LodePNGColorMode, first use
-  lodepng_palette_clear, then for each color use lodepng_palette_add.
-  If you encode an image without alpha with palette, don't forget to put value 255 in each A byte of the palette.
-
-  When decoding, by default you can ignore this palette, since LodePNG already
-  fills the palette colors in the pixels of the raw RGBA output.
-
-  The palette is only supported for color type 3.
-  */
-  unsigned char* palette; /*palette in RGBARGBA... order. When allocated, must be either 0, or have size 1024*/
-  size_t palettesize; /*palette size in number of colors (amount of bytes is 4 * palettesize)*/
-
-  /*
-  transparent color key (tRNS)
-
-  This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit.
-  For greyscale PNGs, r, g and b will all 3 be set to the same.
-
-  When decoding, by default you can ignore this information, since LodePNG sets
-  pixels with this key to transparent already in the raw RGBA output.
-
-  The color key is only supported for color types 0 and 2.
-  */
-  unsigned key_defined; /*is a transparent color key given? 0 = false, 1 = true*/
-  unsigned key_r;       /*red/greyscale component of color key*/
-  unsigned key_g;       /*green component of color key*/
-  unsigned key_b;       /*blue component of color key*/
-} LodePNGColorMode;
-
-/*init, cleanup and copy functions to use with this struct*/
-void lodepng_color_mode_init(LodePNGColorMode* info);
-void lodepng_color_mode_cleanup(LodePNGColorMode* info);
-/*return value is error code (0 means no error)*/
-unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source);
-
-void lodepng_palette_clear(LodePNGColorMode* info);
-/*add 1 color to the palette*/
-unsigned lodepng_palette_add(LodePNGColorMode* info,
-                             unsigned char r, unsigned char g, unsigned char b, unsigned char a);
-
-/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/
-unsigned lodepng_get_bpp(const LodePNGColorMode* info);
-/*get the amount of color channels used, based on colortype in the struct.
-If a palette is used, it counts as 1 channel.*/
-unsigned lodepng_get_channels(const LodePNGColorMode* info);
-/*is it a greyscale type? (only colortype 0 or 4)*/
-unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info);
-/*has it got an alpha channel? (only colortype 4 or 6)*/
-unsigned lodepng_is_alpha_type(const LodePNGColorMode* info);
-/*has it got a palette? (only colortype 3)*/
-unsigned lodepng_is_palette_type(const LodePNGColorMode* info);
-/*only returns true if there is a palette and there is a value in the palette with alpha < 255.
-Loops through the palette to check this.*/
-unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info);
-/*
-Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image.
-Returns true if the image can have translucent or invisible pixels (it may still be opaque if it doesn't use such pixels).
-Returns false if the image can only have opaque pixels.
-In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values,
-or if "key_defined" is true.
-*/
-unsigned lodepng_can_have_alpha(const LodePNGColorMode* info);
-/*Returns the byte size of a raw image buffer with given width, height and color mode*/
-size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color);
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-/*The information of a Time chunk in PNG.*/
-typedef struct LodePNGTime
-{
-  unsigned year;    /*2 bytes used (0-65535)*/
-  unsigned month;   /*1-12*/
-  unsigned day;     /*1-31*/
-  unsigned hour;    /*0-23*/
-  unsigned minute;  /*0-59*/
-  unsigned second;  /*0-60 (to allow for leap seconds)*/
-} LodePNGTime;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-/*Information about the PNG image, except pixels, width and height.*/
-typedef struct LodePNGInfo
-{
-  /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/
-  unsigned compression_method;/*compression method of the original file. Always 0.*/
-  unsigned filter_method;     /*filter method of the original file*/
-  unsigned interlace_method;  /*interlace method of the original file*/
-  LodePNGColorMode color;     /*color type and bits, palette and transparency of the PNG file*/
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  /*
-  suggested background color chunk (bKGD)
-  This color uses the same color mode as the PNG (except alpha channel), which can be 1-bit to 16-bit.
-
-  For greyscale PNGs, r, g and b will all 3 be set to the same. When encoding
-  the encoder writes the red one. For palette PNGs: When decoding, the RGB value
-  will be stored, not a palette index. But when encoding, specify the index of
-  the palette in background_r, the other two are then ignored.
-
-  The decoder does not use this background color to edit the color of pixels.
-  */
-  unsigned background_defined; /*is a suggested background color given?*/
-  unsigned background_r;       /*red component of suggested background color*/
-  unsigned background_g;       /*green component of suggested background color*/
-  unsigned background_b;       /*blue component of suggested background color*/
-
-  /*
-  non-international text chunks (tEXt and zTXt)
-
-  The char** arrays each contain num strings. The actual messages are in
-  text_strings, while text_keys are keywords that give a short description what
-  the actual text represents, e.g. Title, Author, Description, or anything else.
-
-  A keyword is minimum 1 character and maximum 79 characters long. It's
-  discouraged to use a single line length longer than 79 characters for texts.
-
-  Don't allocate these text buffers yourself. Use the init/cleanup functions
-  correctly and use lodepng_add_text and lodepng_clear_text.
-  */
-  size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/
-  char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/
-  char** text_strings; /*the actual text*/
-
-  /*
-  international text chunks (iTXt)
-  Similar to the non-international text chunks, but with additional strings
-  "langtags" and "transkeys".
-  */
-  size_t itext_num; /*the amount of international texts in this PNG*/
-  char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/
-  char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. ISO 639 language tag*/
-  char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/
-  char** itext_strings; /*the actual international text - UTF-8 string*/
-
-  /*time chunk (tIME)*/
-  unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/
-  LodePNGTime time;
-
-  /*phys chunk (pHYs)*/
-  unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined; if 1, there is one*/
-  unsigned phys_x; /*pixels per unit in x direction*/
-  unsigned phys_y; /*pixels per unit in y direction*/
-  unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/
-
-  /*
-  unknown chunks
-  There are 3 buffers, one for each position in the PNG where unknown chunks can appear
-  each buffer contains all unknown chunks for that position consecutively
-  The 3 buffers are the unknown chunks between certain critical chunks:
-  0: IHDR-PLTE, 1: PLTE-IDAT, 2: IDAT-IEND
-  Do not allocate or traverse this data yourself. Use the chunk traversing functions declared
-  later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct.
-  */
-  unsigned char* unknown_chunks_data[3];
-  size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-} LodePNGInfo;
-
-/*init, cleanup and copy functions to use with this struct*/
-void lodepng_info_init(LodePNGInfo* info);
-void lodepng_info_cleanup(LodePNGInfo* info);
-/*return value is error code (0 means no error)*/
-unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source);
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/
-unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts at once*/
-
-void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/
-unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
-                           const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-
-/*
-Converts raw buffer from one color type to another color type, based on
-LodePNGColorMode structs to describe the input and output color type.
-See the reference manual at the end of this header file to see which color conversions are supported.
-return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported)
-The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel
-of the output color type (lodepng_get_bpp).
-For < 8 bpp images, there should not be padding bits at the end of scanlines.
-For 16-bit per channel colors, uses big endian format like PNG does.
-Return value is LodePNG error code
-*/
-unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
-                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
-                         unsigned w, unsigned h);
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*
-Settings for the decoder. This contains settings for the PNG and the Zlib
-decoder, but not the Info settings from the Info structs.
-*/
-typedef struct LodePNGDecoderSettings
-{
-  LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/
-
-  unsigned ignore_crc; /*ignore CRC checksums*/
-
-  unsigned color_convert; /*whether to convert the PNG to the color type you want. Default: yes*/
-
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  unsigned read_text_chunks; /*if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/
-  /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/
-  unsigned remember_unknown_chunks;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-} LodePNGDecoderSettings;
-
-void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings);
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*automatically use color type with less bits per pixel if losslessly possible. Default: AUTO*/
-typedef enum LodePNGFilterStrategy
-{
-  /*every filter at zero*/
-  LFS_ZERO,
-  /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/
-  LFS_MINSUM,
-  /*Use the filter type that gives smallest Shannon entropy for this scanline. Depending
-  on the image, this is better or worse than minsum.*/
-  LFS_ENTROPY,
-  /*
-  Brute-force-search PNG filters by compressing each filter for each scanline.
-  Experimental, very slow, and only rarely gives better compression than MINSUM.
-  */
-  LFS_BRUTE_FORCE,
-  /*use predefined_filters buffer: you specify the filter type for each scanline*/
-  LFS_PREDEFINED
-} LodePNGFilterStrategy;
-
-/*Gives characteristics about the colors of the image, which helps decide which color model to use for encoding.
-Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.*/
-typedef struct LodePNGColorProfile
-{
-  unsigned colored; /*not greyscale*/
-  unsigned key; /*if true, image is not opaque. Only if true and alpha is false, color key is possible.*/
-  unsigned short key_r; /*these values are always in 16-bit bitdepth in the profile*/
-  unsigned short key_g;
-  unsigned short key_b;
-  unsigned alpha; /*alpha channel or alpha palette required*/
-  unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16.*/
-  unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order*/
-  unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for greyscale only. 16 if 16-bit per channel required.*/
-} LodePNGColorProfile;
-
-void lodepng_color_profile_init(LodePNGColorProfile* profile);
-
-/*Get a LodePNGColorProfile of the image.*/
-unsigned lodepng_get_color_profile(LodePNGColorProfile* profile,
-                                   const unsigned char* image, unsigned w, unsigned h,
-                                   const LodePNGColorMode* mode_in);
-/*The function LodePNG uses internally to decide the PNG color with auto_convert.
-Chooses an optimal color model, e.g. grey if only grey pixels, palette if < 256 colors, ...*/
-unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out,
-                                   const unsigned char* image, unsigned w, unsigned h,
-                                   const LodePNGColorMode* mode_in);
-
-/*Settings for the encoder.*/
-typedef struct LodePNGEncoderSettings
-{
-  LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/
-
-  unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/
-
-  /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than
-  8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to
-  completely follow the official PNG heuristic, filter_palette_zero must be true and
-  filter_strategy must be LFS_MINSUM*/
-  unsigned filter_palette_zero;
-  /*Which filter strategy to use when not using zeroes due to filter_palette_zero.
-  Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/
-  LodePNGFilterStrategy filter_strategy;
-  /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with
-  the same length as the amount of scanlines in the image, and each value must be <= 5. You
-  have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero
-  must be set to 0 to ensure this is also used on palette or low bitdepth images.*/
-  const unsigned char* predefined_filters;
-
-  /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette).
-  If colortype is 3, PLTE is _always_ created.*/
-  unsigned force_palette;
-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
-  /*add LodePNG identifier and version as a text chunk, for debugging*/
-  unsigned add_id;
-  /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/
-  unsigned text_compression;
-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-} LodePNGEncoderSettings;
-
-void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings);
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-
-#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
-/*The settings, state and information for extended encoding and decoding.*/
-typedef struct LodePNGState
-{
-#ifdef LODEPNG_COMPILE_DECODER
-  LodePNGDecoderSettings decoder; /*the decoding settings*/
-#endif /*LODEPNG_COMPILE_DECODER*/
-#ifdef LODEPNG_COMPILE_ENCODER
-  LodePNGEncoderSettings encoder; /*the encoding settings*/
-#endif /*LODEPNG_COMPILE_ENCODER*/
-  LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/
-  LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/
-  unsigned error;
-#ifdef LODEPNG_COMPILE_CPP
-  /* For the lodepng::State subclass. */
-  virtual ~LodePNGState(){}
-#endif
-} LodePNGState;
-
-/*init, cleanup and copy functions to use with this struct*/
-void lodepng_state_init(LodePNGState* state);
-void lodepng_state_cleanup(LodePNGState* state);
-void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source);
-#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*
-Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and
-getting much more information about the PNG image and color mode.
-*/
-unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
-                        LodePNGState* state,
-                        const unsigned char* in, size_t insize);
-
-/*
-Read the PNG header, but not the actual data. This returns only the information
-that is in the header chunk of the PNG, such as width, height and color type. The
-information is placed in the info_png field of the LodePNGState.
-*/
-unsigned lodepng_inspect(unsigned* w, unsigned* h,
-                         LodePNGState* state,
-                         const unsigned char* in, size_t insize);
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*This function allocates the out buffer with standard malloc and stores the size in *outsize.*/
-unsigned lodepng_encode(unsigned char** out, size_t* outsize,
-                        const unsigned char* image, unsigned w, unsigned h,
-                        LodePNGState* state);
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-/*
-The lodepng_chunk functions are normally not needed, except to traverse the
-unknown chunks stored in the LodePNGInfo struct, or add new ones to it.
-It also allows traversing the chunks of an encoded PNG file yourself.
-
-PNG standard chunk naming conventions:
-First byte: uppercase = critical, lowercase = ancillary
-Second byte: uppercase = public, lowercase = private
-Third byte: must be uppercase
-Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy
-*/
-
-/*
-Gets the length of the data of the chunk. Total chunk length has 12 bytes more.
-There must be at least 4 bytes to read from. If the result value is too large,
-it may be corrupt data.
-*/
-unsigned lodepng_chunk_length(const unsigned char* chunk);
-
-/*puts the 4-byte type in null terminated string*/
-void lodepng_chunk_type(char type[5], const unsigned char* chunk);
-
-/*check if the type is the given type*/
-unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type);
-
-/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/
-unsigned char lodepng_chunk_ancillary(const unsigned char* chunk);
-
-/*0: public, 1: private (see PNG standard)*/
-unsigned char lodepng_chunk_private(const unsigned char* chunk);
-
-/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/
-unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk);
-
-/*get pointer to the data of the chunk, where the input points to the header of the chunk*/
-unsigned char* lodepng_chunk_data(unsigned char* chunk);
-const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk);
-
-/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/
-unsigned lodepng_chunk_check_crc(const unsigned char* chunk);
-
-/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/
-void lodepng_chunk_generate_crc(unsigned char* chunk);
-
-/*iterate to next chunks. don't use on IEND chunk, as there is no next chunk then*/
-unsigned char* lodepng_chunk_next(unsigned char* chunk);
-const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk);
-
-/*
-Appends chunk to the data in out. The given chunk should already have its chunk header.
-The out variable and outlength are updated to reflect the new reallocated buffer.
-Returns error code (0 if it went ok)
-*/
-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk);
-
-/*
-Appends new chunk to out. The chunk to append is given by giving its length, type
-and data separately. The type is a 4-letter string.
-The out variable and outlength are updated to reflect the new reallocated buffer.
-Returns error code (0 if it went ok)
-*/
-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
-                              const char* type, const unsigned char* data);
-
-
-/*Calculate CRC32 of buffer*/
-unsigned lodepng_crc32(const unsigned char* buf, size_t len);
-#endif /*LODEPNG_COMPILE_PNG*/
-
-
-#ifdef LODEPNG_COMPILE_ZLIB
-/*
-This zlib part can be used independently to zlib compress and decompress a
-buffer. It cannot be used to create gzip files however, and it only supports the
-part of zlib that is required for PNG, it does not support dictionaries.
-*/
-
-#ifdef LODEPNG_COMPILE_DECODER
-/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/
-unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
-                         const unsigned char* in, size_t insize,
-                         const LodePNGDecompressSettings* settings);
-
-/*
-Decompresses Zlib data. Reallocates the out buffer and appends the data. The
-data must be according to the zlib specification.
-Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
-buffer and *outsize its size in bytes. out must be freed by user after usage.
-*/
-unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize,
-                                 const unsigned char* in, size_t insize,
-                                 const LodePNGDecompressSettings* settings);
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/*
-Compresses data with Zlib. Reallocates the out buffer and appends the data.
-Zlib adds a small header and trailer around the deflate data.
-The data is output in the format of the zlib specification.
-Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
-buffer and *outsize its size in bytes. out must be freed by user after usage.
-*/
-unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize,
-                               const unsigned char* in, size_t insize,
-                               const LodePNGCompressSettings* settings);
-
-/*
-Find length-limited Huffman code for given frequencies. This function is in the
-public interface only for tests, it's used internally by lodepng_deflate.
-*/
-unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
-                                      size_t numcodes, unsigned maxbitlen);
-
-/*Compress a buffer with deflate. See RFC 1951. Out buffer must be freed after use.*/
-unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
-                         const unsigned char* in, size_t insize,
-                         const LodePNGCompressSettings* settings);
-
-#endif /*LODEPNG_COMPILE_ENCODER*/
-#endif /*LODEPNG_COMPILE_ZLIB*/
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Load a file from disk into buffer. The function allocates the out buffer, and
-after usage you should free it.
-out: output parameter, contains pointer to loaded buffer.
-outsize: output parameter, size of the allocated out buffer
-filename: the path to the file to load
-return value: error code (0 means ok)
-*/
-unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename);
-
-/*
-Save a file from buffer to disk. Warning, if it exists, this function overwrites
-the file without warning!
-buffer: the buffer to write
-buffersize: size of the buffer to write
-filename: the path to the file to save to
-return value: error code (0 means ok)
-*/
-unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename);
-#endif /*LODEPNG_COMPILE_DISK*/
-
-#ifdef LODEPNG_COMPILE_CPP
-/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */
-namespace lodepng
-{
-#ifdef LODEPNG_COMPILE_PNG
-class State : public LodePNGState
-{
-  public:
-    State();
-    State(const State& other);
-    virtual ~State();
-    State& operator=(const State& other);
-};
-
-#ifdef LODEPNG_COMPILE_DECODER
-/* Same as other lodepng::decode, but using a State for more settings and information. */
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                State& state,
-                const unsigned char* in, size_t insize);
-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
-                State& state,
-                const std::vector<unsigned char>& in);
-#endif /*LODEPNG_COMPILE_DECODER*/
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/* Same as other lodepng::encode, but using a State for more settings and information. */
-unsigned encode(std::vector<unsigned char>& out,
-                const unsigned char* in, unsigned w, unsigned h,
-                State& state);
-unsigned encode(std::vector<unsigned char>& out,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                State& state);
-#endif /*LODEPNG_COMPILE_ENCODER*/
-
-#ifdef LODEPNG_COMPILE_DISK
-/*
-Load a file from disk into an std::vector.
-return value: error code (0 means ok)
-*/
-unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename);
-
-/*
-Save the binary data in an std::vector to a file on disk. The file is overwritten
-without warning.
-*/
-unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename);
-#endif /* LODEPNG_COMPILE_DISK */
-#endif /* LODEPNG_COMPILE_PNG */
-
-#ifdef LODEPNG_COMPILE_ZLIB
-#ifdef LODEPNG_COMPILE_DECODER
-/* Zlib-decompress an unsigned char buffer */
-unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
-                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
-
-/* Zlib-decompress an std::vector */
-unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
-                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
-#endif /* LODEPNG_COMPILE_DECODER */
-
-#ifdef LODEPNG_COMPILE_ENCODER
-/* Zlib-compress an unsigned char buffer */
-unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
-                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
-
-/* Zlib-compress an std::vector */
-unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
-                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
-#endif /* LODEPNG_COMPILE_ENCODER */
-#endif /* LODEPNG_COMPILE_ZLIB */
-} /* namespace lodepng */
-#endif /*LODEPNG_COMPILE_CPP*/
-
-/*
-TODO:
-[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often
-[.] check compatibility with various compilers  - done but needs to be redone for every newer version
-[X] converting color to 16-bit per channel types
-[ ] read all public PNG chunk types (but never let the color profile and gamma ones touch RGB values)
-[ ] make sure encoder generates no chunks with size > (2^31)-1
-[ ] partial decoding (stream processing)
-[X] let the "isFullyOpaque" function check color keys and transparent palettes too
-[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl"
-[ ] don't stop decoding on errors like 69, 57, 58 (make warnings)
-[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes
-[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ...
-[ ] allow user to give data (void*) to custom allocator
-*/
-
-#endif /*LODEPNG_H inclusion guard*/
-
-/*
-LodePNG Documentation
----------------------
-
-0. table of contents
---------------------
-
-  1. about
-   1.1. supported features
-   1.2. features not supported
-  2. C and C++ version
-  3. security
-  4. decoding
-  5. encoding
-  6. color conversions
-    6.1. PNG color types
-    6.2. color conversions
-    6.3. padding bits
-    6.4. A note about 16-bits per channel and endianness
-  7. error values
-  8. chunks and PNG editing
-  9. compiler support
-  10. examples
-   10.1. decoder C++ example
-   10.2. decoder C example
-  11. state settings reference
-  12. changes
-  13. contact information
-
-
-1. about
---------
-
-PNG is a file format to store raster images losslessly with good compression,
-supporting different color types and alpha channel.
-
-LodePNG is a PNG codec according to the Portable Network Graphics (PNG)
-Specification (Second Edition) - W3C Recommendation 10 November 2003.
-
-The specifications used are:
-
-*) Portable Network Graphics (PNG) Specification (Second Edition):
-     http://www.w3.org/TR/2003/REC-PNG-20031110
-*) RFC 1950 ZLIB Compressed Data Format version 3.3:
-     http://www.gzip.org/zlib/rfc-zlib.html
-*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3:
-     http://www.gzip.org/zlib/rfc-deflate.html
-
-The most recent version of LodePNG can currently be found at
-http://lodev.org/lodepng/
-
-LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds
-extra functionality.
-
-LodePNG consists of two files:
--lodepng.h: the header file for both C and C++
--lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage
-
-If you want to start using LodePNG right away without reading this doc, get the
-examples from the LodePNG website to see how to use it in code, or check the
-smaller examples in chapter 10 here.
-
-LodePNG is simple but only supports the basic requirements. To achieve
-simplicity, the following design choices were made: There are no dependencies
-on any external library. There are functions to decode and encode a PNG with
-a single function call, and extended versions of these functions taking a
-LodePNGState struct allowing to specify or get more information. By default
-the colors of the raw image are always RGB or RGBA, no matter what color type
-the PNG file uses. To read and write files, there are simple functions to
-convert the files to/from buffers in memory.
-
-This all makes LodePNG suitable for loading textures in games, demos and small
-programs, ... It's less suitable for full fledged image editors, loading PNGs
-over network (it requires all the image data to be available before decoding can
-begin), life-critical systems, ...
-
-1.1. supported features
------------------------
-
-The following features are supported by the decoder:
-
-*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image,
-   or the same color type as the PNG
-*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image
-*) Adam7 interlace and deinterlace for any color type
-*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk
-*) support for alpha channels, including RGBA color model, translucent palettes and color keying
-*) zlib decompression (inflate)
-*) zlib compression (deflate)
-*) CRC32 and ADLER32 checksums
-*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks.
-*) the following chunks are supported (generated/interpreted) by both encoder and decoder:
-    IHDR: header information
-    PLTE: color palette
-    IDAT: pixel data
-    IEND: the final chunk
-    tRNS: transparency for palettized images
-    tEXt: textual information
-    zTXt: compressed textual information
-    iTXt: international textual information
-    bKGD: suggested background color
-    pHYs: physical dimensions
-    tIME: modification time
-
-1.2. features not supported
----------------------------
-
-The following features are _not_ supported:
-
-*) some features needed to make a conformant PNG-Editor might be still missing.
-*) partial loading/stream processing. All data must be available and is processed in one call.
-*) The following public chunks are not supported but treated as unknown chunks by LodePNG
-    cHRM, gAMA, iCCP, sRGB, sBIT, hIST, sPLT
-   Some of these are not supported on purpose: LodePNG wants to provide the RGB values
-   stored in the pixels, not values modified by system dependent gamma or color models.
-
-
-2. C and C++ version
---------------------
-
-The C version uses buffers allocated with alloc that you need to free()
-yourself. You need to use init and cleanup functions for each struct whenever
-using a struct from the C version to avoid exploits and memory leaks.
-
-The C++ version has extra functions with std::vectors in the interface and the
-lodepng::State class which is a LodePNGState with constructor and destructor.
-
-These files work without modification for both C and C++ compilers because all
-the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers
-ignore it, and the C code is made to compile both with strict ISO C90 and C++.
-
-To use the C++ version, you need to rename the source file to lodepng.cpp
-(instead of lodepng.c), and compile it with a C++ compiler.
-
-To use the C version, you need to rename the source file to lodepng.c (instead
-of lodepng.cpp), and compile it with a C compiler.
-
-
-3. Security
------------
-
-Even if carefully designed, it's always possible that LodePNG contains possible
-exploits. If you discover one, please let me know, and it will be fixed.
-
-When using LodePNG, care has to be taken with the C version of LodePNG, as well
-as the C-style structs when working with C++. The following conventions are used
-for all C-style structs:
-
--if a struct has a corresponding init function, always call the init function when making a new one
--if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks
--if a struct has a corresponding copy function, use the copy function instead of "=".
- The destination must also be inited already.
-
-
-4. Decoding
------------
-
-Decoding converts a PNG compressed image to a raw pixel buffer.
-
-Most documentation on using the decoder is at its declarations in the header
-above. For C, simple decoding can be done with functions such as
-lodepng_decode32, and more advanced decoding can be done with the struct
-LodePNGState and lodepng_decode. For C++, all decoding can be done with the
-various lodepng::decode functions, and lodepng::State can be used for advanced
-features.
-
-When using the LodePNGState, it uses the following fields for decoding:
-*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here
-*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get
-*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use
-
-LodePNGInfo info_png
---------------------
-
-After decoding, this contains extra information of the PNG image, except the actual
-pixels, width and height because these are already gotten directly from the decoder
-functions.
-
-It contains for example the original color type of the PNG image, text comments,
-suggested background color, etc... More details about the LodePNGInfo struct are
-at its declaration documentation.
-
-LodePNGColorMode info_raw
--------------------------
-
-When decoding, here you can specify which color type you want
-the resulting raw image to be. If this is different from the colortype of the
-PNG, then the decoder will automatically convert the result. This conversion
-always works, except if you want it to convert a color PNG to greyscale or to
-a palette with missing colors.
-
-By default, 32-bit color is used for the result.
-
-LodePNGDecoderSettings decoder
-------------------------------
-
-The settings can be used to ignore the errors created by invalid CRC and Adler32
-chunks, and to disable the decoding of tEXt chunks.
-
-There's also a setting color_convert, true by default. If false, no conversion
-is done, the resulting data will be as it was in the PNG (after decompression)
-and you'll have to puzzle the colors of the pixels together yourself using the
-color type information in the LodePNGInfo.
-
-
-5. Encoding
------------
-
-Encoding converts a raw pixel buffer to a PNG compressed image.
-
-Most documentation on using the encoder is at its declarations in the header
-above. For C, simple encoding can be done with functions such as
-lodepng_encode32, and more advanced encoding can be done with the struct
-LodePNGState and lodepng_encode. For C++, all encoding can be done with the
-various lodepng::encode functions, and lodepng::State can be used for advanced
-features.
-
-Like the decoder, the encoder can also give errors. However it gives fewer errors
-since the encoder input is trusted, the decoder input (a PNG image that could
-be forged by anyone) is not trusted.
-
-When using the LodePNGState, it uses the following fields for encoding:
-*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be.
-*) LodePNGColorMode info_raw: here you say what color type the raw image (the input) has
-*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use
-
-LodePNGInfo info_png
---------------------
-
-When encoding, you use this the opposite way as when decoding: for encoding,
-you fill in the values you want the PNG to have before encoding. By default it's
-not needed to specify a color type for the PNG since it's automatically chosen,
-but it's possible to choose it yourself given the right settings.
-
-The encoder will not always exactly match the LodePNGInfo struct you give,
-it tries as close as possible. Some things are ignored by the encoder. The
-encoder uses, for example, the following settings from it when applicable:
-colortype and bitdepth, text chunks, time chunk, the color key, the palette, the
-background color, the interlace method, unknown chunks, ...
-
-When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk.
-If the palette contains any colors for which the alpha channel is not 255 (so
-there are translucent colors in the palette), it'll add a tRNS chunk.
-
-LodePNGColorMode info_raw
--------------------------
-
-You specify the color type of the raw image that you give to the input here,
-including a possible transparent color key and palette you happen to be using in
-your raw image data.
-
-By default, 32-bit color is assumed, meaning your input has to be in RGBA
-format with 4 bytes (unsigned chars) per pixel.
-
-LodePNGEncoderSettings encoder
-------------------------------
-
-The following settings are supported (some are in sub-structs):
-*) auto_convert: when this option is enabled, the encoder will
-automatically choose the smallest possible color mode (including color key) that
-can encode the colors of all pixels without information loss.
-*) btype: the block type for LZ77. 0 = uncompressed, 1 = fixed huffman tree,
-   2 = dynamic huffman tree (best compression). Should be 2 for proper
-   compression.
-*) use_lz77: whether or not to use LZ77 for compressed block types. Should be
-   true for proper compression.
-*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value
-   2048 by default, but can be set to 32768 for better, but slow, compression.
-*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE
-   chunk if force_palette is true. This can be used as a suggested palette to convert
-   to by viewers that don't support more than 256 colors (if those still exist)
-*) add_id: add text chunk "Encoder: LodePNG <version>" to the image.
-*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks.
-  zTXt chunks use zlib compression on the text. This gives a smaller result on
-  large texts but a larger result on small texts (such as a single program name).
-  It's all tEXt or all zTXt though, there's no separate setting per text yet.
-
-
-6. color conversions
---------------------
-
-An important thing to note about LodePNG, is that the color type of the PNG, and
-the color type of the raw image, are completely independent. By default, when
-you decode a PNG, you get the result as a raw image in the color type you want,
-no matter whether the PNG was encoded with a palette, greyscale or RGBA color.
-And if you encode an image, by default LodePNG will automatically choose the PNG
-color type that gives good compression based on the values of colors and amount
-of colors in the image. It can be configured to let you control it instead as
-well, though.
-
-To be able to do this, LodePNG does conversions from one color mode to another.
-It can convert from almost any color type to any other color type, except the
-following conversions: RGB to greyscale is not supported, and converting to a
-palette when the palette doesn't have a required color is not supported. This is
-not supported on purpose: this is information loss which requires a color
-reduction algorithm that is beyond the scope of a PNG encoder (yes, RGB to grey
-is easy, but there are multiple ways if you want to give some channels more
-weight).
-
-By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB
-color, no matter what color type the PNG has. And by default when encoding,
-LodePNG automatically picks the best color model for the output PNG, and expects
-the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control
-the color format of the images yourself, you can skip this chapter.
-
-6.1. PNG color types
---------------------
-
-A PNG image can have many color types, ranging from 1-bit color to 64-bit color,
-as well as palettized color modes. After the zlib decompression and unfiltering
-in the PNG image is done, the raw pixel data will have that color type and thus
-a certain amount of bits per pixel. If you want the output raw image after
-decoding to have another color type, a conversion is done by LodePNG.
-
-The PNG specification gives the following color types:
-
-0: greyscale, bit depths 1, 2, 4, 8, 16
-2: RGB, bit depths 8 and 16
-3: palette, bit depths 1, 2, 4 and 8
-4: greyscale with alpha, bit depths 8 and 16
-6: RGBA, bit depths 8 and 16
-
-Bit depth is the amount of bits per pixel per color channel. So the total amount
-of bits per pixel is: amount of channels * bitdepth.
-
-6.2. color conversions
-----------------------
-
-As explained in the sections about the encoder and decoder, you can specify
-color types and bit depths in info_png and info_raw to change the default
-behaviour.
-
-If, when decoding, you want the raw image to be something else than the default,
-you need to set the color type and bit depth you want in the LodePNGColorMode,
-or the parameters colortype and bitdepth of the simple decoding function.
-
-If, when encoding, you use another color type than the default in the raw input
-image, you need to specify its color type and bit depth in the LodePNGColorMode
-of the raw image, or use the parameters colortype and bitdepth of the simple
-encoding function.
-
-If, when encoding, you don't want LodePNG to choose the output PNG color type
-but control it yourself, you need to set auto_convert in the encoder settings
-to false, and specify the color type you want in the LodePNGInfo of the
-encoder (including palette: it can generate a palette if auto_convert is true,
-otherwise not).
-
-If the input and output color type differ (whether user chosen or auto chosen),
-LodePNG will do a color conversion, which follows the rules below, and may
-sometimes result in an error.
-
-To avoid some confusion:
--the decoder converts from PNG to raw image
--the encoder converts from raw image to PNG
--the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image
--the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG
--when encoding, the color type in LodePNGInfo is ignored if auto_convert
- is enabled, it is automatically generated instead
--when decoding, the color type in LodePNGInfo is set by the decoder to that of the original
- PNG image, but it can be ignored since the raw image has the color type you requested instead
--if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion
- between the color types is done if the color types are supported. If it is not
- supported, an error is returned. If the types are the same, no conversion is done.
--even though some conversions aren't supported, LodePNG supports loading PNGs from any
- colortype and saving PNGs to any colortype, sometimes it just requires preparing
- the raw image correctly before encoding.
--both encoder and decoder use the same color converter.
-
-Non supported color conversions:
--color to greyscale: no error is thrown, but the result will look ugly because
-only the red channel is taken
--anything to palette when that palette does not have that color in it: in this
-case an error is thrown
-
-Supported color conversions:
--anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA
--any grey or grey+alpha, to grey or grey+alpha
--anything to a palette, as long as the palette has the requested colors in it
--removing alpha channel
--higher to smaller bitdepth, and vice versa
-
-If you want no color conversion to be done (e.g. for speed or control):
--In the encoder, you can make it save a PNG with any color type by giving the
-raw color mode and LodePNGInfo the same color mode, and setting auto_convert to
-false.
--In the decoder, you can make it store the pixel data in the same color type
-as the PNG has, by setting the color_convert setting to false. Settings in
-info_raw are then ignored.
-
-The function lodepng_convert does the color conversion. It is available in the
-interface but normally isn't needed since the encoder and decoder already call
-it.
-
-6.3. padding bits
------------------
-
-In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines
-have a bit amount that isn't a multiple of 8, then padding bits are used so that each
-scanline starts at a fresh byte. But that is NOT true for the LodePNG raw input and output.
-The raw input image you give to the encoder, and the raw output image you get from the decoder
-will NOT have these padding bits, e.g. in the case of a 1-bit image with a width
-of 7 pixels, the first pixel of the second scanline will be the 8th bit of the first byte,
-not the first bit of a new byte.
-
-6.4. A note about 16-bits per channel and endianness
-----------------------------------------------------
-
-LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like
-for any other color format. The 16-bit values are stored in big endian (most
-significant byte first) in these arrays. This is the opposite order of the
-little endian used by x86 CPU's.
-
-LodePNG always uses big endian because the PNG file format does so internally.
-Conversions to formats other than the one PNG uses internally are not supported by
-LodePNG on purpose, there are myriads of formats, including endianness of 16-bit
-colors, the order in which you store R, G, B and A, and so on. Supporting and
-converting to/from all that is outside the scope of LodePNG.
-
-This may mean that, depending on your use case, you may want to convert the big
-endian output of LodePNG to little endian with a for loop. This is certainly not
-always needed, many applications and libraries support big endian 16-bit colors
-anyway, but it means you cannot simply cast the unsigned char* buffer to an
-unsigned short* buffer on x86 CPUs.
-
-
-7. error values
----------------
-
-All functions in LodePNG that return an error code, return 0 if everything went
-OK, or a non-zero code if there was an error.
-
-The meaning of the LodePNG error values can be retrieved with the function
-lodepng_error_text: given the numerical error code, it returns a description
-of the error in English as a string.
-
-Check the implementation of lodepng_error_text to see the meaning of each code.
-
-
-8. chunks and PNG editing
--------------------------
-
-If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG
-editor that should follow the rules about handling of unknown chunks, or if your
-program is able to read other types of chunks than the ones handled by LodePNG,
-then that's possible with the chunk functions of LodePNG.
-
-A PNG chunk has the following layout:
-
-4 bytes length
-4 bytes type name
-length bytes data
-4 bytes CRC
-
-8.1. iterating through chunks
------------------------------
-
-If you have a buffer containing the PNG image data, then the first chunk (the
-IHDR chunk) starts at byte number 8 of that buffer. The first 8 bytes are the
-signature of the PNG and are not part of a chunk. But if you start at byte 8
-then you have a chunk, and can check the following things of it.
-
-NOTE: none of these functions check for memory buffer boundaries. To avoid
-exploits, always make sure the buffer contains all the data of the chunks.
-When using lodepng_chunk_next, make sure the returned value is within the
-allocated memory.
-
-unsigned lodepng_chunk_length(const unsigned char* chunk):
-
-Get the length of the chunk's data. The total chunk length is this length + 12.
-
-void lodepng_chunk_type(char type[5], const unsigned char* chunk):
-unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type):
-
-Get the type of the chunk or compare if it's a certain type
-
-unsigned char lodepng_chunk_critical(const unsigned char* chunk):
-unsigned char lodepng_chunk_private(const unsigned char* chunk):
-unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk):
-
-Check if the chunk is critical in the PNG standard (only IHDR, PLTE, IDAT and IEND are).
-Check if the chunk is private (public chunks are part of the standard, private ones not).
-Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical
-chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your
-program doesn't handle that type of unknown chunk.
-
-unsigned char* lodepng_chunk_data(unsigned char* chunk):
-const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk):
-
-Get a pointer to the start of the data of the chunk.
-
-unsigned lodepng_chunk_check_crc(const unsigned char* chunk):
-void lodepng_chunk_generate_crc(unsigned char* chunk):
-
-Check if the crc is correct or generate a correct one.
-
-unsigned char* lodepng_chunk_next(unsigned char* chunk):
-const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk):
-
-Iterate to the next chunk. This works if you have a buffer with consecutive chunks. Note that these
-functions do no boundary checking of the allocated data whatsoever, so make sure there is enough
-data available in the buffer to be able to go to the next chunk.
-
-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk):
-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
-                              const char* type, const unsigned char* data):
-
-These functions are used to create new chunks that are appended to the data in *out that has
-length *outlength. The append function appends an existing chunk to the new data. The create
-function creates a new chunk with the given parameters and appends it. Type is the 4-letter
-name of the chunk.
-
-8.2. chunks in info_png
------------------------
-
-The LodePNGInfo struct contains fields with the unknown chunk in it. It has 3
-buffers (each with size) to contain 3 types of unknown chunks:
-the ones that come before the PLTE chunk, the ones that come between the PLTE
-and the IDAT chunks, and the ones that come after the IDAT chunks.
-It's necessary to make the distinction between these 3 cases because the PNG
-standard forces to keep the ordering of unknown chunks compared to the critical
-chunks, but does not force any other ordering rules.
-
-info_png.unknown_chunks_data[0] is the chunks before PLTE
-info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT
-info_png.unknown_chunks_data[2] is the chunks after IDAT
-
-The chunks in these 3 buffers can be iterated through and read by using the same
-way described in the previous subchapter.
-
-When using the decoder to decode a PNG, you can make it store all unknown chunks
-if you set the option settings.remember_unknown_chunks to 1. By default, this
-option is off (0).
-
-The encoder will always encode unknown chunks that are stored in the info_png.
-If you need it to add a particular chunk that isn't known by LodePNG, you can
-use lodepng_chunk_append or lodepng_chunk_create to add it to the chunk data in
-info_png.unknown_chunks_data[x].
-
-Chunks that are known by LodePNG should not be added in that way. E.g. to make
-LodePNG add a bKGD chunk, set background_defined to true and add the correct
-parameters there instead.
-
-
-9. compiler support
--------------------
-
-No libraries other than the current standard C library are needed to compile
-LodePNG. For the C++ version, only the standard C++ library is needed on top.
-Add the files lodepng.c(pp) and lodepng.h to your project, include
-lodepng.h where needed, and your program can read/write PNG files.
-
-It is compatible with C90 and up, and C++03 and up.
-
-If performance is important, use optimization when compiling! For both the
-encoder and decoder, this makes a large difference.
-
-Make sure that LodePNG is compiled with the same compiler of the same version
-and with the same settings as the rest of the program, or the interfaces with
-std::vectors and std::strings in C++ can be incompatible.
-
-CHAR_BIT must be 8 or higher, because LodePNG uses unsigned chars for octets.
-
-*) gcc and g++
-
-LodePNG is developed in gcc so this compiler is natively supported. It gives no
-warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++
-version 4.7.1 on Linux, 32-bit and 64-bit.
-
-*) Clang
-
-Fully supported and warning-free.
-
-*) Mingw
-
-The Mingw compiler (a port of gcc for Windows) should be fully supported by
-LodePNG.
-
-*) Visual Studio and Visual C++ Express Edition
-
-LodePNG should be warning-free with warning level W4. Two warnings were disabled
-with pragmas though: warning 4244 about implicit conversions, and warning 4996
-where it wants to use a non-standard function fopen_s instead of the standard C
-fopen.
-
-Visual Studio may want "stdafx.h" files to be included in each source file and
-give an error "unexpected end of file while looking for precompiled header".
-This is not standard C++ and will not be added to the stock LodePNG. You can
-disable it for lodepng.cpp only by right clicking it, Properties, C/C++,
-Precompiled Headers, and set it to Not Using Precompiled Headers there.
-
-NOTE: Modern versions of VS should be fully supported, but old versions, e.g.
-VS6, are not guaranteed to work.
-
-*) Compilers on Macintosh
-
-LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for
-C and C++.
-
-*) Other Compilers
-
-If you encounter problems on any compilers, feel free to let me know and I may
-try to fix it if the compiler is modern and standards compliant.
-
-
-10. examples
-------------
-
-This decoder example shows the most basic usage of LodePNG. More complex
-examples can be found on the LodePNG website.
-
-10.1. decoder C++ example
--------------------------
-
-#include "lodepng.h"
-#include <iostream>
-
-int main(int argc, char *argv[])
-{
-  const char* filename = argc > 1 ? argv[1] : "test.png";
-
-  //load and decode
-  std::vector<unsigned char> image;
-  unsigned width, height;
-  unsigned error = lodepng::decode(image, width, height, filename);
-
-  //if there's an error, display it
-  if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
-
-  //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ...
-}
-
-10.2. decoder C example
------------------------
-
-#include "lodepng.h"
-
-int main(int argc, char *argv[])
-{
-  unsigned error;
-  unsigned char* image;
-  size_t width, height;
-  const char* filename = argc > 1 ? argv[1] : "test.png";
-
-  error = lodepng_decode32_file(&image, &width, &height, filename);
-
-  if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error));
-
-  / * use image here * /
-
-  free(image);
-  return 0;
-}
-
-11. state settings reference
-----------------------------
-
-A quick reference of some settings to set on the LodePNGState
-
-For decoding:
-
-state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums
-state.decoder.zlibsettings.custom_...: use custom inflate function
-state.decoder.ignore_crc: ignore CRC checksums
-state.decoder.color_convert: convert internal PNG color to chosen one
-state.decoder.read_text_chunks: whether to read in text metadata chunks
-state.decoder.remember_unknown_chunks: whether to read in unknown chunks
-state.info_raw.colortype: desired color type for decoded image
-state.info_raw.bitdepth: desired bit depth for decoded image
-state.info_raw....: more color settings, see struct LodePNGColorMode
-state.info_png....: no settings for decoder but output, see struct LodePNGInfo
-
-For encoding:
-
-state.encoder.zlibsettings.btype: disable compression by setting it to 0
-state.encoder.zlibsettings.use_lz77: use LZ77 in compression
-state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize
-state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match
-state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching
-state.encoder.zlibsettings.lazymatching: try one more LZ77 matching
-state.encoder.zlibsettings.custom_...: use custom deflate function
-state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png
-state.encoder.filter_palette_zero: PNG filter strategy for palette
-state.encoder.filter_strategy: PNG filter strategy to encode with
-state.encoder.force_palette: add palette even if not encoding to one
-state.encoder.add_id: add LodePNG identifier and version as a text chunk
-state.encoder.text_compression: use compressed text chunks for metadata
-state.info_raw.colortype: color type of raw input image you provide
-state.info_raw.bitdepth: bit depth of raw input image you provide
-state.info_raw: more color settings, see struct LodePNGColorMode
-state.info_png.color.colortype: desired color type if auto_convert is false
-state.info_png.color.bitdepth: desired bit depth if auto_convert is false
-state.info_png.color....: more color settings, see struct LodePNGColorMode
-state.info_png....: more PNG related settings, see struct LodePNGInfo
-
-
-12. changes
------------
-
-The version number of LodePNG is the date of the change given in the format
-yyyymmdd.
-
-Some changes aren't backwards compatible. Those are indicated with a (!)
-symbol.
-
-*) 18 apr 2016: Changed qsort to custom stable sort (for platforms w/o qsort).
-*) 09 apr 2016: Fixed colorkey usage detection, and better file loading (within
-   the limits of pure C90).
-*) 08 dec 2015: Made load_file function return error if file can't be opened.
-*) 24 okt 2015: Bugfix with decoding to palette output.
-*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding.
-*) 23 aug 2014: Reduced needless memory usage of decoder.
-*) 28 jun 2014: Removed fix_png setting, always support palette OOB for
-    simplicity. Made ColorProfile public.
-*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization.
-*) 22 dec 2013: Power of two windowsize required for optimization.
-*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key.
-*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png).
-*) 11 mar 2013 (!): Bugfix with custom free. Changed from "my" to "lodepng_"
-    prefix for the custom allocators and made it possible with a new #define to
-    use custom ones in your project without needing to change lodepng's code.
-*) 28 jan 2013: Bugfix with color key.
-*) 27 okt 2012: Tweaks in text chunk keyword length error handling.
-*) 8 okt 2012 (!): Added new filter strategy (entropy) and new auto color mode.
-    (no palette). Better deflate tree encoding. New compression tweak settings.
-    Faster color conversions while decoding. Some internal cleanups.
-*) 23 sep 2012: Reduced warnings in Visual Studio a little bit.
-*) 1 sep 2012 (!): Removed #define's for giving custom (de)compression functions
-    and made it work with function pointers instead.
-*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc
-    and free functions and toggle #defines from compiler flags. Small fixes.
-*) 6 may 2012 (!): Made plugging in custom zlib/deflate functions more flexible.
-*) 22 apr 2012 (!): Made interface more consistent, renaming a lot. Removed
-    redundant C++ codec classes. Reduced amount of structs. Everything changed,
-    but it is cleaner now imho and functionality remains the same. Also fixed
-    several bugs and shrunk the implementation code. Made new samples.
-*) 6 nov 2011 (!): By default, the encoder now automatically chooses the best
-    PNG color model and bit depth, based on the amount and type of colors of the
-    raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color.
-*) 9 okt 2011: simpler hash chain implementation for the encoder.
-*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching.
-*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking.
-    A bug with the PNG filtertype heuristic was fixed, so that it chooses much
-    better ones (it's quite significant). A setting to do an experimental, slow,
-    brute force search for PNG filter types is added.
-*) 17 aug 2011 (!): changed some C zlib related function names.
-*) 16 aug 2011: made the code less wide (max 120 characters per line).
-*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors.
-*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled.
-*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman
-    to optimize long sequences of zeros.
-*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and
-    LodePNG_InfoColor_canHaveAlpha functions for convenience.
-*) 7 nov 2010: added LodePNG_error_text function to get error code description.
-*) 30 okt 2010: made decoding slightly faster
-*) 26 okt 2010: (!) changed some C function and struct names (more consistent).
-     Reorganized the documentation and the declaration order in the header.
-*) 08 aug 2010: only changed some comments and external samples.
-*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version.
-*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers.
-*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could
-    read by ignoring the problem but windows apps couldn't.
-*) 06 jun 2008: added more error checks for out of memory cases.
-*) 26 apr 2008: added a few more checks here and there to ensure more safety.
-*) 06 mar 2008: crash with encoding of strings fixed
-*) 02 feb 2008: support for international text chunks added (iTXt)
-*) 23 jan 2008: small cleanups, and #defines to divide code in sections
-*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor.
-*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder.
-*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added
-    Also various fixes, such as in the deflate and the padding bits code.
-*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved
-    filtering code of encoder.
-*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A
-    C++ wrapper around this provides an interface almost identical to before.
-    Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code
-    are together in these files but it works both for C and C++ compilers.
-*) 29 dec 2007: (!) changed most integer types to unsigned int + other tweaks
-*) 30 aug 2007: bug fixed which makes this Borland C++ compatible
-*) 09 aug 2007: some VS2005 warnings removed again
-*) 21 jul 2007: deflate code placed in new namespace separate from zlib code
-*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images
-*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing
-    invalid std::vector element [0] fixed, and level 3 and 4 warnings removed
-*) 02 jun 2007: made the encoder add a tag with version by default
-*) 27 may 2007: zlib and png code separated (but still in the same file),
-    simple encoder/decoder functions added for more simple usage cases
-*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69),
-    moved some examples from here to lodepng_examples.cpp
-*) 12 may 2007: palette decoding bug fixed
-*) 24 apr 2007: changed the license from BSD to the zlib license
-*) 11 mar 2007: very simple addition: ability to encode bKGD chunks.
-*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding
-    palettized PNG images. Plus little interface change with palette and texts.
-*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes.
-    Fixed a bug where the end code of a block had length 0 in the Huffman tree.
-*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented
-    and supported by the encoder, resulting in smaller PNGs at the output.
-*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone.
-*) 24 jan 2007: gave encoder an error interface. Added color conversion from any
-    greyscale type to 8-bit greyscale with or without alpha.
-*) 21 jan 2007: (!) Totally changed the interface. It allows more color types
-    to convert to and is more uniform. See the manual for how it works now.
-*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days:
-    encode/decode custom tEXt chunks, separate classes for zlib & deflate, and
-    at last made the decoder give errors for incorrect Adler32 or Crc.
-*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel.
-*) 29 dec 2006: Added support for encoding images without alpha channel, and
-    cleaned out code as well as making certain parts faster.
-*) 28 dec 2006: Added "Settings" to the encoder.
-*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now.
-    Removed some code duplication in the decoder. Fixed little bug in an example.
-*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter.
-    Fixed a bug of the decoder with 16-bit per color.
-*) 15 okt 2006: Changed documentation structure
-*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the
-    given image buffer, however for now it's not compressed.
-*) 08 sep 2006: (!) Changed to interface with a Decoder class
-*) 30 jul 2006: (!) LodePNG_InfoPng, width and height are now retrieved in a
-    different way. Renamed decodePNG to decodePNGGeneric.
-*) 29 jul 2006: (!) Changed the interface: image info is now returned as a
-    struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy.
-*) 28 jul 2006: Cleaned the code and added new error checks.
-    Corrected terminology "deflate" into "inflate".
-*) 23 jun 2006: Added SDL example in the documentation in the header, this
-    example allows easy debugging by displaying the PNG and its transparency.
-*) 22 jun 2006: (!) Changed way to obtain error value. Added
-    loadFile function for convenience. Made decodePNG32 faster.
-*) 21 jun 2006: (!) Changed type of info vector to unsigned.
-    Changed position of palette in info vector. Fixed an important bug that
-    happened on PNGs with an uncompressed block.
-*) 16 jun 2006: Internally changed unsigned into unsigned where
-    needed, and performed some optimizations.
-*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them
-    in LodePNG namespace. Changed the order of the parameters. Rewrote the
-    documentation in the header. Renamed files to lodepng.cpp and lodepng.h
-*) 22 apr 2006: Optimized and improved some code
-*) 07 sep 2005: (!) Changed to std::vector interface
-*) 12 aug 2005: Initial release (C++, decoder only)
-
-
-13. contact information
------------------------
-
-Feel free to contact me with suggestions, problems, comments, ... concerning
-LodePNG. If you encounter a PNG image that doesn't work properly with this
-decoder, feel free to send it and I'll use it to find and fix the problem.
-
-My email address is (puzzle the account and domain together with an @ symbol):
-Domain: gmail dot com.
-Account: lode dot vandevenne.
-
-
-Copyright (c) 2005-2016 Lode Vandevenne
-*/

+ 0 - 24
3rdparty/nvtt/NVIDIA_Texture_Tools_LICENSE.txt

@@ -1,24 +0,0 @@
-NVIDIA Texture Tools 2.0 is licensed under the MIT license.
-
-Copyright (c) 2007 NVIDIA Corporation
-
-Permission is hereby granted, free of charge, to any person
-obtaining a copy of this software and associated documentation
-files (the "Software"), to deal in the Software without
-restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the
-Software is furnished to do so, subject to the following
-conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.

+ 0 - 75
3rdparty/nvtt/bc6h/bits.h

@@ -1,75 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-#ifndef _ZOH_BITS_H
-#define _ZOH_BITS_H
-
-// read/write a bitstream
-
-#include "nvcore/debug.h"
-
-namespace ZOH {
-
-// Bit-granular reader/writer over a caller-owned byte buffer.
-// Bit n of the stream is bit (n & 7) of byte (n >> 3). The buffer is never copied or freed here.
-class Bits
-{
-public:
-
-	// Writable stream over `data`, which must hold at least `maxdatabits` bits. Starts empty.
-	Bits(char *data, int maxdatabits)
-		: bptr(0), bend(0), bits(data), cbits(data), maxbits(maxdatabits), readonly(0)
-	{
-		nvAssert (data && maxdatabits > 0);
-	}
-	// Read-only stream over the first `availdatabits` bits of `data`.
-	Bits(const char *data, int availdatabits)
-		: bptr(0), bend(availdatabits), bits(0), cbits(data), maxbits(availdatabits), readonly(1)
-	{
-		nvAssert (data && availdatabits > 0);
-	}
-
-	// Append the low `nbits` bits of `value`, least significant bit first.
-	void write(int value, int nbits) {
-		nvAssert (nbits >= 0 && nbits < 32);
-		nvAssert (sizeof(int)>= 4);
-		for (int i=0; i<nbits; ++i)
-			writeone(value>>i);
-	}
-	// Read `nbits` bits and return them packed least significant bit first.
-	// Bits requested past the end of the stream read as 0 (see readone).
-	int read(int nbits) { 
-		nvAssert (nbits >= 0 && nbits < 32);
-		nvAssert (sizeof(int)>= 4);
-		int out = 0;
-		for (int i=0; i<nbits; ++i)
-			out |= readone() << i;
-		return out;
-	}
-	// Current bit position (next bit to be read or written).
-	int getptr() { return bptr; }
-	// Move the bit position; `ptr` must lie in [0, maxbits).
-	void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
-	// Number of valid bits in the stream (last written bit + 1).
-	int getsize() { return bend; }
-
-private:
-	int	bptr;		// next bit to read
-	int bend;		// last written bit + 1
-	char *bits;		// ptr to user bit stream (null for a read-only stream)
-	const char *cbits;	// ptr to const user bit stream (always valid; aliases bits when writable)
-	int maxbits;	// max size of user bit stream
-	char readonly;	// 1 if this is a read-only stream
-
-	// Fetch one bit as 0/1 and advance; returns 0 without advancing once the end is reached.
-	int readone() {
-		nvAssert (bptr < bend);
-		if (bptr >= bend) return 0;
-		// cbits is set by both constructors, so no per-mode pointer selection is needed.
-		int bit = cbits[bptr>>3] & (1 << (bptr & 7));
-		++bptr;
-		return bit != 0;
-	}
-	// Store the low bit of `bit` at the current position and advance, growing bend if needed.
-	void writeone(int bit) {
-		nvAssert (!readonly); // "Writing a read-only bit stream"
-		// Same assert-then-guard pattern as the capacity check below: never write
-		// through the null `bits` pointer of a read-only stream when asserts are off.
-		if (readonly) return;
-		nvAssert (bptr < maxbits);
-		if (bptr >= maxbits) return;
-		if (bit&1)
-			bits[bptr>>3] |= 1 << (bptr & 7);
-		else
-			bits[bptr>>3] &= ~(1 << (bptr & 7));
-		if (bptr++ >= bend) bend = bptr;
-	}
-};
-
-}
-
-#endif

+ 0 - 133
3rdparty/nvtt/bc6h/shapes_two.h

@@ -1,133 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-#pragma once
-#ifndef _ZOH_SHAPES_TWO_H
-#define _ZOH_SHAPES_TWO_H
-
-// shapes for two regions
-
-#define NREGIONS 2	// region ids stored in the shapes table are 0 and 1
-#define NSHAPES 64	// number of shapes; sizes both tables in this header
-#define SHAPEBITS 6	// 2^6 == NSHAPES
-
-static const int shapes[NSHAPES*16] = // 4x4 grid of region ids (0/1) per shape; four shapes side by side per 4-row group, read through REGION()
-{
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
-0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   0, 0, 0, 1,   
-0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   
-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
-0, 0, 0, 1,   1, 1, 1, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
-0, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 1,   0, 0, 0, 0,   
-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 0,   
-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 0,   0, 1, 1, 1,   
-1, 0, 0, 0,   0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 1, 1,   
-1, 1, 1, 0,   0, 0, 0, 0,   1, 0, 0, 0,   0, 0, 0, 1,   
-1, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 0, 0, 0,   
-
-0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 1, 1, 1,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 1,   
-0, 0, 0, 0,   1, 1, 0, 0,   1, 0, 0, 0,   0, 0, 1, 1,   
-0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   0, 0, 0, 1,   
-
-0, 0, 1, 1,   0, 0, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-0, 0, 0, 0,   1, 1, 0, 0,   0, 1, 1, 0,   1, 1, 0, 0,   
-
-0, 0, 0, 1,   0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 1,   1, 0, 0, 1,   
-1, 1, 1, 0,   1, 1, 1, 1,   1, 0, 0, 0,   1, 0, 0, 1,   
-1, 0, 0, 0,   0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   
-
-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   0, 0, 1, 1,   
-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   0, 0, 1, 1,   
-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   1, 1, 0, 0,   
-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   1, 1, 0, 0,   
-
-0, 0, 1, 1,   0, 1, 0, 1,   0, 1, 1, 0,   0, 1, 0, 1,   
-1, 1, 0, 0,   0, 1, 0, 1,   1, 0, 0, 1,   1, 0, 1, 0,   
-0, 0, 1, 1,   1, 0, 1, 0,   0, 1, 1, 0,   1, 0, 1, 0,   
-1, 1, 0, 0,   1, 0, 1, 0,   1, 0, 0, 1,   0, 1, 0, 1,   
-
-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   1, 0, 1, 1,   
-1, 1, 0, 0,   1, 1, 0, 0,   0, 1, 0, 0,   1, 1, 0, 1,   
-1, 1, 1, 0,   1, 0, 0, 0,   1, 1, 0, 0,   1, 1, 0, 0,   
-
-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 0, 0,   
-1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   0, 1, 1, 0,   
-0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   0, 0, 0, 0,   
-
-0, 1, 0, 0,   0, 0, 1, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
-1, 1, 1, 0,   0, 1, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-0, 1, 0, 0,   0, 0, 1, 0,   0, 1, 1, 1,   1, 1, 1, 0,   
-0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 0,   0, 1, 0, 0,   
-
-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 1, 1,   
-1, 1, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   
-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   1, 1, 0, 0,   
-0, 0, 1, 1,   1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   
-
-0, 1, 1, 0,   0, 1, 1, 0,   0, 1, 1, 1,   0, 0, 0, 1,   
-1, 1, 0, 0,   0, 0, 1, 1,   1, 1, 1, 0,   1, 0, 0, 0,   
-1, 1, 0, 0,   0, 0, 1, 1,   1, 0, 0, 0,   1, 1, 1, 0,   
-1, 0, 0, 1,   1, 0, 0, 1,   0, 0, 0, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-1, 1, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 0,   0, 1, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 1, 1, 1,   
-
-};
-
-#define	REGION(x,y,si)	shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]	// region id at column x, row y of shape si: (si&3) picks the 4-wide column block, (si>>2) the 64-int row group, 16 ints per text row
-
-static const int shapeindex_to_compressed_indices[NSHAPES*2] = // two entries per shape, one per region; presumably the pixel position whose index is stored compressed (anchor index) - TODO confirm
-{
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-
-	0,15,  0, 2,  0, 8,  0, 2,
-	0, 2,  0, 8,  0, 8,  0,15,
-	0, 2,  0, 8,  0, 2,  0, 2,
-	0, 8,  0, 8,  0, 2,  0, 2,
-
-	0,15,  0,15,  0, 6,  0, 8,
-	0, 2,  0, 8,  0,15,  0,15,
-	0, 2,  0, 8,  0, 2,  0, 2,
-	0, 2,  0,15,  0,15,  0, 6,
-
-	0, 6,  0, 2,  0, 6,  0, 8,
-	0,15,  0,15,  0, 2,  0, 2,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0, 2,  0, 2,  0,15
-
-};
-#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region)  shapeindex_to_compressed_indices[(si)*2+(region)]	// entry for region (0 or 1) of shape si; entries are stored as consecutive pairs
-
-#endif

+ 0 - 82
3rdparty/nvtt/bc6h/tile.h

@@ -1,82 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-#ifndef _ZOH_TILE_H
-#define _ZOH_TILE_H
-
-#include "zoh_utils.h"
-#include "nvmath/vector.h"
-#include <math.h>
-
-namespace ZOH {
-
-//#define	USE_IMPORTANCE_MAP	1		// define this if you want to increase importance of some pixels in tile
-// A block of up to TILE_W x TILE_H pixels plus a per-pixel importance weight.
-// size_x / size_y give the part of the fixed-size arrays that is actually in use.
-class Tile
-{
-public:
-	// Converts a half bit pattern to a float holding the integer produced by
-	// Utils::ushort_to_format (the value is an integral float, not the decoded half).
-	static float half2float(uint16 h)
-	{
-		return static_cast<float>(Utils::ushort_to_format(h));
-	}
-	// Inverse of half2float: truncates f to int and maps it back through
-	// Utils::format_to_ushort.
-	static uint16 float2half(float f)
-	{
-		return Utils::format_to_ushort(static_cast<int>(f));
-	}
-
-	// Fills importance_map for the used area: 5.0 for a pixel that matches one of
-	// its 4-neighbours, 1.0 otherwise.
-	void generate_importance_map()
-	{
-		for (int row = 0; row < size_y; ++row)
-		{
-			for (int col = 0; col < size_x; ++col)
-			{
-				float weight = 1.0f;
-				if (match_4_neighbor(col, row))
-					weight = 5.0f;
-				importance_map[row][col] = weight;
-			}
-		}
-	}
-	// True when (xn, yn) lies inside the used area and holds the same x/y/z
-	// components as (x, y). (x, y) itself is not range-checked.
-	bool is_equal(int x, int y, int xn, int yn)
-	{
-		const bool inside = xn >= 0 && xn < size_x && yn >= 0 && yn < size_y;
-		if (!inside)
-			return false;
-
-		const nv::Vector3 &self = data[y][x];
-		const nv::Vector3 &other = data[yn][xn];
-		return self.x == other.x && self.y == other.y && self.z == other.z;
-	}
-
-#ifdef USE_IMPORTANCE_MAP
-	// True when the pixel equals its left, right, upper or lower neighbour.
-	bool match_4_neighbor(int x, int y)
-	{
-		if (is_equal(x, y, x - 1, y)) return true;
-		if (is_equal(x, y, x + 1, y)) return true;
-		if (is_equal(x, y, x, y - 1)) return true;
-		return is_equal(x, y, x, y + 1);
-	}
-#else
-	// Importance weighting disabled: no pixel is ever reported as matching.
-	bool match_4_neighbor(int, int)
-	{
-		return false;
-	}
-#endif
-
-	// Leaves every member, including size_x and size_y, unset.
-	Tile() {}
-	~Tile() {}
-	// Sets the used size only; pixel data and importance_map are left for the caller to fill.
-	Tile(int xs, int ys) : size_x(xs), size_y(ys) {}
-
-	static const int TILE_H = 4;
-	static const int TILE_W = 4;
-	static const int TILE_TOTAL = TILE_H * TILE_W;
-	nv::Vector3 data[TILE_H][TILE_W];
-	float importance_map[TILE_H][TILE_W];
-	int	size_x, size_y;			// actual size of tile
-};
-
-}
-
-#endif // _ZOH_TILE_H

+ 0 - 197
3rdparty/nvtt/bc6h/zoh.cpp

@@ -1,197 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// the zoh compressor and decompressor
-
-#include "tile.h"
-#include "zoh.h"
-
-#include <string.h> // memcpy
-
-using namespace ZOH;
-
-
-// Returns true when the low five bits of the first block byte are one of
-// 0x03, 0x07, 0x0b or 0x0f; decompress() routes such blocks to decompressone().
-bool ZOH::isone(const char *block)
-{
-	switch (block[0] & 0x1F)
-	{
-	case 0x03:
-	case 0x07:
-	case 0x0b:
-	case 0x0f:
-		return true;
-	default:
-		return false;
-	}
-}
-
-// Encodes tile t with both compressone() and compresstwo() and copies the
-// candidate with the smaller returned value into block (ZOH::BLOCKSIZE chars).
-// A tie keeps the compressone() result.
-void ZOH::compress(const Tile &t, char *block)
-{
-	char candidate_one[ZOH::BLOCKSIZE];
-	char candidate_two[ZOH::BLOCKSIZE];
-
-	const float err_one = ZOH::compressone(t, candidate_one);
-	const float err_two = ZOH::compresstwo(t, candidate_two);
-
-	const char *winner = (err_one <= err_two) ? candidate_one : candidate_two;
-	memcpy(block, winner, ZOH::BLOCKSIZE);
-}
-
-// Decodes block into tile t, picking the decoder from isone(block).
-void ZOH::decompress(const char *block, Tile &t)
-{
-	if (!ZOH::isone(block))
-	{
-		ZOH::decompresstwo(block, t);
-		return;
-	}
-	ZOH::decompressone(block, t);
-}
-
-/*
-void ZOH::compress(string inf, string zohf)
-{
-	Array2D<Rgba> pixels;
-	int w, h;
-	char block[ZOH::BLOCKSIZE];
-
-	Exr::readRgba(inf, pixels, w, h);
-	FILE *zohfile = fopen(zohf.c_str(), "wb");
-	if (zohfile == NULL) throw "Unable to open .zoh file for write";
-
-	// stuff for progress bar O.o
-	int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W);
-	int tilecnt = 0;
-	int ndots = 25;
-	int dotcnt = 0;
-	printf("Progress [");
-	for (int i=0; i<ndots;++i) printf(" ");
-	printf("]\rProgress ["); fflush(stdout);
-
-	// convert to tiles and compress each tile
-	for (int y=0; y<h; y+=Tile::TILE_H)
-	{
-		int ysize = min(Tile::TILE_H, h-y);
-		for (int x=0; x<w; x+=Tile::TILE_W)
-		{
-			int xsize = min(Tile::TILE_W, w-x);
-			Tile t(xsize, ysize);
-
-			t.insert(pixels, x, y);
-
-			ZOH::compress(t, block);
-			if (fwrite(block, sizeof(char), ZOH::BLOCKSIZE, zohfile) != ZOH::BLOCKSIZE)
-				throw "File error on write";
-
-			// progress bar
-			++tilecnt;
-			if (tilecnt > (ntiles * dotcnt)/ndots) { printf("."); fflush(stdout); ++dotcnt; }
-		}
-	}
-
-	printf("]\n");		// advance to next line finally
-
-	if (fclose(zohfile)) throw "Close failed on .zoh file";
-}
-
-static int str2int(std::string s)
-{
-	int thing;
-	std::stringstream str (stringstream::in | stringstream::out);
-	str << s;
-	str >> thing;
-	return thing;
-}
-
-// zoh file name is ...-w-h.zoh, extract width and height
-static void extract(string zohf, int &w, int &h)
-{
-	size_t n = zohf.rfind('.', zohf.length()-1);
-	size_t n1 = zohf.rfind('-', n-1);
-	size_t n2 = zohf.rfind('-', n1-1);
-	string width = zohf.substr(n2+1, n1-n2-1);
-	w = str2int(width);
-	string height = zohf.substr(n1+1, n-n1-1);
-	h = str2int(height);
-}
-
-static int mode_to_prec[] = {
-	10,7,11,10,
-	10,7,11,11,
-	10,7,11,12,
-	10,7,9,16,
-	10,7,8,-1,
-	10,7,8,-1,
-	10,7,8,-1,
-	10,7,6,-1,
-};
-
-static int shapeindexhist[32], modehist[32], prechistone[16], prechisttwo[16], oneregion, tworegions;
-
-static void stats(char block[ZOH::BLOCKSIZE])
-{
-	char mode = block[0] & 0x1F; if ((mode & 0x3) == 0) mode = 0; if ((mode & 0x3) == 1) mode = 1; modehist[mode]++;
-	int prec = mode_to_prec[mode];
-	nvAssert (prec != -1);
-	if (!ZOH::isone(block))
-	{
-		tworegions++;
-		prechisttwo[prec]++;
-		int shapeindex = ((block[0] & 0xe0) >> 5) | ((block[1] & 0x3) << 3);
-		shapeindexhist[shapeindex]++;
-	}
-	else
-	{
-		oneregion++;
-		prechistone[prec]++;
-	}
-}
-
-static void printstats()
-{
-	printf("\nPrecision histogram 10b to 16b one region: "); for (int i=10; i<=16; ++i) printf("%d,", prechistone[i]);
-	printf("\nPrecision histogram 6b to 11b two regions: "); for (int i=6; i<=11; ++i) printf("%d,", prechisttwo[i]);
-	printf("\nMode histogram: "); for (int i=0; i<32; ++i) printf("%d,", modehist[i]);
-	printf("\nShape index histogram: "); for (int i=0; i<32; ++i) printf("%d,", shapeindexhist[i]);
-	printf("\nOne region %5.2f%%  Two regions %5.2f%%", 100.0*oneregion/float(oneregion+tworegions), 100.0*tworegions/float(oneregion+tworegions));
-	printf("\n");
-}
-
-void ZOH::decompress(string zohf, string outf)
-{
-	Array2D<Rgba> pixels;
-	int w, h;
-	char block[ZOH::BLOCKSIZE];
-
-	extract(zohf, w, h);
-	FILE *zohfile = fopen(zohf.c_str(), "rb");
-	if (zohfile == NULL) throw "Unable to open .zoh file for read";
-	pixels.resizeErase(h, w);
-
-	// convert to tiles and decompress each tile
-	for (int y=0; y<h; y+=Tile::TILE_H)
-	{
-		int ysize = min(Tile::TILE_H, h-y);
-		for (int x=0; x<w; x+=Tile::TILE_W)
-		{
-			int xsize = min(Tile::TILE_W, w-x);
-			Tile t(xsize, ysize);
-
-			if (fread(block, sizeof(char), ZOH::BLOCKSIZE, zohfile) != ZOH::BLOCKSIZE)
-				throw "File error on read";
-
-			stats(block);	// collect statistics
-
-			ZOH::decompress(block, t);
-
-			t.extract(pixels, x, y);
-		}
-	}
-	if (fclose(zohfile)) throw "Close failed on .zoh file";
-	Exr::writeRgba(outf, pixels, w, h);
-
-#ifndef EXTERNAL_RELEASE
-	printstats();	// print statistics
-#endif
-}
-*/

+ 0 - 65
3rdparty/nvtt/bc6h/zoh.h

@@ -1,65 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-#pragma once
-#ifndef _ZOH_H
-#define _ZOH_H
-
-#include "tile.h"
-
-namespace ZOH {
-
-// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f
-
-static const int NREGIONS_TWO	= 2;	// length of the endpts arrays taken by roughtwo/refinetwo
-static const int NREGIONS_ONE	= 1;	// length of the endpts arrays taken by roughone/refineone
-static const int NCHANNELS		= 3;	// length of the per-channel arrays in IntEndpts and ComprEndpts
-
-struct FltEndpts	// endpoint pair A/B as float vectors
-{
-    nv::Vector3 A;
-    nv::Vector3 B;
-};
-
-struct IntEndpts	// endpoint pair A/B as signed ints, one per channel
-{
-	int A[NCHANNELS];
-	int B[NCHANNELS];
-};
-
-struct ComprEndpts	// endpoint pair A/B as unsigned ints, one per channel
-{
-	uint A[NCHANNELS];
-	uint B[NCHANNELS];
-};
-
-static const int BLOCKSIZE=16;	// size in chars of one compressed block buffer
-static const int BITSIZE=128;	// BLOCKSIZE * 8
-
-void compress(const Tile &t, char *block);	// block receives BLOCKSIZE chars
-void decompress(const char *block, Tile &t);	// dispatches on isone(block)
-
-float compressone(const Tile &t, char *block);	// returned float is the value compress() compares (smaller wins)
-float compresstwo(const Tile &t, char *block);	// returned float is the value compress() compares (smaller wins)
-void decompressone(const char *block, Tile &t);	// used by decompress() when isone(block) is true
-void decompresstwo(const char *block, Tile &t);	// used by decompress() when isone(block) is false
-
-float refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block);
-float roughtwo(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_TWO]);
-
-float refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block);
-float roughone(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_ONE]);
-
-bool isone(const char *block);	// tests the low five bits of block[0]
-
-}
-
-#endif // _ZOH_H

+ 0 - 324
3rdparty/nvtt/bc6h/zoh_utils.cpp

@@ -1,324 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Utility and common routines
-
-#include "zoh_utils.h"
-#include "nvmath/vector.inl"
-#include <math.h>
-
-using namespace nv;
-using namespace ZOH;
-
-static const int denom7_weights_64[] = {0, 9, 18, 27, 37, 46, 55, 64};										// divided by 64
-static const int denom15_weights_64[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};		// divided by 64
-
-/*static*/ Format Utils::FORMAT;
-
-int Utils::lerp(int a, int b, int i, int denom)
-{
-	nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
-	nvDebugCheck (i >= 0 && i <= denom);
-
-	int round = 32, shift = 6;
-	const int *weights;
-
-	switch(denom)
-	{
-	case 3:		denom *= 5; i *= 5;	// fall through to case 15
-	case 15:	weights = denom15_weights_64; break;
-	case 7:		weights = denom7_weights_64; break;
-	default:	nvDebugCheck(0);
-	}
-
-	return (a*weights[denom-i] +b*weights[i] + round) >> shift;
-}
-
-Vector3 Utils::lerp(const Vector3& a, const Vector3 &b, int i, int denom)
-{
-	nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
-	nvDebugCheck (i >= 0 && i <= denom);
-
-	int shift = 6;
-	const int *weights;
-
-	switch(denom)
-	{
-	case 3:		denom *= 5; i *= 5;	// fall through to case 15
-	case 15:	weights = denom15_weights_64; break;
-	case 7:		weights = denom7_weights_64; break;
-	default:	nvUnreachable();
-	}
-
-	// no need to round these as this is an exact division
-	return (a*float(weights[denom-i]) +b*float(weights[i])) / float(1 << shift);
-}
-
-
-/*
-	For unsigned f16, clamp the input to [0,F16MAX]. Thus u15.
-	For signed f16, clamp the input to [-F16MAX,F16MAX]. Thus s16.
-
-	The conversions proceed as follows:
-
-	unsigned f16: get bits. if high bit set, clamp to 0, else clamp to F16MAX.
-	signed f16: get bits. extract exp+mantissa and clamp to F16MAX. return -value if sign bit was set, else value
-	unsigned int: get bits. return as a positive value.
-	signed int. get bits. return as a value in -32768..32767.
-
-	The inverse conversions are just the inverse of the above.
-*/
-
-// clamp the 3 channels of the input vector to the allowable range based on FORMAT
-// note that each channel is a float storing the allowable range as a bit pattern converted to float
-// that is, for unsigned f16 say, we would clamp each channel to the range [0, F16MAX]
-
-void Utils::clamp(Vector3 &v)
-{
-	for (int i=0; i<3; ++i)
-	{
-		switch(Utils::FORMAT)
-		{
-		case UNSIGNED_F16:
-			if (v.component[i] < 0.0) v.component[i] = 0;
-			else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
-			break;
-
-		case SIGNED_F16:
-			if (v.component[i] < -F16MAX) v.component[i] = -F16MAX;
-			else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
-			break;
-
-		default:
-			nvUnreachable();
-		}
-	}
-}
-
-// convert a u16 value to s17 (represented as an int) based on the format expected
-int Utils::ushort_to_format(unsigned short input)
-{
-	int out, s;
-
-	// clamp to the valid range we are expecting
-	switch (Utils::FORMAT)
-	{
-	case UNSIGNED_F16:
-		if (input & F16S_MASK) out = 0;
-		else if (input > F16MAX) out = F16MAX;
-		else out = input;
-		break;
-
-	case SIGNED_F16:
-		s = input & F16S_MASK;
-		input &= F16EM_MASK;
-		if (input > F16MAX) out = F16MAX;
-		else out = input;
-		out = s ? -out : out;
-		break;
-	}
-	return out;
-}
-
-// convert a s17 value to u16 based on the format expected
-unsigned short Utils::format_to_ushort(int input)
-{
-	unsigned short out;
-
-	// clamp to the valid range we are expecting
-	switch (Utils::FORMAT)
-	{
-	case UNSIGNED_F16:
-		nvDebugCheck (input >= 0 && input <= F16MAX);
-		out = input;
-		break;
-
-	case SIGNED_F16:
-		nvDebugCheck (input >= -F16MAX && input <= F16MAX);
-		// convert to sign-magnitude
-		int s;
-		if (input < 0) { s = F16S_MASK; input = -input; }
-		else           { s = 0; }
-		out = s | input;
-		break;
-	}
-	return out;
-}
-
-// quantize the input range into equal-sized bins
-int Utils::quantize(float value, int prec)
-{
-	int q, ivalue, s;
-
-	nvDebugCheck (prec > 1);	// didn't bother to make it work for 1
-
-	value = (float)floor(value + 0.5);
-
-	int bias = (prec > 10) ? ((1<<(prec-1))-1) : 0;	// bias precisions 11..16 to get a more accurate quantization
-
-	switch (Utils::FORMAT)
-	{
-	case UNSIGNED_F16:
-		nvDebugCheck (value >= 0 && value <= F16MAX);
-		ivalue = (int)value;
-		q = ((ivalue << prec) + bias) / (F16MAX+1);
-		nvDebugCheck (q >= 0 && q < (1 << prec));
-		break;
-
-	case SIGNED_F16:
-		nvDebugCheck (value >= -F16MAX && value <= F16MAX);
-		// convert to sign-magnitude
-		ivalue = (int)value;
-		if (ivalue < 0) { s = 1; ivalue = -ivalue; } else s = 0;
-
-		q = ((ivalue << (prec-1)) + bias) / (F16MAX+1);
-		if (s)
-			q = -q;
-		nvDebugCheck (q > -(1 << (prec-1)) && q < (1 << (prec-1)));
-		break;
-	}
-
-	return q;
-}
-
-int Utils::finish_unquantize(int q, int prec)
-{
-	if (Utils::FORMAT == UNSIGNED_F16)
-		return (q * 31) >> 6;										// scale the magnitude by 31/64
-	else if (Utils::FORMAT == SIGNED_F16)
-		return (q < 0) ? -(((-q) * 31) >> 5) : (q * 31) >> 5;		// scale the magnitude by 31/32
-	else
-		return q;
-}
-
-// unquantize each bin to midpoint of original bin range, except
-// for the end bins which we push to an endpoint of the bin range.
-// we do this to ensure we can represent all possible original values.
-// the asymmetric end bins do not affect PSNR for the test images.
-//
-// code this function assuming an arbitrary bit pattern as the encoded block
-int Utils::unquantize(int q, int prec)
-{
-	int unq, s;
-
-	nvDebugCheck (prec > 1);	// not implemented for prec 1
-
-	switch (Utils::FORMAT)
-	{
-	// modify this case to move the multiplication by 31 after interpolation.
-	// Need to use finish_unquantize.
-
-	// since we have 16 bits available, let's unquantize this to 16 bits unsigned
-	// thus the scale factor is [0-7c00)/[0-10000) = 31/64
-	case UNSIGNED_F16:
-		if (prec >= 15) 
-			unq = q;
-		else if (q == 0) 
-			unq = 0;
-		else if (q == ((1<<prec)-1)) 
-			unq = U16MAX;
-		else
-			unq = (q * (U16MAX+1) + (U16MAX+1)/2) >> prec;
-		break;
-
-	// here, let's stick with S16 (no apparent quality benefit from going to S17)
-	// range is (-7c00..7c00)/(-8000..8000) = 31/32
-	case SIGNED_F16:
-		// don't remove this test even though it appears equivalent to the code below
-		// as it isn't -- the code below can overflow for prec = 16
-		if (prec >= 16)
-			unq = q;
-		else
-		{
-			if (q < 0) { s = 1; q = -q; } else s = 0;
-
-			if (q == 0)
-				unq = 0;
-			else if (q >= ((1<<(prec-1))-1))
-				unq = s ? -S16MAX : S16MAX;
-			else
-			{
-				unq = (q * (S16MAX+1) + (S16MAX+1)/2) >> (prec-1);
-				if (s)
-					unq = -unq;
-			}
-		}
-		break;
-	}
-	return unq;
-}
-
-
-
-// pick a norm!
-#define	NORM_EUCLIDEAN 1
-
-float Utils::norm(const Vector3 &a, const Vector3 &b)
-{
-#ifdef	NORM_EUCLIDEAN
-	return lengthSquared(a - b);
-#endif
-#ifdef	NORM_ABS
-	Vector3 err = a - b;
-	return fabs(err.x) + fabs(err.y) + fabs(err.z);
-#endif
-}
-
-// parse <name>[<start>{:<end>}]{,}	
-// the pointer starts here         ^
-// name is 1 or 2 chars and matches field names. start and end are decimal numbers
-void Utils::parse(const char *encoding, int &ptr, Field &field, int &endbit, int &len)
-{
-	if (ptr <= 0) return;
-	--ptr;
-	if (encoding[ptr] == ',') --ptr;
-	nvDebugCheck (encoding[ptr] == ']');
-	--ptr;
-	endbit = 0;
-	int scale = 1;
-	while (encoding[ptr] != ':' && encoding[ptr] != '[')
-	{
-		nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
-		endbit += (encoding[ptr--] - '0') * scale;
-		scale *= 10;
-	}
-	int startbit = 0; scale = 1;
-	if (encoding[ptr] == '[')
-		startbit = endbit;
-	else  
-	{
-		ptr--;
-		while (encoding[ptr] != '[')
-		{
-			nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
-			startbit += (encoding[ptr--] - '0') * scale;
-			scale *= 10;
-		}
-	}
-	len = startbit - endbit + 1;	// startbit>=endbit note
-	--ptr;
-	if (encoding[ptr] == 'm')		field = FIELD_M;
-	else if (encoding[ptr] == 'd')	field = FIELD_D;
-	else {
-		// it's wxyz
-		nvDebugCheck (encoding[ptr] >= 'w' && encoding[ptr] <= 'z');
-		int foo = encoding[ptr--] - 'w';
-		// now it is r g or b
-		if (encoding[ptr] == 'r')		foo += 10;
-		else if (encoding[ptr] == 'g')	foo += 20;
-		else if (encoding[ptr] == 'b')	foo += 30;
-		else nvDebugCheck(0);
-		field = (Field) foo;
-	}
-}
-
-

+ 0 - 72
3rdparty/nvtt/bc6h/zoh_utils.h

@@ -1,72 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// utility class holding common routines
-#ifndef _ZOH_UTILS_H
-#define _ZOH_UTILS_H
-
-#include "nvmath/vector.h"
-
-namespace ZOH {
-
-inline int SIGN_EXTEND(int x, int nb) { return ((((signed(x))&(1<<((nb)-1)))?((~0)<<(nb)):0)|(signed(x))); }
-
-enum Field {
-    FIELD_M = 1,	// mode
-    FIELD_D = 2,	// distribution/shape
-    FIELD_RW = 10+0, FIELD_RX = 10+1, FIELD_RY = 10+2, FIELD_RZ = 10+3,	// red channel endpoints or deltas
-    FIELD_GW = 20+0, FIELD_GX = 20+1, FIELD_GY = 20+2, FIELD_GZ = 20+3,	// green channel endpoints or deltas
-    FIELD_BW = 30+0, FIELD_BX = 30+1, FIELD_BY = 30+2, FIELD_BZ = 30+3,	// blue channel endpoints or deltas
-};
-
-// some constants
-static const int F16S_MASK	=  0x8000;		// f16 sign mask
-static const int F16EM_MASK	=  0x7fff;		// f16 exp & mantissa mask
-static const int U16MAX		=  0xffff;
-static const int S16MIN		= -0x8000;
-static const int S16MAX		=  0x7fff;
-static const int INT16_MASK	=  0xffff;
-static const int F16MAX		=  0x7bff;		// MAXFLT bit pattern for halfs
-
-enum Format { UNSIGNED_F16, SIGNED_F16 };
-
-class Utils
-{
-public:
-    static Format FORMAT;     // this is a global -- we're either handling unsigned or unsigned half values
-
-    // error metrics
-    static float norm(const nv::Vector3 &a, const nv::Vector3 &b);
-    static float mpsnr_norm(const nv::Vector3 &a, int exposure, const nv::Vector3 &b);
-
-    // conversion & clamp
-    static int ushort_to_format(unsigned short input);
-    static unsigned short format_to_ushort(int input);
-
-    // clamp to format
-    static void clamp(nv::Vector3 &v);
-
-    // quantization and unquantization
-    static int finish_unquantize(int q, int prec);
-    static int unquantize(int q, int prec);
-    static int quantize(float value, int prec);
-
-    static void parse(const char *encoding, int &ptr, Field & field, int &endbit, int &len);
-
-    // lerping
-    static int lerp(int a, int b, int i, int denom);
-    static nv::Vector3 lerp(const nv::Vector3 & a, const nv::Vector3 & b, int i, int denom);
-};
-
-}
-
-#endif // _ZOH_UTILS_H

+ 0 - 799
3rdparty/nvtt/bc6h/zohone.cpp

@@ -1,799 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// one region zoh compress/decompress code
-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-#include "bits.h"
-#include "tile.h"
-#include "zoh.h"
-#include "zoh_utils.h"
-
-#include "nvmath/vector.inl"
-#include "nvmath/fitting.h"
-
-#include <string.h> // strlen
-#include <float.h> // FLT_MAX
-
-using namespace nv;
-using namespace ZOH;
-
-#define NINDICES	16
-#define	INDEXBITS	4
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-#define	DENOM		(NINDICES-1)
-
-#define	NSHAPES	1
-
-static const int shapes[NSHAPES] =
-{
-    0x0000
-};	// only 1 shape
-
-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
-
-#define	POS_TO_X(pos)	((pos)&3)
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
-
-#define	NDELTA	2
-
-struct Chanpat
-{
-    int prec[NDELTA];		// precision pattern for one channel
-};
-
-struct Pattern
-{
-    Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel
-    int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-    int mode;				// associated mode value
-    int modebits;			// number of mode bits
-    const char *encoding;	// verilog description of encoding for this mode
-};
-
-#define MAXMODEBITS	5
-#define	MAXMODES (1<<MAXMODEBITS)
-
-#define	NPATTERNS 4
-
-static const Pattern patterns[NPATTERNS] =
-{
-    16,4,  16,4,  16,4,   1, 0x0f, 5, "bw[10],bw[11],bw[12],bw[13],bw[14],bw[15],bx[3:0],gw[10],gw[11],gw[12],gw[13],gw[14],gw[15],gx[3:0],rw[10],rw[11],rw[12],rw[13],rw[14],rw[15],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
-    12,8,  12,8,  12,8,   1, 0x0b, 5, "bw[10],bw[11],bx[7:0],gw[10],gw[11],gx[7:0],rw[10],rw[11],rx[7:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
-    11,9,  11,9,  11,9,   1, 0x07, 5, "bw[10],bx[8:0],gw[10],gx[8:0],rw[10],rx[8:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
-    10,10, 10,10, 10,10,  0, 0x03, 5, "bx[9:0],gx[9:0],rx[9:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
-};
-
-// mapping of mode to the corresponding index in pattern
-static const int mode_to_pat[MAXMODES] = {
-    -1,-1,-1,
-    3,	// 0x03
-    -1,-1,-1,
-    2,	// 0x07
-    -1,-1,-1,
-    1,	// 0x0b
-    -1,-1,-1,
-    0,	// 0x0f
-    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-};
-
-#define	R_0(ep)	(ep)[0].A[i]
-#define	R_1(ep)	(ep)[0].B[i]
-#define	MASK(n)	((1<<(n))-1)
-
-// compress endpoints
-static void compress_endpts(const IntEndpts in[NREGIONS_ONE], ComprEndpts out[NREGIONS_ONE], const Pattern &p)
-{
-    if (p.transformed)
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
-            R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
-        }
-    }
-    else
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
-            R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
-        }
-    }
-}
-
-// decompress endpoints
-static void decompress_endpts(const ComprEndpts in[NREGIONS_ONE], IntEndpts out[NREGIONS_ONE], const Pattern &p)
-{
-    bool issigned = Utils::FORMAT == SIGNED_F16;
-
-    if (p.transformed)
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
-            int t;
-            t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
-            t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
-            R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
-        }
-    }
-    else
-    {
-        for (int i=0; i<NCHANNELS; ++i)
-        {
-            R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
-            R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
-        }
-    }
-}
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS_ONE], int prec, IntEndpts q_endpts[NREGIONS_ONE])
-{
-    for (int region = 0; region < NREGIONS_ONE; ++region)
-    {
-        q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, prec);
-        q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, prec);
-        q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, prec);
-        q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, prec);
-        q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, prec);
-        q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, prec);
-    }
-}
-
-// swap endpoints as needed to ensure that the indices at index_one and index_one have a 0 high-order bit
-// index_one is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
-static void swap_indices(IntEndpts endpts[NREGIONS_ONE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-    int index_positions[NREGIONS_ONE];
-
-    index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
-
-    for (int region = 0; region < NREGIONS_ONE; ++region)
-    {
-        int x = index_positions[region] & 3;
-        int y = (index_positions[region] >> 2) & 3;
-        nvDebugCheck(REGION(x,y,shapeindex) == region);		// double check the table
-        if (indices[y][x] & HIGH_INDEXBIT)
-        {
-            // high bit is set, swap the endpts and indices for this region
-            int t;
-            for (int i=0; i<NCHANNELS; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-            for (int y = 0; y < Tile::TILE_H; y++)
-                for (int x = 0; x < Tile::TILE_W; x++)
-                    if (REGION(x,y,shapeindex) == region)
-                        indices[y][x] = NINDICES - 1 - indices[y][x];
-        }
-    }
-}
-
-// endpoints fit only if the compression was lossless
-static bool endpts_fit(const IntEndpts orig[NREGIONS_ONE], const ComprEndpts compressed[NREGIONS_ONE], const Pattern &p)
-{
-    IntEndpts uncompressed[NREGIONS_ONE];
-
-    decompress_endpts(compressed, uncompressed, p);
-
-    for (int j=0; j<NREGIONS_ONE; ++j)
-	for (int i=0; i<NCHANNELS; ++i)
-	{
-        if (orig[j].A[i] != uncompressed[j].A[i]) return false;
-        if (orig[j].B[i] != uncompressed[j].B[i]) return false;
-    }
-    return true;
-}
-
-static void write_header(const ComprEndpts endpts[NREGIONS_ONE], const Pattern &p, Bits &out)
-{
-    // interpret the verilog backwards and process it
-    int m = p.mode;
-    int rw = endpts[0].A[0], rx = endpts[0].B[0];
-    int gw = endpts[0].A[1], gx = endpts[0].B[1];
-    int bw = endpts[0].A[2], bx = endpts[0].B[2];
-    int ptr = int(strlen(p.encoding));
-    while (ptr)
-    {
-        Field field;
-        int endbit, len;
-
-		// !!!UNDONE: get rid of string parsing!!!
-        Utils::parse(p.encoding, ptr, field, endbit, len);
-        switch(field)
-        {
-        case FIELD_M:	out.write( m >> endbit, len); break;
-        case FIELD_RW:	out.write(rw >> endbit, len); break;
-        case FIELD_RX:	out.write(rx >> endbit, len); break;
-        case FIELD_GW:	out.write(gw >> endbit, len); break;
-        case FIELD_GX:	out.write(gx >> endbit, len); break;
-        case FIELD_BW:	out.write(bw >> endbit, len); break;
-        case FIELD_BX:	out.write(bx >> endbit, len); break;
-
-        case FIELD_D:
-        case FIELD_RY:
-        case FIELD_RZ:
-        case FIELD_GY:
-        case FIELD_GZ:
-        case FIELD_BY:
-        case FIELD_BZ:
-        default: nvUnreachable();
-        }
-    }
-}
-
-static void read_header(Bits &in, ComprEndpts endpts[NREGIONS_ONE], Pattern &p)
-{
-    // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
-    int mode = in.read(2);
-    if (mode != 0x00 && mode != 0x01)
-        mode = (in.read(3) << 2) | mode;
-
-    int pat_index = mode_to_pat[mode];
-
-    nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
-    nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
-
-    p = patterns[pat_index];
-
-    int d;
-    int rw, rx;
-    int gw, gx;
-    int bw, bx;
-
-    d = 0;
-    rw = rx = 0;
-    gw = gx = 0;
-    bw = bx = 0;
-
-    int ptr = int(strlen(p.encoding));
-
-    while (ptr)
-    {
-        Field field;
-        int endbit, len;
-
-		// !!!UNDONE: get rid of string parsing!!!
-        Utils::parse(p.encoding, ptr, field, endbit, len);
-
-        switch(field)
-        {
-        case FIELD_M:	break;	// already processed so ignore
-        case FIELD_RW:	rw |= in.read(len) << endbit; break;
-        case FIELD_RX:	rx |= in.read(len) << endbit; break;
-        case FIELD_GW:	gw |= in.read(len) << endbit; break;
-        case FIELD_GX:	gx |= in.read(len) << endbit; break;
-        case FIELD_BW:	bw |= in.read(len) << endbit; break;
-        case FIELD_BX:	bx |= in.read(len) << endbit; break;
-
-        case FIELD_D:
-        case FIELD_RY:
-        case FIELD_RZ:
-        case FIELD_GY:
-        case FIELD_GZ:
-        case FIELD_BY:
-        case FIELD_BZ:
-        default: nvUnreachable();
-        }
-    }
-
-    nvDebugCheck (in.getptr() == 128 - 63);
-
-    endpts[0].A[0] = rw; endpts[0].B[0] = rx;
-    endpts[0].A[1] = gw; endpts[0].B[1] = gx;
-    endpts[0].A[2] = bw; endpts[0].B[2] = bx;
-}
-
-// compress index 0
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-    {
-        int x = POS_TO_X(pos);
-        int y = POS_TO_Y(pos);
-
-        out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0));
-    }
-}
-
-static void emit_block(const ComprEndpts endpts[NREGIONS_ONE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-    Bits out(block, ZOH::BITSIZE);
-
-    write_header(endpts, p, out);
-
-    write_indices(indices, shapeindex, out);
-
-    nvDebugCheck(out.getptr() == ZOH::BITSIZE);
-}
-
-static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
-{
-    // scale endpoints
-    int a, b;			// really need a IntVector3...
-
-    a = Utils::unquantize(endpts.A[0], prec);
-    b = Utils::unquantize(endpts.B[0], prec);
-
-    // interpolate
-    for (int i = 0; i < NINDICES; ++i)
-        palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
-
-    a = Utils::unquantize(endpts.A[1], prec);
-    b = Utils::unquantize(endpts.B[1], prec);
-
-    // interpolate
-    for (int i = 0; i < NINDICES; ++i)
-        palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
-
-    a = Utils::unquantize(endpts.A[2], prec);
-    b = Utils::unquantize(endpts.B[2], prec);
-
-    // interpolate
-    for (int i = 0; i < NINDICES; ++i)
-        palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
-}
-
-// position 0 was compressed
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-    {
-        int x = POS_TO_X(pos);
-        int y = POS_TO_Y(pos);
-
-        indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 1 : 0));
-    }
-}
-
-void ZOH::decompressone(const char *block, Tile &t)
-{
-    Bits in(block, ZOH::BITSIZE);
-
-    Pattern p;
-    IntEndpts endpts[NREGIONS_ONE];
-    ComprEndpts compr_endpts[NREGIONS_ONE];
-
-    read_header(in, compr_endpts, p);
-    int shapeindex = 0;		// only one shape
-
-    decompress_endpts(compr_endpts, endpts, p);
-
-    Vector3 palette[NREGIONS_ONE][NINDICES];
-    for (int r = 0; r < NREGIONS_ONE; ++r)
-        generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
-
-    // read indices
-    int indices[Tile::TILE_H][Tile::TILE_W];
-
-    read_indices(in, shapeindex, indices);
-
-    nvDebugCheck(in.getptr() == ZOH::BITSIZE);
-
-    // lookup
-    for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-            t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
-{
-    Vector3 palette[NINDICES];
-    float toterr = 0;
-    Vector3 err;
-
-    generate_palette_quantized(endpts, prec, palette);
-
-    for (int i = 0; i < np; ++i)
-    {
-        float err, besterr;
-
-        besterr = Utils::norm(colors[i], palette[0]) * importance[i];
-
-        for (int j = 1; j < NINDICES && besterr > 0; ++j)
-        {
-            err = Utils::norm(colors[i], palette[j]) * importance[i];
-
-            if (err > besterr)	// error increased, so we're done searching
-                break;
-            if (err < besterr)
-                besterr = err;
-        }
-        toterr += besterr;
-    }
-    return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_ONE], int prec, 
-                           int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_ONE])
-{
-    // build list of possibles
-    Vector3 palette[NREGIONS_ONE][NINDICES];
-
-    for (int region = 0; region < NREGIONS_ONE; ++region)
-    {
-        generate_palette_quantized(endpts[region], prec, &palette[region][0]);
-        toterr[region] = 0;
-    }
-
-    Vector3 err;
-
-    for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-        int region = REGION(x,y,shapeindex);
-        float err, besterr;
-
-        besterr = Utils::norm(tile.data[y][x], palette[region][0]);
-        indices[y][x] = 0;
-
-        for (int i = 1; i < NINDICES && besterr > 0; ++i)
-        {
-            err = Utils::norm(tile.data[y][x], palette[region][i]);
-
-            if (err > besterr)	// error increased, so we're done searching
-                break;
-            if (err < besterr)
-            {
-                besterr = err;
-                indices[y][x] = i;
-            }
-        }
-        toterr[region] += besterr;
-    }
-}
-
-static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
-                          float old_err, int do_b)
-{
-    // we have the old endpoints: old_endpts
-    // we have the perturbed endpoints: new_endpts
-    // we have the temporary endpoints: temp_endpts
-
-    IntEndpts temp_endpts;
-    float min_err = old_err;		// start with the best current error
-    int beststep;
-
-    // copy real endpoints so we can perturb them
-    for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
-
-    // do a logarithmic search for the best error for this endpoint (which)
-    for (int step = 1 << (prec-1); step; step >>= 1)
-    {
-        bool improved = false;
-        for (int sign = -1; sign <= 1; sign += 2)
-        {
-            if (do_b == 0)
-            {
-                temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-                if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-                    continue;
-            }
-            else
-            {
-                temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-                if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-                    continue;
-            }
-
-            float err = map_colors(colors, importance, np, temp_endpts, prec);
-
-            if (err < min_err)
-            {
-                improved = true;
-                min_err = err;
-                beststep = sign * step;
-            }
-        }
-        // if this was an improvement, move the endpoint and continue search from there
-        if (improved)
-        {
-            if (do_b == 0)
-                new_endpts.A[ch] += beststep;
-            else
-                new_endpts.B[ch] += beststep;
-        }
-    }
-    return min_err;
-}
-
-static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
-{
-    float opt_err = orig_err;
-    for (int ch = 0; ch < NCHANNELS; ++ch)
-    {
-        opt_endpts.A[ch] = orig_endpts.A[ch];
-        opt_endpts.B[ch] = orig_endpts.B[ch];
-    }
-    /*
-        err0 = perturb(rgb0, delta0)
-        err1 = perturb(rgb1, delta1)
-        if (err0 < err1)
-            if (err0 >= initial_error) break
-            rgb0 += delta0
-            next = 1
-        else
-            if (err1 >= initial_error) break
-            rgb1 += delta1
-            next = 0
-        initial_err = map()
-        for (;;)
-            err = perturb(next ? rgb1:rgb0, delta)
-            if (err >= initial_err) break
-            next? rgb1 : rgb0 += delta
-            initial_err = err
-	*/
-    IntEndpts new_a, new_b;
-    IntEndpts new_endpt;
-    int do_b;
-
-    // now optimize each channel separately
-    for (int ch = 0; ch < NCHANNELS; ++ch)
-    {
-        // figure out which endpoint when perturbed gives the most improvement and start there
-        // if we just alternate, we can easily end up in a local minima
-        float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1);	// perturb endpt B
-
-        if (err0 < err1)
-        {
-            if (err0 >= opt_err)
-                continue;
-
-            opt_endpts.A[ch] = new_a.A[ch];
-            opt_err = err0;
-            do_b = 1;		// do B next
-        }
-        else
-        {
-            if (err1 >= opt_err)
-                continue;
-            opt_endpts.B[ch] = new_b.B[ch];
-            opt_err = err1;
-            do_b = 0;		// do A next
-        }
-
-        // now alternate endpoints and keep trying until there is no improvement
-        for (;;)
-        {
-            float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
-            if (err >= opt_err)
-                break;
-            if (do_b == 0)
-                opt_endpts.A[ch] = new_endpt.A[ch];
-            else
-                opt_endpts.B[ch] = new_endpt.B[ch];
-            opt_err = err;
-            do_b = 1 - do_b;	// now move the other endpoint
-        }
-    }
-}
-
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_ONE], 
-                            const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts opt_endpts[NREGIONS_ONE])
-{
-    Vector3 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-    float err = 0;
-
-    for (int region=0; region<NREGIONS_ONE; ++region)
-    {
-        // collect the pixels in the region
-        int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    ++np;
-                }
-            }
-        }
-
-        optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
-    }
-}
-
-/* optimization algorithm
-    for each pattern
-        convert endpoints using pattern precision
-        assign indices and get initial error
-        compress indices (and possibly reorder endpoints)
-        transform endpoints
-        if transformed endpoints fit pattern
-            get original endpoints back
-            optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-            compress new indices
-            transform new endpoints
-            if new endpoints fit pattern AND if error is improved
-                emit compressed block with new data
-            else
-                emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-// Encode the tile with the first pattern (in table order) whose quantized endpoints survive
-// compression, preferring optimized endpoints when they also fit and lower the total error.
-// Writes the block and returns the total error of what was emitted.
-float ZOH::refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block)
-{
-    float err_plain[NREGIONS_ONE], err_tuned[NREGIONS_ONE];
-    IntEndpts ep_plain[NREGIONS_ONE], ep_tuned[NREGIONS_ONE];
-    ComprEndpts packed_plain[NREGIONS_ONE], packed_tuned[NREGIONS_ONE];
-    int idx_plain[Tile::TILE_H][Tile::TILE_W], idx_tuned[Tile::TILE_H][Tile::TILE_W];
-
-    for (int sp = 0; sp < NPATTERNS; ++sp)
-    {
-        // every channel has to share the same base precision
-        for (int ch = 1; ch < NCHANNELS; ++ch)
-            nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[ch].prec[0]);
-
-        const int prec = patterns[sp].chan[0].prec[0];
-
-        // baseline: quantize, index, normalize the index order, then compress the endpoints
-        quantize_endpts(endpts, prec, ep_plain);
-        assign_indices(tile, shapeindex_best, ep_plain, prec, idx_plain, err_plain);
-        swap_indices(ep_plain, idx_plain, shapeindex_best);
-        compress_endpts(ep_plain, packed_plain, patterns[sp]);
-
-        // this pattern cannot hold the endpoints; move on to the next one
-        if (!endpts_fit(ep_plain, packed_plain, patterns[sp]))
-            continue;
-
-        // same pipeline again, starting from the optimized endpoints
-        optimize_endpts(tile, shapeindex_best, err_plain, ep_plain, prec, ep_tuned);
-        assign_indices(tile, shapeindex_best, ep_tuned, prec, idx_tuned, err_tuned);
-        swap_indices(ep_tuned, idx_tuned, shapeindex_best);
-        compress_endpts(ep_tuned, packed_tuned, patterns[sp]);
-
-        float total_plain = 0, total_tuned = 0;
-        for (int r = 0; r < NREGIONS_ONE; ++r)
-        {
-            total_plain += err_plain[r];
-            total_tuned += err_tuned[r];
-        }
-
-        if (endpts_fit(ep_tuned, packed_tuned, patterns[sp]) && total_tuned < total_plain)
-        {
-            emit_block(packed_tuned, shapeindex_best, patterns[sp], idx_tuned, block);
-            return total_tuned;
-        }
-
-        // the optimized endpoints either stopped fitting or gave no improvement,
-        // so fall back to the unoptimized endpoints, which are known to fit
-        emit_block(packed_plain, shapeindex_best, patterns[sp], idx_plain, block);
-        return total_plain;
-    }
-
-    nvAssert (false); // "No candidate found, should never happen (refineone.)";
-    return FLT_MAX;
-}
-
-// Fill palette[region][k] with Utils::lerp(A, B, k, DENOM) for the unquantized endpoints of each region.
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], Vector3 palette[NREGIONS_ONE][NINDICES])
-{
-    for (int r = 0; r < NREGIONS_ONE; ++r)
-    {
-        const FltEndpts &ep = endpts[r];
-        Vector3 *entries = palette[r];
-
-        for (int k = 0; k < NINDICES; ++k)
-            entries[k] = Utils::lerp(ep.A, ep.B, k, DENOM);
-    }
-}
-
-// Build a palette from the unquantized endpoints, then for every pixel pick the closest palette
-// entry of its region. Returns the importance-weighted error summed over all regions.
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_ONE])
-{
-    // build list of possibles
-    Vector3 palette[NREGIONS_ONE][NINDICES];
-
-    generate_palette_unquantized(endpts, palette);
-
-    float toterr = 0;
-
-    for (int y = 0; y < tile.size_y; y++)
-    {
-        for (int x = 0; x < tile.size_x; x++)
-        {
-            const int region = REGION(x,y,shapeindex);
-            const float weight = tile.importance_map[y][x];
-
-            float besterr = Utils::norm(tile.data[y][x], palette[region][0]) * weight;
-
-            // walk the palette in order; stop on an exact match or as soon as the error rises
-            for (int i = 1; i < NINDICES && besterr > 0; ++i)
-            {
-                const float err = Utils::norm(tile.data[y][x], palette[region][i]) * weight;
-
-                if (err > besterr)	// error increased, so we're done searching
-                    break;
-                if (err < besterr)
-                    besterr = err;
-            }
-            toterr += besterr;
-        }
-    }
-    return toterr;
-}
-
-// Rough endpoint estimate for one shape: for each region, project its pixels onto the principal
-// component returned by Fit::computePrincipalComponent_EigenSolver and use the extreme projections
-// (clamped) as endpoints. Returns the map_colors error of the resulting endpoints.
-float ZOH::roughone(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ONE])
-{
-    for (int region = 0; region < NREGIONS_ONE; ++region)
-    {
-        Vector3 colors[Tile::TILE_TOTAL];
-        Vector3 mean(0,0,0);
-        int np = 0;
-
-        // collect the pixels of this region and accumulate their sum
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x,y,shapeindex) != region)
-                    continue;
-                colors[np] = tile.data[y][x];
-                mean += tile.data[y][x];
-                ++np;
-            }
-        }
-
-        // regions with 0, 1 or 2 pixels need no fitting:
-        //   0 -> both endpoints zero, 1 -> both the single color, 2 -> the two colors
-        if (np < 3)
-        {
-            Vector3 zero(0,0,0);
-            endpts[region].A = (np == 0) ? zero : colors[0];
-            endpts[region].B = (np == 0) ? zero : colors[np - 1];
-            continue;
-        }
-
-        mean /= float(np);
-
-        Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-        // find the extent of the pixel projections along the principal direction
-        float lo = FLT_MAX, hi = -FLT_MAX;
-        for (int k = 0; k < np; k++)
-        {
-            const float proj = dot(colors[k]-mean, direction);
-            if (proj < lo) lo = proj;
-            if (proj > hi) hi = proj;
-        }
-
-        // the endpoints are the two points on that line that span all projections
-        endpts[region].A = mean + lo*direction;
-        endpts[region].B = mean + hi*direction;
-
-        // clamp now: the final endpoints will be clamped anyway, so the shape should be
-        // chosen based on clamped endpoints
-        Utils::clamp(endpts[region].A);
-        Utils::clamp(endpts[region].B);
-    }
-
-    return map_colors(tile, shapeindex, endpts);
-}
-
-float ZOH::compressone(const Tile &t, char *block)
-{
-    int shapeindex_best = 0;
-    FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE];
-    float msebest = FLT_MAX;
-
-    /*
-		collect the mse values that are within 5% of the best values
-		optimize each one and choose the best
-	*/
-    // hack for now -- just use the best value WORK
-    for (int i=0; i<NSHAPES && msebest>0.0; ++i)
-    {
-        float mse = roughone(t, i, tempendpts);
-        if (mse < msebest)
-        {
-            msebest = mse;
-            shapeindex_best = i;
-            memcpy(endptsbest, tempendpts, sizeof(endptsbest));
-        }
-
-    }
-    return refineone(t, shapeindex_best, endptsbest, block);
-}

+ 0 - 883
3rdparty/nvtt/bc6h/zohtwo.cpp

@@ -1,883 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// two regions zoh compress/decompress code
-// Thanks to Jacob Munkberg for the shortcut of using SVD to do the equivalent of principal components analysis
-
-/* optimization algorithm
-
-	get initial float endpoints
-	convert endpoints using 16 bit precision, transform, and get bit delta. choose likely endpoint compression candidates.
-		note that there will be 1 or 2 candidates; 2 will be chosen when the delta values are close to the max possible.
-	for each EC candidate in order from max precision to smaller precision
-		convert endpoints using the appropriate precision.
-		optimize the endpoints and minimize square error. save the error and index assignments. apply index compression as well.
-			(thus the endpoints and indices are in final form.)
-		transform and get bit delta.
-		if the bit delta fits, exit
-	if we somehow end up with no candidates, choose the tail set of EC candidates and retry. this should hardly ever happen.
-		add a state variable so that nvDebugCheck can verify we only do this once.
-	convert to bit stream.
-	return the error.
-
-	Global optimization
-		order all tiles based on their errors
-		do something special for high-error tiles
-			the goal here is to try to avoid tiling artifacts. but I think this is a research problem. let's just generate an error image...
-
-	display an image that shows partitioning and precision selected for each tile
-*/
-
-#include "bits.h"
-#include "tile.h"
-#include "zoh.h"
-#include "zoh_utils.h"
-
-#include "nvmath/fitting.h"
-#include "nvmath/vector.inl"
-
-#include <string.h> // strlen
-#include <float.h> // FLT_MAX
-
-using namespace nv;
-using namespace ZOH;
-
-#define NINDICES	8	// number of palette entries per region
-#define	INDEXBITS	3	// bits written per pixel index (one fewer at the per-region positions, see write_indices)
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))	// top bit of an index; tested by swap_indices
-#define	DENOM		(NINDICES-1)	// denominator passed to Utils::lerp when building a palette
-
-// WORK: determine the optimal traversal pattern to search for the best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minimum easily and
-// stop without having to touch all shapes?
-
-#include "shapes_two.h"
-// use only the first 32 available shapes
-#undef NSHAPES
-#undef SHAPEBITS
-#define NSHAPES 32
-#define SHAPEBITS 5
-
-#define	POS_TO_X(pos)	((pos)&3)	// x of a linear pixel position: its low 2 bits
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)	// y of a linear pixel position: the next 2 bits
-
-#define	NDELTA	4	// number of entries in Chanpat::prec
-
-struct Chanpat	// bit widths used for one color channel of a pattern
-{
-    int prec[NDELTA];		// prec[0] masks the first endpoint value, prec[1..3] the other three (see compress_endpts)
-};
-
-struct Pattern	// one row of the patterns[] table: bit widths plus header layout of one mode
-{
-    Chanpat chan[NCHANNELS];    // per-channel bit widths -- may differ between channels, but prec[0] is expected to be the same for all of them
-    int transformed;            // if 0, endpoints are stored as-is; otherwise all but the first are stored as differences from the first (see compress_endpts)
-    int mode;                   // mode value written to the header for this pattern
-    int modebits;               // number of bits the mode value occupies
-    const char *encoding;       // verilog-style description of the header layout, parsed by Utils::parse in write_header/read_header
-};
-
-#define MAXMODEBITS	5	// widest mode field, in bits
-#define	MAXMODES (1<<MAXMODEBITS)	// size of the mode_to_pat table
-
-#define	NPATTERNS 10	// number of rows in patterns[]
-
-// Pattern table. Each row is:
-//   { { {{R widths}}, {{G widths}}, {{B widths}} }, transformed, mode, modebits, encoding }
-static const Pattern patterns[NPATTERNS] =
-{
-    { { {{11,5,5,5}}, {{11,4,4,4}}, {{11,4,4,4}} }, 1, 0x02, 5,
-      "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],rw[10],rx[4:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]" },
-    { { {{11,4,4,4}}, {{11,5,5,5}}, {{11,4,4,4}} }, 1, 0x06, 5,
-      "d[4:0],bz[3],gy[4],rz[3:0],bz[2],bz[0],ry[3:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],gw[10],gx[4:0],gy[3:0],gz[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]" },
-    { { {{11,4,4,4}}, {{11,4,4,4}}, {{11,5,5,5}} }, 1, 0x0a, 5,
-      "d[4:0],bz[3],bz[4],rz[3:0],bz[2:1],ry[3:0],by[3:0],bw[10],bx[4:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],by[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]" },
-    { { {{10,5,5,5}}, {{10,5,5,5}}, {{10,5,5,5}} }, 1, 0x00, 2,
-      "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bw[9:0],gw[9:0],rw[9:0],bz[4],by[4],gy[4],m[1:0]" },
-    { { {{9,5,5,5}}, {{9,5,5,5}}, {{9,5,5,5}} }, 1, 0x0e, 5,
-      "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bw[8:0],gy[4],gw[8:0],by[4],rw[8:0],m[4:0]" },
-    { { {{8,6,6,6}}, {{8,5,5,5}}, {{8,5,5,5}} }, 1, 0x12, 5,
-      "d[4:0],rz[5:0],ry[5:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],rx[5:0],bz[4:3],bw[7:0],gy[4],bz[2],gw[7:0],by[4],gz[4],rw[7:0],m[4:0]" },
-    { { {{8,5,5,5}}, {{8,6,6,6}}, {{8,5,5,5}} }, 1, 0x16, 5,
-      "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],gx[5:0],gy[3:0],gz[4],rx[4:0],bz[4],gz[5],bw[7:0],gy[4],gy[5],gw[7:0],by[4],bz[0],rw[7:0],m[4:0]" },
-    { { {{8,5,5,5}}, {{8,5,5,5}}, {{8,6,6,6}} }, 1, 0x1a, 5,
-      "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bx[5:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bz[5],bw[7:0],gy[4],by[5],gw[7:0],by[4],bz[1],rw[7:0],m[4:0]" },
-    { { {{7,6,6,6}}, {{7,6,6,6}}, {{7,6,6,6}} }, 1, 0x01, 2,
-      "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],bw[6:0],gy[4],bz[2],by[5],gw[6:0],by[4],bz[1:0],rw[6:0],gz[5:4],gy[5],m[1:0]" },
-    { { {{6,6,6,6}}, {{6,6,6,6}}, {{6,6,6,6}} }, 0, 0x1e, 5,
-      "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],gz[5],bw[5:0],gy[4],bz[2],by[5],gy[5],gw[5:0],by[4],bz[1:0],gz[4],rw[5:0],m[4:0]" },
-};
-
-// Maps a mode value to the index of its row in patterns[].
-//   -1: the mode has no row in patterns[]
-//   -2: unused ZOH mode (0x13, 0x17, 0x1b, 0x1f)
-static const int mode_to_pat[MAXMODES] = {
-     3,	// 0x00
-     8,	// 0x01
-     0,	// 0x02
-    -1,	// 0x03
-    -1,	// 0x04
-    -1,	// 0x05
-     1,	// 0x06
-    -1,	// 0x07
-    -1,	// 0x08
-    -1,	// 0x09
-     2,	// 0x0a
-    -1,	// 0x0b
-    -1,	// 0x0c
-    -1,	// 0x0d
-     4,	// 0x0e
-    -1,	// 0x0f
-    -1,	// 0x10
-    -1,	// 0x11
-     5,	// 0x12
-    -2,	// 0x13 (unused)
-    -1,	// 0x14
-    -1,	// 0x15
-     6,	// 0x16
-    -2,	// 0x17 (unused)
-    -1,	// 0x18
-    -1,	// 0x19
-     7,	// 0x1a
-    -2,	// 0x1b (unused)
-    -1,	// 0x1c
-    -1,	// 0x1d
-     9,	// 0x1e
-    -2	// 0x1f (unused)
-};
-
-#define	R_0(ep)	(ep)[0].A[i]	// region 0, endpoint A. NOTE: R_0..R_3 all index the channel with a variable named i that must exist at the point of use
-#define	R_1(ep)	(ep)[0].B[i]	// region 0, endpoint B
-#define	R_2(ep)	(ep)[1].A[i]	// region 1, endpoint A
-#define	R_3(ep)	(ep)[1].B[i]	// region 1, endpoint B
-#define	MASK(n)	((1<<(n))-1)	// value with the low n bits set
-
-// Compress endpoints: mask every value to the bit width given by the pattern. For transformed
-// patterns, the three values after the first are first turned into differences from the first.
-static void compress_endpts(const IntEndpts in[NREGIONS_TWO], ComprEndpts out[NREGIONS_TWO], const Pattern &p)
-{
-    const bool as_delta = (p.transformed != 0);
-
-    // note: the R_n macros pick the channel through the loop variable i
-    for (int i = 0; i < NCHANNELS; ++i)
-    {
-        const Chanpat &widths = p.chan[i];
-
-        R_0(out) = R_0(in) & MASK(widths.prec[0]);
-
-        // value subtracted from the remaining endpoints; 0 leaves them untouched
-        const int base = as_delta ? R_0(in) : 0;
-
-        R_1(out) = (R_1(in) - base) & MASK(widths.prec[1]);
-        R_2(out) = (R_2(in) - base) & MASK(widths.prec[2]);
-        R_3(out) = (R_3(in) - base) & MASK(widths.prec[3]);
-    }
-}
-
-// Decompress endpoints: the inverse of compress_endpts. For transformed patterns the stored
-// differences are sign-extended, added back to the first value and wrapped to prec[0] bits.
-// When Utils::FORMAT is SIGNED_F16, every result is additionally sign-extended.
-static void decompress_endpts(const ComprEndpts in[NREGIONS_TWO], IntEndpts out[NREGIONS_TWO], const Pattern &p)
-{
-    const bool issigned = (Utils::FORMAT == SIGNED_F16);
-
-    // note: the R_n macros pick the channel through the loop variable i
-    for (int i = 0; i < NCHANNELS; ++i)
-    {
-        const int *bits = p.chan[i].prec;
-
-        // the first value is stored directly in both kinds of pattern
-        R_0(out) = issigned ? SIGN_EXTEND(R_0(in),bits[0]) : R_0(in);
-
-        if (p.transformed)
-        {
-            int v1 = SIGN_EXTEND(R_1(in), bits[1]);
-            v1 = (v1 + R_0(in)) & MASK(bits[0]);
-            R_1(out) = issigned ? SIGN_EXTEND(v1,bits[0]) : v1;
-
-            int v2 = SIGN_EXTEND(R_2(in), bits[2]);
-            v2 = (v2 + R_0(in)) & MASK(bits[0]);
-            R_2(out) = issigned ? SIGN_EXTEND(v2,bits[0]) : v2;
-
-            int v3 = SIGN_EXTEND(R_3(in), bits[3]);
-            v3 = (v3 + R_0(in)) & MASK(bits[0]);
-            R_3(out) = issigned ? SIGN_EXTEND(v3,bits[0]) : v3;
-        }
-        else
-        {
-            R_1(out) = issigned ? SIGN_EXTEND(R_1(in),bits[1]) : R_1(in);
-            R_2(out) = issigned ? SIGN_EXTEND(R_2(in),bits[2]) : R_2(in);
-            R_3(out) = issigned ? SIGN_EXTEND(R_3(in),bits[3]) : R_3(in);
-        }
-    }
-}
-
-// Quantize the x/y/z components of both float endpoints of every region with Utils::quantize at precision prec.
-static void quantize_endpts(const FltEndpts endpts[NREGIONS_TWO], int prec, IntEndpts q_endpts[NREGIONS_TWO])
-{
-    for (int r = 0; r < NREGIONS_TWO; ++r)
-    {
-        const FltEndpts &src = endpts[r];
-        IntEndpts &dst = q_endpts[r];
-
-        dst.A[0] = Utils::quantize(src.A.x, prec);
-        dst.A[1] = Utils::quantize(src.A.y, prec);
-        dst.A[2] = Utils::quantize(src.A.z, prec);
-
-        dst.B[0] = Utils::quantize(src.B.x, prec);
-        dst.B[1] = Utils::quantize(src.B.y, prec);
-        dst.B[2] = Utils::quantize(src.B.z, prec);
-    }
-}
-
-// Make sure the index stored at each region's compressed-index position has a clear high bit.
-// If it does not, the region's A and B endpoints are exchanged and all of its indices are mirrored.
-static void swap_indices(IntEndpts endpts[NREGIONS_TWO], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-    for (int region = 0; region < NREGIONS_TWO; ++region)
-    {
-        const int anchor = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-        const int ax = POS_TO_X(anchor);
-        const int ay = POS_TO_Y(anchor);
-        nvDebugCheck(REGION(ax,ay,shapeindex) == region);		// double check the table
-
-        // nothing to do when the high bit is already clear
-        if ((indices[ay][ax] & HIGH_INDEXBIT) == 0)
-            continue;
-
-        // exchange endpoint A and endpoint B in every channel
-        for (int ch = 0; ch < NCHANNELS; ++ch)
-        {
-            const int held = endpts[region].A[ch];
-            endpts[region].A[ch] = endpts[region].B[ch];
-            endpts[region].B[ch] = held;
-        }
-
-        // mirror the indices of this region so they still select the same colors
-        for (int row = 0; row < Tile::TILE_H; row++)
-        {
-            for (int col = 0; col < Tile::TILE_W; col++)
-            {
-                if (REGION(col,row,shapeindex) == region)
-                    indices[row][col] = NINDICES - 1 - indices[row][col];
-            }
-        }
-    }
-}
-
-// The endpoints fit a pattern only if compression was lossless, i.e. decompressing
-// the compressed endpoints gives back exactly the original values.
-static bool endpts_fit(const IntEndpts orig[NREGIONS_TWO], const ComprEndpts compressed[NREGIONS_TWO], const Pattern &p)
-{
-    IntEndpts roundtrip[NREGIONS_TWO];
-
-    decompress_endpts(compressed, roundtrip, p);
-
-    for (int r = 0; r < NREGIONS_TWO; ++r)
-    {
-        for (int ch = 0; ch < NCHANNELS; ++ch)
-        {
-            const bool same = orig[r].A[ch] == roundtrip[r].A[ch]
-                           && orig[r].B[ch] == roundtrip[r].B[ch];
-            if (!same)
-                return false;
-        }
-    }
-    return true;
-}
-
-// Write the block header (mode, shape index and compressed endpoints) to out.
-// The layout comes from p.encoding, which is consumed from its end towards its start;
-// each parsed field names a value, the lowest bit to emit (endbit) and the bit count (len).
-static void write_header(const ComprEndpts endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, Bits &out)
-{
-    const ComprEndpts &e0 = endpts[0];
-    const ComprEndpts &e1 = endpts[1];
-
-    int ptr = int(strlen(p.encoding));
-    while (ptr)
-    {
-        Field field;
-        int endbit, len;
-
-		// !!!UNDONE: get rid of string parsing!!!
-        Utils::parse(p.encoding, ptr, field, endbit, len);
-
-        // pick the value this field refers to: w/x are region 0 A/B, y/z are region 1 A/B
-        int value = 0;
-        switch(field)
-        {
-        case FIELD_M:	value = p.mode;     break;
-        case FIELD_D:	value = shapeindex; break;
-        case FIELD_RW:	value = e0.A[0]; break;
-        case FIELD_RX:	value = e0.B[0]; break;
-        case FIELD_RY:	value = e1.A[0]; break;
-        case FIELD_RZ:	value = e1.B[0]; break;
-        case FIELD_GW:	value = e0.A[1]; break;
-        case FIELD_GX:	value = e0.B[1]; break;
-        case FIELD_GY:	value = e1.A[1]; break;
-        case FIELD_GZ:	value = e1.B[1]; break;
-        case FIELD_BW:	value = e0.A[2]; break;
-        case FIELD_BX:	value = e0.B[2]; break;
-        case FIELD_BY:	value = e1.A[2]; break;
-        case FIELD_BZ:	value = e1.B[2]; break;
-        default: nvUnreachable(); continue;	// unknown field: nothing is written
-        }
-
-        out.write(value >> endbit, len);
-    }
-}
-
-// Read the block header from in.
-//   endpts      output: compressed endpoints of both regions
-//   shapeindex  output: partition shape index
-//   p           output: copy of the pattern selected by the mode bits
-// Returns false, leaving the outputs untouched, when the mode is reserved or has no entry
-// in patterns[]; returns true otherwise.
-static bool read_header(Bits &in, ComprEndpts endpts[NREGIONS_TWO], int &shapeindex, Pattern &p)
-{
-    // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
-    int mode = in.read(2);
-    if (mode != 0x00 && mode != 0x01)
-        mode = (in.read(3) << 2) | mode;	// 5-bit mode: three more bits go above the first two
-
-    int pat_index = mode_to_pat[mode];
-
-    if (pat_index == -2)
-        return false;		// reserved mode found
-
-    nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
-
-    // nvDebugCheck alone does not stop a build without debug checks from indexing
-    // patterns[] with -1, so refuse any mode that has no pattern here as well
-    if (pat_index < 0 || pat_index >= NPATTERNS)
-        return false;
-
-    nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
-
-    p = patterns[pat_index];
-
-    int d = 0;
-    int rw = 0, rx = 0, ry = 0, rz = 0;
-    int gw = 0, gx = 0, gy = 0, gz = 0;
-    int bw = 0, bx = 0, by = 0, bz = 0;
-
-    int ptr = int(strlen(p.encoding));
-
-    while (ptr)
-    {
-        Field field;
-        int endbit, len;
-
-		// !!!UNDONE: get rid of string parsing!!!
-        Utils::parse(p.encoding, ptr, field, endbit, len);
-
-        // each field supplies len bits that land at bit position endbit of its value
-        switch(field)
-        {
-        case FIELD_M:	break;	// already processed so ignore
-        case FIELD_D:	 d |= in.read(len) << endbit; break;
-        case FIELD_RW:	rw |= in.read(len) << endbit; break;
-        case FIELD_RX:	rx |= in.read(len) << endbit; break;
-        case FIELD_RY:	ry |= in.read(len) << endbit; break;
-        case FIELD_RZ:	rz |= in.read(len) << endbit; break;
-        case FIELD_GW:	gw |= in.read(len) << endbit; break;
-        case FIELD_GX:	gx |= in.read(len) << endbit; break;
-        case FIELD_GY:	gy |= in.read(len) << endbit; break;
-        case FIELD_GZ:	gz |= in.read(len) << endbit; break;
-        case FIELD_BW:	bw |= in.read(len) << endbit; break;
-        case FIELD_BX:	bx |= in.read(len) << endbit; break;
-        case FIELD_BY:	by |= in.read(len) << endbit; break;
-        case FIELD_BZ:	bz |= in.read(len) << endbit; break;
-        default: nvUnreachable();
-        }
-    }
-
-    // everything except the 46 index bits must have been consumed by now
-    nvDebugCheck (in.getptr() == 128 - 46);
-
-    shapeindex = d;
-    endpts[0].A[0] = rw; endpts[0].B[0] = rx; endpts[1].A[0] = ry; endpts[1].B[0] = rz;
-    endpts[0].A[1] = gw; endpts[0].B[1] = gx; endpts[1].A[1] = gy; endpts[1].B[1] = gz;
-    endpts[0].A[2] = bw; endpts[0].B[2] = bx; endpts[1].A[2] = by; endpts[1].B[2] = bz;
-
-    return true;
-}
-
-// Write the index of every pixel position in order. Positions returned by
-// SHAPEINDEX_TO_COMPRESSED_INDICES for a region are written with one bit fewer.
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-    int anchors[NREGIONS_TWO];
-
-    for (int r = 0; r < NREGIONS_TWO; ++r)
-        anchors[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-    {
-        bool is_anchor = false;
-        for (int r = 0; r < NREGIONS_TWO && !is_anchor; ++r)
-            is_anchor = (anchors[r] == pos);
-
-        const int nbits = is_anchor ? INDEXBITS - 1 : INDEXBITS;
-
-        out.write(indices[POS_TO_Y(pos)][POS_TO_X(pos)], nbits);
-    }
-}
-
-// Serialize one block: the header first, then the indices. The two together must fill exactly ZOH::BITSIZE bits.
-static void emit_block(const ComprEndpts compr_endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-    Bits stream(block, ZOH::BITSIZE);
-
-    write_header(compr_endpts, shapeindex, p, stream);
-    write_indices(indices, shapeindex, stream);
-
-    nvDebugCheck(stream.getptr() == ZOH::BITSIZE);
-}
-
-// Build the NINDICES-entry palette of one region from quantized endpoints: each channel is
-// unquantized, interpolated with Utils::lerp and finished with Utils::finish_unquantize.
-static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
-{
-    // scale the endpoints of all three channels up front (really need a IntVector3...)
-    const int xa = Utils::unquantize(endpts.A[0], prec);
-    const int xb = Utils::unquantize(endpts.B[0], prec);
-    const int ya = Utils::unquantize(endpts.A[1], prec);
-    const int yb = Utils::unquantize(endpts.B[1], prec);
-    const int za = Utils::unquantize(endpts.A[2], prec);
-    const int zb = Utils::unquantize(endpts.B[2], prec);
-
-    // interpolate every channel for each palette entry
-    for (int k = 0; k < NINDICES; ++k)
-    {
-        palette[k].x = float(Utils::finish_unquantize(Utils::lerp(xa, xb, k, DENOM), prec));
-        palette[k].y = float(Utils::finish_unquantize(Utils::lerp(ya, yb, k, DENOM), prec));
-        palette[k].z = float(Utils::finish_unquantize(Utils::lerp(za, zb, k, DENOM), prec));
-    }
-}
-
-// Read the index of every pixel position in order. Positions returned by
-// SHAPEINDEX_TO_COMPRESSED_INDICES for a region are read with one bit fewer.
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-    int anchors[NREGIONS_TWO];
-
-    for (int r = 0; r < NREGIONS_TWO; ++r)
-        anchors[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-    {
-        bool is_anchor = false;
-        for (int r = 0; r < NREGIONS_TWO && !is_anchor; ++r)
-            is_anchor = (anchors[r] == pos);
-
-        const int nbits = is_anchor ? INDEXBITS - 1 : INDEXBITS;
-
-        indices[POS_TO_Y(pos)][POS_TO_X(pos)] = in.read(nbits);
-    }
-}
-
-// Decode one two-region block into t.data. If read_header rejects the block, the whole tile is set to zero.
-void ZOH::decompresstwo(const char *block, Tile &t)
-{
-    Bits in(block, ZOH::BITSIZE);
-
-    Pattern p;
-    ComprEndpts packed[NREGIONS_TWO];
-    int shapeindex;
-
-    if (!read_header(in, packed, shapeindex, p))
-    {
-        // the header could not be used: return all zeroes
-        for (int row = 0; row < Tile::TILE_H; row++)
-            for (int col = 0; col < Tile::TILE_W; col++)
-                t.data[row][col] = Vector3(0.0f);
-
-        return;
-    }
-
-    IntEndpts endpts[NREGIONS_TWO];
-    decompress_endpts(packed, endpts, p);
-
-    // one palette per region
-    Vector3 palette[NREGIONS_TWO][NINDICES];
-    for (int region = 0; region < NREGIONS_TWO; ++region)
-        generate_palette_quantized(endpts[region], p.chan[0].prec[0], palette[region]);
-
-    int indices[Tile::TILE_H][Tile::TILE_W];
-    read_indices(in, shapeindex, indices);
-
-    nvDebugCheck(in.getptr() == ZOH::BITSIZE);
-
-    // lookup: every pixel takes the palette entry of its region selected by its index
-    for (int row = 0; row < Tile::TILE_H; row++)
-    {
-        for (int col = 0; col < Tile::TILE_W; col++)
-        {
-            const int region = REGION(col,row,shapeindex);
-            t.data[row][col] = palette[region][indices[row][col]];
-        }
-    }
-}
-
-// Given np colors with per-color importance weights and a pair of quantized endpoints, build the
-// palette, pick the best entry for every color and return the summed, importance-weighted error.
-static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
-{
-    Vector3 palette[NINDICES];
-    float toterr = 0;
-
-    generate_palette_quantized(endpts, prec, palette);
-
-    for (int i = 0; i < np; ++i)
-    {
-        float besterr = Utils::norm(colors[i], palette[0]) * importance[i];
-
-        // walk the palette in order; stop on an exact match or as soon as the error rises
-        for (int j = 1; j < NINDICES && besterr > 0; ++j)
-        {
-            const float err = Utils::norm(colors[i], palette[j]) * importance[i];
-
-            if (err > besterr)	// error increased, so we're done searching
-                break;
-            if (err < besterr)
-                besterr = err;
-        }
-        toterr += besterr;
-    }
-    return toterr;
-}
-
-// Assign a palette index to every pixel of the tile, given a shape and quantized endpoints.
-//   indices  output: chosen index per pixel; positions outside tile.size_x/size_y are set to 0
-//   toterr   output: summed error per region
-// NOTE(review): unlike map_colors, the per-pixel error here is not scaled by tile.importance_map.
-static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_TWO], int prec,
-                           int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_TWO])
-{
-    // build list of possibles
-    Vector3 palette[NREGIONS_TWO][NINDICES];
-
-    for (int region = 0; region < NREGIONS_TWO; ++region)
-    {
-        generate_palette_quantized(endpts[region], prec, &palette[region][0]);
-        toterr[region] = 0;
-    }
-
-    // swap_indices and write_indices read every entry of indices, including positions the
-    // search loop below never visits when the tile is smaller than TILE_W x TILE_H,
-    // so give all of them a defined starting value
-    for (int y = 0; y < Tile::TILE_H; y++)
-        for (int x = 0; x < Tile::TILE_W; x++)
-            indices[y][x] = 0;
-
-    for (int y = 0; y < tile.size_y; y++)
-    {
-        for (int x = 0; x < tile.size_x; x++)
-        {
-            const int region = REGION(x,y,shapeindex);
-
-            // index 0 is the starting candidate (indices[y][x] is already 0)
-            float besterr = Utils::norm(tile.data[y][x], palette[region][0]);
-
-            for (int i = 1; i < NINDICES && besterr > 0; ++i)
-            {
-                const float err = Utils::norm(tile.data[y][x], palette[region][i]);
-
-                if (err > besterr)	// error increased, so we're done searching
-                    break;
-                if (err < besterr)
-                {
-                    besterr = err;
-                    indices[y][x] = i;
-                }
-            }
-            toterr[region] += besterr;
-        }
-    }
-}
-
-// Search for a better value of one endpoint (A when do_b == 0, otherwise B) in channel ch.
-// Starting from old_endpts, tries +step and -step for step = 2^(prec-1), ..., 2, 1 and moves the
-// endpoint whenever a trial lowers the error computed by map_colors. Trials outside
-// [0, 2^prec) are skipped.
-//   new_endpts  output: old_endpts with the searched endpoint moved to the best value found
-// Returns the lowest error found, which is old_err when no trial improved on it.
-static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
-                          float old_err, int do_b)
-{
-    IntEndpts temp_endpts;          // scratch copy that carries each trial value
-    float min_err = old_err;        // start with the best current error
-    int beststep = 0;               // signed step of the best trial; only applied when a trial improved
-
-    // copy real endpoints so we can perturb them
-    for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
-
-    // do a logarithmic search for the best error for this endpoint
-    for (int step = 1 << (prec-1); step; step >>= 1)
-    {
-        bool improved = false;
-        for (int sign = -1; sign <= 1; sign += 2)
-        {
-            if (do_b == 0)
-            {
-                temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-                if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-                    continue;
-            }
-            else
-            {
-                temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-                if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-                    continue;
-            }
-
-            float err = map_colors(colors, importance, np, temp_endpts, prec);
-
-            if (err < min_err)
-            {
-                improved = true;
-                min_err = err;
-                beststep = sign * step;
-            }
-        }
-        // if this was an improvement, move the endpoint and continue search from there
-        if (improved)
-        {
-            if (do_b == 0)
-                new_endpts.A[ch] += beststep;
-            else
-                new_endpts.B[ch] += beststep;
-        }
-    }
-    return min_err;
-}
-
-// Greedy per-channel refinement of one region's endpoints.
-// Copies orig_endpts into opt_endpts, then for each channel: probes endpoint A and endpoint B
-// with perturb_one, accepts whichever probe gives the lower error (if it beats the current
-// error), and keeps alternating between the two endpoints until a probe no longer improves.
-static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
-{
-    float best_err = orig_err;
-
-    for (int c = 0; c < NCHANNELS; ++c)
-    {
-        opt_endpts.A[c] = orig_endpts.A[c];
-        opt_endpts.B[c] = orig_endpts.B[c];
-    }
-
-    // now optimize each channel separately
-    for (int ch = 0; ch < NCHANNELS; ++ch)
-    {
-        // probe both endpoints first and start with the one that helps most;
-        // simply alternating can easily end up in a local minimum
-        IntEndpts probe_a, probe_b;
-        const float err_a = perturb_one(colors, importance, np, ch, prec, opt_endpts, probe_a, best_err, 0);	// perturb endpt A
-        const float err_b = perturb_one(colors, importance, np, ch, prec, opt_endpts, probe_b, best_err, 1);	// perturb endpt B
-
-        const bool a_wins = err_a < err_b;
-        const float first_err = a_wins ? err_a : err_b;
-
-        if (first_err >= best_err)
-            continue;	// no improvement for this channel, go to the next one
-
-        if (a_wins)
-            opt_endpts.A[ch] = probe_a.A[ch];
-        else
-            opt_endpts.B[ch] = probe_b.B[ch];
-        best_err = first_err;
-
-        // the endpoint that was not just moved goes next
-        int do_b = a_wins ? 1 : 0;
-
-        // now alternate endpoints and keep trying until there is no improvement
-        for (;;)
-        {
-            IntEndpts probe;
-            const float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, probe, best_err, do_b);
-            if (err >= best_err)
-                break;
-
-            if (do_b == 0)
-                opt_endpts.A[ch] = probe.A[ch];
-            else
-                opt_endpts.B[ch] = probe.B[ch];
-
-            best_err = err;
-            do_b = 1 - do_b;	// now move the other endpoint
-        }
-    }
-}
-
-// Optimize the endpoints of every region of the tile, one region at a time.
-//   tile         pixels (tile.data) and per-pixel weights (tile.importance_map)
-//   shapeindex   partition shape; REGION(x, y, shapeindex) gives the region of each pixel
-//   orig_err     error of the starting endpoints, per region
-//   orig_endpts  starting endpoints, per region
-//   prec         endpoint precision, forwarded to optimize_one
-//   opt_endpts   output: optimized endpoints, per region
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_TWO],
-                            const IntEndpts orig_endpts[NREGIONS_TWO], int prec, IntEndpts opt_endpts[NREGIONS_TWO])
-{
-    Vector3 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-
-    for (int region = 0; region < NREGIONS_TWO; ++region)
-    {
-        // collect the pixels in the region, together with their weights
-        int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++)
-        {
-            for (int x = 0; x < tile.size_x; x++)
-            {
-                if (REGION(x,y,shapeindex) != region)
-                    continue;
-                pixels[np] = tile.data[y][x];
-                importance[np] = tile.importance_map[y][x];
-                ++np;
-            }
-        }
-
-        optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
-    }
-}
-
-/* optimization algorithm
-    for each pattern
-        convert endpoints using pattern precision
-        assign indices and get initial error
-        compress indices (and possibly reorder endpoints)
-        transform endpoints
-        if transformed endpoints fit pattern
-            get original endpoints back
-            optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-            compress new indices
-            transform new endpoints
-            if new endpoints fit pattern AND if error is improved
-                emit compressed block with new data
-            else
-                emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-float ZOH::refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block)
-{
-    float orig_err[NREGIONS_TWO], opt_err[NREGIONS_TWO], orig_toterr, opt_toterr;
-    IntEndpts orig_endpts[NREGIONS_TWO], opt_endpts[NREGIONS_TWO];
-    ComprEndpts compr_orig[NREGIONS_TWO], compr_opt[NREGIONS_TWO];
-    int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-    for (int sp = 0; sp < NPATTERNS; ++sp)
-    {
-        // precisions for all channels need to be the same
-        for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);
-
-        quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
-        assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
-        swap_indices(orig_endpts, orig_indices, shapeindex_best);
-        compress_endpts(orig_endpts, compr_orig, patterns[sp]);
-        if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
-        {
-            optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
-            assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
-            swap_indices(opt_endpts, opt_indices, shapeindex_best);
-            compress_endpts(opt_endpts, compr_opt, patterns[sp]);
-            orig_toterr = opt_toterr = 0;
-            for (int i=0; i < NREGIONS_TWO; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-            if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
-            {
-                emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
-                return opt_toterr;
-            }
-            else
-            {
-                // either it stopped fitting when we optimized it, or there was no improvement
-                // so go back to the unoptimized endpoints which we know will fit
-                emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
-                return orig_toterr;
-            }
-        }
-    }
-    nvAssert(false); //throw "No candidate found, should never happen (refinetwo.)";
-	return FLT_MAX;
-}
-
-// Fills palette[region][0..NINDICES-1] for every region by interpolating between that
-// region's unquantized endpoints A and B with Utils::lerp(A, B, i, DENOM).
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_TWO], Vector3 palette[NREGIONS_TWO][NINDICES])
-{
-    for (int region = 0; region < NREGIONS_TWO; ++region)
-    {
-        const FltEndpts &ep = endpts[region];
-
-        for (int idx = 0; idx < NINDICES; ++idx)
-        {
-            palette[region][idx] = Utils::lerp(ep.A, ep.B, idx, DENOM);
-        }
-    }
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-// Each pixel's error is Utils::norm(pixel, palette entry) scaled by the pixel's importance.
-// The per-pixel search walks the palette in index order and stops as soon as the error
-// rises (or reaches zero), so it relies on the error not dipping again further along.
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_TWO])
-{
-    // build list of possibles
-    Vector3 palette[NREGIONS_TWO][NINDICES];
-    generate_palette_unquantized(endpts, palette);
-
-    float toterr = 0;
-
-    for (int y = 0; y < tile.size_y; y++)
-    {
-        for (int x = 0; x < tile.size_x; x++)
-        {
-            const int region = REGION(x,y,shapeindex);
-
-            // start from palette entry 0, then try to improve on it
-            float besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
-
-            for (int i = 1; i < NINDICES && besterr > 0; ++i)
-            {
-                const float err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
-
-                if (err > besterr)	// error increased, so we're done searching
-                    break;
-                if (err < besterr)
-                    besterr = err;
-            }
-            toterr += besterr;
-        }
-    }
-    return toterr;
-}
-
-// Produces rough (unquantized) endpoints for both regions of the given shape and returns
-// the total mapping error of the tile against them (see map_colors).
-// Regions with 0, 1 or 2 pixels get their endpoints directly; larger regions use the span
-// of the pixels projected onto their principal direction through the mean, then clamped.
-float ZOH::roughtwo(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_TWO])
-{
-    for (int region = 0; region < NREGIONS_TWO; ++region)
-    {
-        Vector3 colors[Tile::TILE_TOTAL];
-        Vector3 mean(0,0,0);
-        int np = 0;
-
-        // gather this region's pixels and accumulate their sum
-        for (int y = 0; y < tile.size_y; y++)
-        {
-            for (int x = 0; x < tile.size_x; x++)
-            {
-                if (REGION(x,y,shapeindex) != region)
-                    continue;
-
-                colors[np] = tile.data[y][x];
-                mean += tile.data[y][x];
-                ++np;
-            }
-        }
-
-        // handle simple cases
-        if (np <= 2)
-        {
-            if (np == 0)
-            {
-                Vector3 zero(0,0,0);
-                endpts[region].A = zero;
-                endpts[region].B = zero;
-            }
-            else
-            {
-                // one pixel: both endpoints are that pixel; two pixels: one endpoint each
-                endpts[region].A = colors[0];
-                endpts[region].B = colors[np - 1];
-            }
-            continue;
-        }
-
-        mean /= float(np);
-
-        Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-        // project each pixel value along the principal direction
-        float minp = FLT_MAX;
-        float maxp = -FLT_MAX;
-        for (int i = 0; i < np; i++)
-        {
-            const float dp = dot(colors[i]-mean, direction);
-            if (dp < minp) minp = dp;
-            if (dp > maxp) maxp = dp;
-        }
-
-        // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-        endpts[region].A = mean + minp*direction;
-        endpts[region].B = mean + maxp*direction;
-
-        // clamp endpoints
-        // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-        // shape based on endpoints being clamped
-        Utils::clamp(endpts[region].A);
-        Utils::clamp(endpts[region].B);
-    }
-
-    return map_colors(tile, shapeindex, endpts);
-}
-
-float ZOH::compresstwo(const Tile &t, char *block)
-{
-    int shapeindex_best = 0;
-    FltEndpts endptsbest[NREGIONS_TWO], tempendpts[NREGIONS_TWO];
-    float msebest = FLT_MAX;
-
-    /*
-    collect the mse values that are within 5% of the best values
-    optimize each one and choose the best
-    */
-    // hack for now -- just use the best value WORK
-    for (int i=0; i<NSHAPES && msebest>0.0; ++i)
-    {
-        float mse = roughtwo(t, i, tempendpts);
-        if (mse < msebest)
-        {
-            msebest = mse;
-            shapeindex_best = i;
-            memcpy(endptsbest, tempendpts, sizeof(endptsbest));
-        }
-
-    }
-    return refinetwo(t, shapeindex_best, endptsbest, block);
-}
-

+ 0 - 264
3rdparty/nvtt/bc7/avpcl.cpp

@@ -1,264 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// the avpcl compressor and decompressor
-
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include <string.h>
-#include <float.h>
-
-using namespace nv;
-using namespace AVPCL;
-
-// global flags (declared extern in avpcl.h); every flag starts out false
-bool AVPCL::flag_premult = false;	// NOTE(review): the disabled file-driver code in this file ties this flag to the "premultiplied-alpha error metric" -- confirm against the mode encoders
-bool AVPCL::flag_nonuniform = false;	// NOTE(review): meaning is not visible in this file -- confirm before relying on it
-bool AVPCL::flag_nonuniform_ati = false;	// NOTE(review): meaning is not visible in this file -- confirm before relying on it
-
-// global mode (declared extern in avpcl.h)
-bool AVPCL::mode_rgb = false;		// true if image had constant alpha = 255
-
-// Compresses one tile by running every mode encoder (mode 0 through mode 7, in that order)
-// and keeping the output of the encoder that reports the smallest error.
-// `block` receives AVPCL::BLOCKSIZE bytes; it is written only when an encoder reports an
-// error strictly below the best seen so far (initially FLT_MAX).
-void AVPCL::compress(const Tile &t, char *block)
-{
-	typedef float (*ModeEncoder)(const Tile &, char *);
-
-	static const ModeEncoder encoders[] =
-	{
-		AVPCL::compress_mode0,
-		AVPCL::compress_mode1,
-		AVPCL::compress_mode2,
-		AVPCL::compress_mode3,
-		AVPCL::compress_mode4,
-		AVPCL::compress_mode5,
-		AVPCL::compress_mode6,
-		AVPCL::compress_mode7,
-	};
-	const int nencoders = int(sizeof(encoders) / sizeof(encoders[0]));
-
-	char candidate[AVPCL::BLOCKSIZE];	// scratch output of the encoder currently being tried
-	float msebest = FLT_MAX;
-
-	for (int m = 0; m < nencoders; ++m)
-	{
-		const float mse = encoders[m](t, candidate);
-		if (mse < msebest)
-		{
-			msebest = mse;
-			memcpy(block, candidate, AVPCL::BLOCKSIZE);
-		}
-	}
-
-	// A disabled debugging aid used to follow here: when an error file was open it wrote
-	// the eight per-mode errors (mode 0 first) to it as raw floats.
-}
-
-/*
-static int getbit(char *b, int start)
-{
-	if (start < 0 || start >= 128) return 0; // out of range
-
-	int ix = start >> 3;
-	return (b[ix] & (1 << (start & 7))) != 0;
-}
-
-static int getbits(char *b, int start, int len)
-{
-	int out = 0;
-	for (int i=0; i<len; ++i)
-		out |= getbit(b, start+i) << i;
-	return out;
-}
-
-static void setbit(char *b, int start, int bit)
-{
-	if (start < 0 || start >= 128) return; // out of range
-
-	int ix = start >> 3;
-
-	if (bit & 1)
-		b[ix] |= (1 << (start & 7));
-	else
-		b[ix] &= ~(1 << (start & 7));
-}
-
-static void setbits(char *b, int start, int len, int bits)
-{
-	for (int i=0; i<len; ++i)
-		setbit(b, start+i, bits >> i);
-}
-*/
-
-// Decompresses one AVPCL block into tile `t`.
-// The mode is read from the first byte (getmode) and the matching mode decoder is invoked;
-// the reserved mode (8) yields a tile whose pixels are all set to (0, 0, 0, 0).
-void AVPCL::decompress(const char *cblock, Tile &t)
-{
-	typedef void (*ModeDecoder)(const char *, Tile &);
-
-	static const ModeDecoder decoders[8] =
-	{
-		AVPCL::decompress_mode0,
-		AVPCL::decompress_mode1,
-		AVPCL::decompress_mode2,
-		AVPCL::decompress_mode3,
-		AVPCL::decompress_mode4,
-		AVPCL::decompress_mode5,
-		AVPCL::decompress_mode6,
-		AVPCL::decompress_mode7,
-	};
-
-	// decode from a private copy of the caller's block
-	char block[AVPCL::BLOCKSIZE];
-	memcpy(block, cblock, AVPCL::BLOCKSIZE);
-
-	const int mode = getmode(block);
-
-	if (mode >= 0 && mode < 8)
-	{
-		decoders[mode](block, t);
-	}
-	else if (mode == 8)
-	{
-		// return a black tile if you get a reserved mode
-		for (int y=0; y<Tile::TILE_H; ++y)
-			for (int x=0; x<Tile::TILE_W; ++x)
-				t.data[y][x].set(0, 0, 0, 0);
-	}
-	else
-	{
-		nvUnreachable();
-	}
-}
-
-/*
-void AVPCL::compress(string inf, string avpclf, string errf)
-{
-	Array2D<RGBA> pixels;
-	int w, h;
-	char block[AVPCL::BLOCKSIZE];
-
-	Targa::read(inf, pixels, w, h);
-	FILE *avpclfile = fopen(avpclf.c_str(), "wb");
-	if (avpclfile == NULL) throw "Unable to open .avpcl file for write";
-	FILE *errfile = NULL;
-	if (errf != "")
-	{
-		errfile = fopen(errf.c_str(), "wb");
-		if (errfile == NULL) throw "Unable to open error file for write";
-	}
-
-	// Look at alpha channel and override the premult flag if alpha is constant (but only if premult is set)
-	if (AVPCL::flag_premult)
-	{
-		if (AVPCL::mode_rgb)
-		{
-			AVPCL::flag_premult = false;
-			cout << endl << "NOTE: Source image alpha is constant 255, turning off premultiplied-alpha error metric." << endl << endl;
-		}
-	}
-
-	// stuff for progress bar O.o
-	int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W);
-	int tilecnt = 0;
-	clock_t start, prev, cur;
-
-	start = prev = clock();
-
-	// convert to tiles and compress each tile
-	for (int y=0; y<h; y+=Tile::TILE_H)
-	{
-		int ysize = min(Tile::TILE_H, h-y);
-		for (int x=0; x<w; x+=Tile::TILE_W)
-		{
-			if ((tilecnt%100) == 0) { cur = clock(); printf("Progress %d of %d, %5.2f seconds per 100 tiles\r", tilecnt, ntiles, float(cur-prev)/CLOCKS_PER_SEC); fflush(stdout); prev = cur; }
-
-			int xsize = min(Tile::TILE_W, w-x);
-			Tile t(xsize, ysize);
-
-			t.insert(pixels, x, y);
-
-			AVPCL::compress(t, block, errfile);
-			if (fwrite(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE)
-				throw "File error on write";
-
-			// progress bar
-			++tilecnt;
-		}
-	}
-
-	cur = clock();
-	printf("\nTotal time to compress: %.2f seconds\n\n", float(cur-start)/CLOCKS_PER_SEC);		// advance to next line finally
-
-	if (fclose(avpclfile)) throw "Close failed on .avpcl file";
-	if (errfile && fclose(errfile)) throw "Close failed on error file";
-}
-
-static int str2int(std::string s) 
-{
-	int thing;
-	std::stringstream str (stringstream::in | stringstream::out);
-	str << s;
-	str >> thing;
-	return thing;
-}
-
-// avpcl file name is ...-w-h-RGB[A].avpcl, extract width and height
-static void extract(string avpclf, int &w, int &h, bool &mode_rgb)
-{
-	size_t n = avpclf.rfind('.', avpclf.length()-1);
-	size_t n1 = avpclf.rfind('-', n-1);
-	size_t n2 = avpclf.rfind('-', n1-1);
-	size_t n3 = avpclf.rfind('-', n2-1);
-	//	...-wwww-hhhh-RGB[A].avpcl
-	//     ^    ^    ^      ^
-	//     n3   n2   n1     n n3<n2<n1<n
-	string width = avpclf.substr(n3+1, n2-n3-1);
-	w = str2int(width);
-	string height = avpclf.substr(n2+1, n1-n2-1);
-	h = str2int(height);
-	string mode = avpclf.substr(n1+1, n-n1-1);
-	mode_rgb = mode == "RGB";
-}
-
-static int modehist[8];
-
-static void stats(char block[AVPCL::BLOCKSIZE])
-{
-	int m = AVPCL::getmode(block);
-	modehist[m]++;
-}
-
-static void printstats()
-{
-	printf("\nMode histogram: "); for (int i=0; i<8; ++i) { printf("%d,", modehist[i]); }
-	printf("\n");
-}
-
-void AVPCL::decompress(string avpclf, string outf)
-{
-	Array2D<RGBA> pixels;
-	int w, h;
-	char block[AVPCL::BLOCKSIZE];
-
-	extract(avpclf, w, h, AVPCL::mode_rgb);
-	FILE *avpclfile = fopen(avpclf.c_str(), "rb");
-	if (avpclfile == NULL) throw "Unable to open .avpcl file for read";
-	pixels.resizeErase(h, w);
-
-	// convert to tiles and decompress each tile
-	for (int y=0; y<h; y+=Tile::TILE_H)
-	{
-		int ysize = min(Tile::TILE_H, h-y);
-		for (int x=0; x<w; x+=Tile::TILE_W)
-		{
-			int xsize = min(Tile::TILE_W, w-x);
-			Tile t(xsize, ysize);
-
-			if (fread(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE)
-				throw "File error on read";
-
-			stats(block);	// collect statistics
-		
-			AVPCL::decompress(block, t);
-
-			t.extract(pixels, x, y);
-		}
-	}
-	if (fclose(avpclfile)) throw "Close failed on .avpcl file";
-
-	Targa::write(outf, pixels, w, h);
-
-	printstats();	// print statistics
-}
-*/

+ 0 - 99
3rdparty/nvtt/bc7/avpcl.h

@@ -1,99 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef _AVPCL_H
-#define _AVPCL_H
-
-#include "tile.h"
-#include "bits.h"
-
-#define	DISABLE_EXHAUSTIVE	1	// define this if you don't want to spend a lot of time on exhaustive compression
-#define	USE_ZOH_INTERP		1	// use zoh interpolator, otherwise use exact avpcl interpolators
-#define	USE_ZOH_INTERP_ROUNDED 1	// use the rounded versions!
-
-namespace AVPCL {
-
-static const int NREGIONS_TWO	= 2;	// region count for two-region partitions
-static const int NREGIONS_THREE	= 3;	// region count for three-region partitions
-
-static const int BLOCKSIZE=16;	// size of one compressed block in bytes (used for the char block buffers)
-static const int BITSIZE=128;	// size of one compressed block in bits (BLOCKSIZE * 8)
-
-// global flags
-extern bool flag_premult;	// defined in avpcl.cpp, initially false
-extern bool flag_nonuniform;	// defined in avpcl.cpp, initially false
-extern bool flag_nonuniform_ati;	// defined in avpcl.cpp, initially false
-
-// global mode
-extern bool mode_rgb;		// true if image had constant alpha = 255
-
-void compress(const Tile &t, char *block);	// tries all eight modes and keeps the lowest-error block
-void decompress(const char *block, Tile &t);	// dispatches on getmode(block)
-
-float compress_mode0(const Tile &t, char *block);	// each compress_modeN writes a block and returns its error
-void decompress_mode0(const char *block, Tile &t);
-
-float compress_mode1(const Tile &t, char *block);
-void decompress_mode1(const char *block, Tile &t);
-
-float compress_mode2(const Tile &t, char *block);
-void decompress_mode2(const char *block, Tile &t);
-
-float compress_mode3(const Tile &t, char *block);
-void decompress_mode3(const char *block, Tile &t);
-
-float compress_mode4(const Tile &t, char *block);
-void decompress_mode4(const char *block, Tile &t);
-
-float compress_mode5(const Tile &t, char *block);
-void decompress_mode5(const char *block, Tile &t);
-
-float compress_mode6(const Tile &t, char *block);
-void decompress_mode6(const char *block, Tile &t);
-
-float compress_mode7(const Tile &t, char *block);
-void decompress_mode7(const char *block, Tile &t);
-
-// Reads the mode prefix from a bit stream: consumes one bit at a time until a 1 bit is
-// seen (at most eight bits) and returns the number of 0 bits that preceded it.
-// Returns 8 (reserved) when all eight bits read are 0.
-inline int getmode(Bits &in)
-{
-	for (int mode = 0; mode < 8; ++mode)
-	{
-		if (in.read(1))
-			return mode;
-	}
-	return 8;	// reserved
-}
-// Determines the mode from the first byte of a block without consuming anything:
-// the mode is the position (0..7) of the lowest set bit of block[0].
-// Returns 8 (reserved) when the low eight bits are all 0.
-inline int getmode(const char *block)
-{
-	const int bits = block[0];
-
-	for (int mode = 0; mode < 8; ++mode)
-	{
-		// low (mode+1) bits must be exactly "1 followed by `mode` zeros"
-		const int mask = (1 << (mode + 1)) - 1;
-		if ((bits & mask) == (1 << mode))
-			return mode;
-	}
-	return 8;	// reserved
-}
-
-}
-
-#endif

+ 0 - 1066
3rdparty/nvtt/bc7/avpcl_mode0.cpp

@@ -1,1066 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-//  x1		444.1x6 16p 45b (3bi)
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-#include "shapes_three.h"
-
-// use only the first 16 available shapes
-#undef NSHAPES
-#undef SHAPEBITS
-#define NSHAPES 16
-#define SHAPEBITS 4
-
-using namespace nv;
-using namespace AVPCL;
-
-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
-
-#define NINDICES	8	// palette entries per region
-#define	INDEXBITS	3	// bits per stored index (2^3 == NINDICES)
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))	// mask of the most significant bit of an index
-#define	DENOM		(NINDICES-1)	// interpolation denominator passed to Utils::lerp
-#define	BIAS		(DENOM/2)	// bias passed to Utils::lerp together with DENOM
-
-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
-// stop without having to touch all shapes?
-
-#define	POS_TO_X(pos)	((pos)&3)	// column of a linear pixel position: its low two bits
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)	// row of a linear pixel position: its next two bits
-
-#define	NBITSIZES	(NREGIONS*2)	// one bit size per endpoint, two endpoints (A and B) per region
-#define	ABITINDEX(region)	(2*(region)+0)	// slot of a region's endpoint A in ChanBits::nbitsizes
-#define	BBITINDEX(region)	(2*(region)+1)	// slot of a region's endpoint B in ChanBits::nbitsizes
-
-struct ChanBits	// stored endpoint bit widths for a single color channel
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel; indexed with ABITINDEX(region) / BBITINDEX(region)
-};
-
-struct Pattern	// describes how one block layout stores its header fields
-{
-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-	int mode;				// associated mode value; written first in the header using modebits bits
-	int modebits;			// number of mode bits
-    const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	NPATTERNS 1	// this mode has exactly one pattern
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red			green			blue			xfm	mode  mb	(then the encoding string; each color column lists the NBITSIZES endpoint widths of that channel)
-	4,4,4,4,4,4,	4,4,4,4,4,4,	4,4,4,4,4,4,	0,	0x1, 1, "",	// really 444.1 x 6
-};
-
-struct RegionPrec	// per-channel endpoint precision for one region
-{
-	int	endpt_a_prec[NCHANNELS_RGB];	// precision of endpoint A, one entry per channel; users add 1 when working in uncompressed space
-	int endpt_b_prec[NCHANNELS_RGB];	// precision of endpoint B, one entry per channel; users add 1 when working in uncompressed space
-};
-
-struct PatternPrec	// endpoint precisions of every region for one pattern
-{
-	RegionPrec region_precs[NREGIONS];	// indexed by region
-};
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 	// per region: endpoint A precisions for the three channels, then endpoint B precisions
-};
-
-// return # of bits needed to store n. handle signed or unsigned cases properly
-// 0 needs no bits. A positive n needs one bit per significant binary digit, plus a sign
-// bit when issigned. A negative n is only legal when issigned (asserted) and needs one
-// bit per halving step required to reach -1, plus one.
-static int nbits(int n, bool issigned)
-{
-	if (n == 0)
-		return 0;	// no bits needed for 0, signed or not
-
-	int nb = 0;
-
-	if (n > 0)
-	{
-		while (n != 0)
-		{
-			++nb;
-			n >>= 1;
-		}
-		return issigned ? nb + 1 : nb;
-	}
-
-	nvAssert (issigned);
-	while (n < -1)
-	{
-		++nb;
-		n >>= 1;
-	}
-	return nb + 1;
-}
-
-static void transform_forward(IntEndptsRGB_2 ep[NREGIONS])	// placeholder: the only pattern in this file has transformed == 0
-{
-	nvUnreachable();	// this function is not expected to be called
-}
-
-static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS])	// placeholder: decompress_mode0 calls this only when the pattern's transformed field is nonzero
-{
-	nvUnreachable();	// the only pattern in this file has transformed == 0
-}
-
-// endpoints are 555,555; reduce to 444,444 and put the lsb bit majority in compr_bits
-// For each endpoint the low bit of every channel is dropped (value >> 1) and the shared
-// lsb flag is set when at least two of the three dropped bits were 1.
-static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts)
-{
-	// endpoint A
-	int a_ones = 0;
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		a_ones += endpts.A[ch] & 1;
-		compr_endpts.A[ch] = endpts.A[ch] >> 1;
-		nvAssert (compr_endpts.A[ch] < 16);
-	}
-	compr_endpts.a_lsb = a_ones >= 2;
-
-	// endpoint B
-	int b_ones = 0;
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		b_ones += endpts.B[ch] & 1;
-		compr_endpts.B[ch] = endpts.B[ch] >> 1;
-		nvAssert (compr_endpts.B[ch] < 16);
-	}
-	compr_endpts.b_lsb = b_ones >= 2;
-}
-
-// Inverse of compress_one: rebuilds each full-precision channel value by shifting the
-// stored value left one bit and placing the endpoint's shared lsb in the freed bit.
-static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts)
-{
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		endpts.A[ch] = (compr_endpts.A[ch] << 1) | compr_endpts.a_lsb;
-		endpts.B[ch] = (compr_endpts.B[ch] << 1) | compr_endpts.b_lsb;
-	}
-}
-
-// Applies uncompress_one to the endpoints of every region.
-static void uncompress_endpoints(const IntEndptsRGB_2 compr_endpts[NREGIONS], IntEndptsRGB endpts[NREGIONS])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		uncompress_one(compr_endpts[region], endpts[region]);
-	}
-}
-
-// Applies compress_one to the endpoints of every region.
-static void compress_endpoints(const IntEndptsRGB endpts[NREGIONS], IntEndptsRGB_2 compr_endpts[NREGIONS])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		compress_one(endpts[region], compr_endpts[region]);
-	}
-}
-
-
-// Quantizes the floating-point endpoints of every region and stores them in compressed
-// (shared-lsb) form. Each channel is first quantized at the pattern precision plus one
-// bit, then compress_one folds the extra bit of the three channels into the shared lsb.
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGB_2 q_endpts[NREGIONS])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		const FltEndpts &src = endpts[region];
-		const RegionPrec &prec = pattern_prec.region_precs[region];
-		IntEndptsRGB full;
-
-		// +1 since we are in uncompressed space
-		full.A[0] = Utils::quantize(src.A.x, prec.endpt_a_prec[0]+1);
-		full.A[1] = Utils::quantize(src.A.y, prec.endpt_a_prec[1]+1);
-		full.A[2] = Utils::quantize(src.A.z, prec.endpt_a_prec[2]+1);
-		full.B[0] = Utils::quantize(src.B.x, prec.endpt_b_prec[0]+1);
-		full.B[1] = Utils::quantize(src.B.y, prec.endpt_b_prec[1]+1);
-		full.B[2] = Utils::quantize(src.B.z, prec.endpt_b_prec[2]+1);
-
-		compress_one(full, q_endpts[region]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
-// For each region, the pixel at SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex, region) is
-// examined; if its index has the high bit set, endpoints A and B (including their lsb
-// flags) are exchanged and every index of the region is mirrored (i -> NINDICES-1-i).
-static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		const int anchor = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-		const int ax = POS_TO_X(anchor);
-		const int ay = POS_TO_Y(anchor);
-		nvAssert(REGION(ax,ay,shapeindex) == region);		// double check the table
-
-		if ((indices[ay][ax] & HIGH_INDEXBIT) == 0)
-			continue;	// high bit already clear, nothing to do for this region
-
-		// high bit is set, swap the endpts and indices for this region
-		IntEndptsRGB_2 &ep = endpts[region];
-		int tmp;
-		for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-		{
-			tmp = ep.A[ch];
-			ep.A[ch] = ep.B[ch];
-			ep.B[ch] = tmp;
-		}
-		tmp = ep.a_lsb;
-		ep.a_lsb = ep.b_lsb;
-		ep.b_lsb = tmp;
-
-		for (int row = 0; row < Tile::TILE_H; row++)
-		{
-			for (int col = 0; col < Tile::TILE_W; col++)
-			{
-				if (REGION(col,row,shapeindex) == region)
-					indices[row][col] = NINDICES - 1 - indices[row][col];
-			}
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGB_2 endpts[NREGIONS], const Pattern &p)	// reports whether the endpoints can be stored with pattern p
-{
-	return true;	// always succeeds in this file; neither argument is inspected
-}
-
-// Writes the block header: mode bits, shape index, then for each channel the A and B
-// endpoint of every region, and finally the A/B lsb flags of every region.
-// The header must end at bit position 83 (asserted).
-static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
-{
-	out.write(p.mode, p.modebits);
-	out.write(shapeindex, SHAPEBITS);
-
-	// endpoints, channel-major
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		const ChanBits &widths = p.chan[ch];
-
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			out.write(endpts[region].A[ch], widths.nbitsizes[ABITINDEX(region)]);
-			out.write(endpts[region].B[ch], widths.nbitsizes[BBITINDEX(region)]);
-		}
-	}
-
-	// one shared lsb per endpoint
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		out.write(endpts[region].a_lsb, 1);
-		out.write(endpts[region].b_lsb, 1);
-	}
-
-	nvAssert (out.getptr() == 83);
-}
-
-// Reads the block header written by write_header.
-//   in         - bit stream positioned at the start of the block
-//   endpts     - receives the stored endpoints and lsb flags of every region
-//   shapeindex - receives the shape index
-//   p          - receives a copy of the pattern (always patterns[0] here)
-//   pat_index  - receives the pattern index (always 0 here)
-// The stream must be at bit position 83 afterwards (asserted).
-static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
-{
-	// consume the mode prefix; the decoded value is not needed here, the call is made
-	// only to advance the stream past the mode bits
-	(void) AVPCL::getmode(in);
-
-	pat_index = 0;
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	shapeindex = in.read(SHAPEBITS);
-	p = patterns[pat_index];
-
-	// endpoints, channel-major (same order as write_header)
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			endpts[region].A[ch] = in.read(p.chan[ch].nbitsizes[ABITINDEX(region)]);
-			endpts[region].B[ch] = in.read(p.chan[ch].nbitsizes[BBITINDEX(region)]);
-		}
-	}
-
-	// one shared lsb per endpoint
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		endpts[region].a_lsb  = in.read(1);
-		endpts[region].b_lsb  = in.read(1);
-	}
-
-	nvAssert (in.getptr() == 83);
-}
-
-// Writes the per-pixel indices in linear position order. The pixel that
-// SHAPEINDEX_TO_COMPRESSED_INDICES names for each region is written with one bit less
-// than the others (INDEXBITS-1 instead of INDEXBITS).
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	int anchors[NREGIONS];
-	for (int region = 0; region < NREGIONS; ++region)
-		anchors[region] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		// is this position one of the shortened (anchor) indices?
-		int width = INDEXBITS;
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			if (anchors[region] == pos)
-			{
-				width = INDEXBITS - 1;
-				break;
-			}
-		}
-
-		const int col = POS_TO_X(pos);
-		const int row = POS_TO_Y(pos);
-		out.write(indices[row][col], width);
-	}
-}
-
-// Reads the per-pixel indices in linear position order (counterpart of write_indices).
-// The pixel that SHAPEINDEX_TO_COMPRESSED_INDICES names for each region is read with one
-// bit less than the others (INDEXBITS-1 instead of INDEXBITS).
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	int anchors[NREGIONS];
-	for (int region = 0; region < NREGIONS; ++region)
-		anchors[region] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		// is this position one of the shortened (anchor) indices?
-		int width = INDEXBITS;
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			if (anchors[region] == pos)
-			{
-				width = INDEXBITS - 1;
-				break;
-			}
-		}
-
-		const int col = POS_TO_X(pos);
-		const int row = POS_TO_Y(pos);
-		indices[row][col] = in.read(width);
-	}
-}
-
-// Serializes a complete block into `block`: header first, then the indices.
-// The two parts together must fill exactly AVPCL::BITSIZE bits (asserted).
-static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits writer(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, writer);
-	write_indices(indices, shapeindex, writer);
-
-	nvAssert(writer.getptr() == AVPCL::BITSIZE);
-}
-
-// Builds the NINDICES-entry palette of one region from its compressed endpoints.
-// The endpoints are expanded with uncompress_one, each channel is unquantized at the
-// region precision plus one bit, and the palette entries are interpolated with
-// Utils::lerp(a, b, i, BIAS, DENOM). Alpha is fixed at 255 for every entry.
-static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	IntEndptsRGB full;
-	uncompress_one(endpts_2, full);
-
-	int lo, hi;			// unquantized endpoint A / B of the channel being processed
-
-	// red (+1 since we are in uncompressed space)
-	lo = Utils::unquantize(full.A[0], region_prec.endpt_a_prec[0]+1);
-	hi = Utils::unquantize(full.B[0], region_prec.endpt_b_prec[0]+1);
-	for (int idx = 0; idx < NINDICES; ++idx)
-		palette[idx].x = float(Utils::lerp(lo, hi, idx, BIAS, DENOM));
-
-	// green
-	lo = Utils::unquantize(full.A[1], region_prec.endpt_a_prec[1]+1);
-	hi = Utils::unquantize(full.B[1], region_prec.endpt_b_prec[1]+1);
-	for (int idx = 0; idx < NINDICES; ++idx)
-		palette[idx].y = float(Utils::lerp(lo, hi, idx, BIAS, DENOM));
-
-	// blue
-	lo = Utils::unquantize(full.A[2], region_prec.endpt_a_prec[2]+1);
-	hi = Utils::unquantize(full.B[2], region_prec.endpt_b_prec[2]+1);
-	for (int idx = 0; idx < NINDICES; ++idx)
-		palette[idx].z = float(Utils::lerp(lo, hi, idx, BIAS, DENOM));
-
-	// constant alpha
-	for (int idx = 0; idx < NINDICES; ++idx)
-		palette[idx].w = 255.0f;
-}
-
-static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS])	// placeholder: decompress_mode0 calls this only when the pattern's transformed field is nonzero
-{
-	nvUnreachable();	// the only pattern in this file has transformed == 0
-}
-
-// Decompresses a mode 0 block into tile `t`: reads the header, rebuilds one palette per
-// region, reads the indices and looks every pixel up in the palette of its region.
-// The block must be consumed exactly (asserted).
-void AVPCL::decompress_mode0(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern pattern;
-	IntEndptsRGB_2 endpts[NREGIONS];
-	int shapeindex, pat_index;
-	read_header(in, endpts, shapeindex, pattern, pat_index);
-
-	if (pattern.transformed)
-	{
-		sign_extend(pattern, endpts);
-		transform_inverse(endpts);
-	}
-
-	// one palette per region
-	Vector4 palette[NREGIONS][NINDICES];
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_precs[pat_index].region_precs[region], &palette[region][0]);
-	}
-
-	int indices[Tile::TILE_H][Tile::TILE_W];
-	read_indices(in, shapeindex, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	// lookup
-	for (int row = 0; row < Tile::TILE_H; row++)
-	{
-		for (int col = 0; col < Tile::TILE_W; col++)
-		{
-			t.data[row][col] = palette[REGION(col,row,shapeindex)][indices[row][col]];
-		}
-	}
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-//   colors, importance - the np pixels to map and their weights
-//   endpts, region_prec - compressed endpoints and precision used to build the palette
-//   current_err - error to beat; as soon as the running total exceeds it the search is abandoned
-//   indices     - receives the chosen palette index of each pixel
-// Returns the total weighted error, or FLT_MAX when abandoned early (in which case
-// indices[i..np-1], from the pixel where the total was exceeded onwards, are set to -1).
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float besterr = FLT_MAX;
-
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-			const float err = Utils::metric4(colors[i], palette[j]) * importance[i];
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
-		toterr[region] = 0;
-	}
-
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[y][x] = i;
-			}
-		}
-		toterr[region] += besterr;
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, 
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGB_2 temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int i=0; i<np; ++i)
-					indices[i] = temp_indices[i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGB_2 temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	int amin, bmin;
-
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b < bhigh; ++b)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b < bhigh; ++b)
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err; 
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		orig_err = best_err;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGB_2 new_a, new_b;
-	IntEndptsRGB_2 new_endpt;
-	int do_b;
-	int orig_indices[Tile::TILE_TOTAL];
-	int new_indices[Tile::TILE_TOTAL];
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-        float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-// this will return a valid set of endpoints in opt_endpts regardless of whether it improve orig_endpts or not
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
-							const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGB_2 temp_in, temp_out;
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
-		{
-			temp_in.a_lsb = lsbmode & 1;
-			temp_in.b_lsb = (lsbmode >> 1) & 1;
-
-			// make sure we have a valid error for temp_in
-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
-			float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
-
-			// now try to optimize these endpoints
-			float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-			// if we find an improvement, update the best so far and correct the output endpoints and errors
-			if (temp_out_err < best_err)
-			{
-				best_err = temp_out_err;
-				opt_err[region] = temp_out_err;
-				opt_endpts[region] = temp_out;
-			}
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
-		if (patterns[sp].transformed)
-			transform_forward(orig_endpts);
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			if (patterns[sp].transformed)
-				transform_inverse(orig_endpts);
-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
-			if (patterns[sp].transformed)
-				transform_forward(opt_endpts);
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transformed)
-					transform_forward(orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
-				return orig_toterr;
-			}
-		}
-	}
-    nvAssert(false); // throw "No candidate found, should never happen (mode avpcl 0).";
-	return FLT_MAX;
-}
-
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f) v.x = 0.0f;
-	if (v.x > 255.0f) v.x = 255.0f;
-	if (v.y < 0.0f) v.y = 0.0f;
-	if (v.y > 255.0f) v.y = 255.0f;
-	if (v.z < 0.0f) v.z = 0.0f;
-	if (v.z > 255.0f) v.z = 255.0f;
-	v.w = 255.0f;
-}
-
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	for (int i = 0; i < NINDICES; ++i)
-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	generate_palette_unquantized(endpts, palette);
-
-	float toterr = 0;
-	Vector4 err;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
-				break;
-			if (err < besterr)
-				besterr = err;
-		}
-		toterr += besterr;
-	}
-	return toterr;
-}
-
-// for this mode, we assume alpha = 255 constant and compress only the RGB portion.
-// however, we do the error check against the actual alpha values supplied for the tile.
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[2];
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				if (np < 2) alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + minp*Vector4(direction, 0);
-		endpts[region].B = mean + maxp*Vector4(direction, 0);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-static void swap(float *list1, int *list2, int i, int j)
-{
-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
-}
-
-float AVPCL::compress_mode0(const Tile &t, char *block)
-{
-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
-	const int NITEMS=NSHAPES/4;
-
-	// pick the best NITEMS shapes and refine these.
-	struct {
-		FltEndpts endpts[NREGIONS];
-	} all[NSHAPES];
-	float roughmse[NSHAPES];
-	int index[NSHAPES];
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-
-	for (int i=0; i<NSHAPES; ++i)
-	{
-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
-		index[i] = i;
-	}
-
-	// bubble sort -- only need to bubble up the first NITEMS items
-	for (int i=0; i<NITEMS; ++i)
-	for (int j=i+1; j<NSHAPES; ++j)
-		if (roughmse[i] > roughmse[j])
-			swap(roughmse, index, i, j);
-
-	for (int i=0; i<NITEMS && msebest>0; ++i)
-	{
-		int shape = index[i];
-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
-		if (mse < msebest)
-		{
-			memcpy(block, tempblock, sizeof(tempblock));
-			msebest = mse;
-		}
-	}
-	return msebest;
-}
-

+ 0 - 1047
3rdparty/nvtt/bc7/avpcl_mode1.cpp

@@ -1,1047 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x10	(666x2).1 (666x2).1 64p 3bi
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-#include "shapes_two.h"
-
-using namespace nv;
-using namespace AVPCL;
-
-#define	NLSBMODES	2		// number of different lsb modes per region. since we have one .1 per region, that can have 2 values
-
-#define NINDICES	8
-#define	INDEXBITS	3
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-#define	DENOM		(NINDICES-1)
-#define	BIAS		(DENOM/2)
-
-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
-// stop without having to touch all shapes?
-
-#define	POS_TO_X(pos)	((pos)&3)
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
-
-#define	NBITSIZES	(NREGIONS*2)
-#define	ABITINDEX(region)	(2*(region)+0)
-#define	BBITINDEX(region)	(2*(region)+1)
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	NPATTERNS 1
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red		green		blue		xfm	mode  mb
-	6,6,6,6,	6,6,6,6,	6,6,6,6,	0,	0x2, 2, "",
-};
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGB];
-	int endpt_b_prec[NCHANNELS_RGB];
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];
-};
-
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	6,6,6, 6,6,6, 6,6,6, 6,6,6,	
-};
-
-// return # of bits needed to store n. handle signed or unsigned cases properly
-static int nbits(int n, bool issigned)
-{
-	int nb;
-	if (n==0)
-		return 0;	// no bits needed for 0, signed or not
-	else if (n > 0)
-	{
-		for (nb=0; n; ++nb, n>>=1) ;
-		return nb + (issigned?1:0);
-	}
-	else
-	{
-		nvAssert (issigned);
-		for (nb=0; n<-1; ++nb, n>>=1) ;
-		return nb + 1;
-	}
-}
-
-
-static void transform_forward(IntEndptsRGB_1 ep[NREGIONS])
-{
-	nvUnreachable();
-}
-
-static void transform_inverse(IntEndptsRGB_1 ep[NREGIONS])
-{
-	nvUnreachable();
-}
-
-// endpoints are 777,777; reduce to 666,666 and put the lsb bit majority in compr_bits
-static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_1& compr_endpts)
-{
-	int onescnt;
-
-	onescnt = 0;
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-	{
-		onescnt += endpts.A[j] & 1;
-		compr_endpts.A[j] = endpts.A[j] >> 1;
-		onescnt += endpts.B[j] & 1;
-		compr_endpts.B[j] = endpts.B[j] >> 1;
-		nvAssert (compr_endpts.A[j] < 64);
-		nvAssert (compr_endpts.B[j] < 64);
-	}
-	compr_endpts.lsb = onescnt >= 3;
-}
-
-static void uncompress_one(const IntEndptsRGB_1& compr_endpts, IntEndptsRGB& endpts)
-{
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-	{
-		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.lsb;
-		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.lsb;
-	}
-}
-
-static void uncompress_endpoints(const IntEndptsRGB_1 compr_endpts[NREGIONS], IntEndptsRGB endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		uncompress_one(compr_endpts[i], endpts[i]);
-}
-
-static void compress_endpoints(const IntEndptsRGB endpts[NREGIONS], IntEndptsRGB_1 compr_endpts[NREGIONS])
-{
-	for (int i=0; i<NREGIONS; ++i)
-		compress_one(endpts[i], compr_endpts[i]);
-}
-
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGB_1 q_endpts[NREGIONS])
-{
-	IntEndptsRGB full_endpts[NREGIONS];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
-		compress_one(full_endpts[region], q_endpts[region]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
-static void swap_indices(IntEndptsRGB_1 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-		int x = POS_TO_X(position);
-		int y = POS_TO_Y(position);
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-		if (indices[y][x] & HIGH_INDEXBIT)
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=0; i<NCHANNELS_RGB; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[y][x] = NINDICES - 1 - indices[y][x];
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGB_1 endpts[NREGIONS], const Pattern &p)
-{
-	return true;
-}
-
-
-static void write_header(const IntEndptsRGB_1 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
-{
-	out.write(p.mode, p.modebits);
-	out.write(shapeindex, SHAPEBITS);
-
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-
-	for (int i=0; i<NREGIONS; ++i)
-		out.write(endpts[i].lsb, 1);
-
-	nvAssert (out.getptr() == 82);
-}
-
-static void read_header(Bits &in, IntEndptsRGB_1 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
-{
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	shapeindex = in.read(SHAPEBITS);
-	p = patterns[pat_index];
-
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-
-	for (int i=0; i<NREGIONS; ++i)
-		endpts[i].lsb  = in.read(1);
-	
-	nvAssert (in.getptr() == 82);
-}
-
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	int positions[NREGIONS];
-
-	for (int r = 0; r < NREGIONS; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	int positions[NREGIONS];
-
-	for (int r = 0; r < NREGIONS; ++r)
-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		int x = POS_TO_X(pos);
-		int y = POS_TO_Y(pos);
-
-		bool match = false;
-
-		for (int r = 0; r < NREGIONS; ++r)
-			if (positions[r] == pos) { match = true; break; }
-
-		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
-	}
-}
-
-static void emit_block(const IntEndptsRGB_1 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, out);
-
-	write_indices(indices, shapeindex, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-
-static void generate_palette_quantized(const IntEndptsRGB_1 &endpts_1, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	IntEndptsRGB endpts;
-
-	uncompress_one(endpts_1, endpts);
-
-	// scale endpoints
-	int a, b;			// really need a IntVec4...
-
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
-
-	// note: don't simplify to a + ((b-a)*i + BIAS)/DENOM as that doesn't work due to the way C handles integer division of negatives
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	// constant alpha
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].w = 255.0f;
-}
-
-// sign extend but only if it was transformed
-static void sign_extend(Pattern &p, IntEndptsRGB_1 endpts[NREGIONS])
-{
-	nvUnreachable();
-}
-
-void AVPCL::decompress_mode1(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGB_1 endpts[NREGIONS];
-	int shapeindex, pat_index;
-
-	read_header(in, endpts, shapeindex, p, pat_index);
-	
-	if (p.transformed)
-	{
-		sign_extend(p, endpts);
-		transform_inverse(endpts);
-	}
-
-	Vector4 palette[NREGIONS][NINDICES];
-	for (int r = 0; r < NREGIONS; ++r)
-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
-
-	int indices[Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-//   colors, importance : np colors and the per-color weight each error is multiplied by
-//   endpts, region_prec: quantized endpoints and the precision used to build the palette
-//   current_err        : cutoff; once the running total exceeds it the search is abandoned,
-//                        indices[i..np-1] are set to -1 and FLT_MAX is returned
-//   indices            : receives the chosen palette index for every color
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_1 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float besterr = FLT_MAX;
-
-		// the scan also stops once an exact match (besterr == 0) has been found
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-			float err = Utils::metric4(colors[i], palette[j]) * importance[i];
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-//   indices[y][x] receives the palette index chosen for each pixel inside tile.size_x x tile.size_y
-//   toterr[region] receives the summed error of the pixels belonging to that region
-// Unlike map_colors, the per-pixel error here is NOT multiplied by an importance weight.
-// NOTE(review): confirm that this difference from map_colors is intentional.
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
-		toterr[region] = 0;
-	}
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float besterr = FLT_MAX;
-
-		// the scan also stops once an exact match (besterr == 0) has been found
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			float err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[y][x] = i;
-			}
-		}
-		toterr[region] += besterr;
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-//
-// Searches for a better value of one channel of one endpoint: A[ch] when do_b == 0, B[ch] otherwise.
-// The step starts at 1 << (prec-1) and is halved on every pass; both +step and -step are tried.
-// new_endpts is set to old_endpts with that single channel moved to the best value found.
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts, 
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGB_1 temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;					// only read when `improved` is true, in which case it has been written
-	int temp_indices[Tile::TILE_TOTAL];
-
-	// mark the output as invalid until an improvement is found
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				// skip candidates outside the representable range [0, 2^prec)
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				// skip candidates outside the representable range [0, 2^prec)
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-			// min_err is the cutoff, so map_colors returns FLT_MAX once this candidate's running error exceeds it
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int i=0; i<np; ++i)
-					indices[i] = temp_indices[i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
-// Brute-force search over channel `ch` of both endpoints inside a window around opt_endpts.
-// The window half-width grows with orig_err (see thresholds above) and is at least 3.
-// Returns the best error found; when it is smaller than orig_err, opt_endpts.A[ch]/B[ch] and
-// indices[] are updated, otherwise opt_endpts is untouched and indices[] holds -1's.
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGB_1 temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// thresholds are stated for a full tile; scale them down for smaller regions
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B (all four bounds are inclusive)
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// best pair found so far; starts at the current endpoints so it is never indeterminate
-	int amin = opt_endpts.A[ch];
-	int bmin = opt_endpts.B[ch];
-
-	// now there's no need to swap the ordering of A and B
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b <= bhigh; ++b)	// bhigh is inclusive, same as ahigh
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b <= bhigh; ++b)	// bhigh is inclusive, same as ahigh
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err; 
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-
-// Optimize the endpoints of one region.
-//   colors, importance, np : the region's pixels and their weights
-//   orig_err, orig_endpts  : starting error and endpoints
-//   opt_endpts             : receives the optimized endpoints (starts as a copy of orig_endpts)
-// Returns the error of opt_endpts as computed by perturb_one/exhaustive; it never exceeds orig_err.
-// Phase 1 perturbs one channel at a time, alternating between endpoint A and B; phase 2 runs a
-// small exhaustive search per channel. Either phase restarts from channel 0 (ch = -1) when the
-// chosen indices changed relative to the first improvement.
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_1 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_1 &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGB_1 new_a, new_b;
-	IntEndptsRGB_1 new_endpt;
-	int do_b;
-	int orig_indices[Tile::TILE_TOTAL];
-	int new_indices[Tile::TILE_TOTAL];
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-		float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			// neither endpoint improved this channel: move on to the next one
-			if (err0 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			// neither endpoint improved this channel: move on to the next one
-			if (err1 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		// ch = -1 becomes 0 after the loop increment, i.e. all channels are revisited
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				// remember the indices of the first improvement as the reference set
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
-							IntEndptsRGB_1 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_1 opt_endpts[NREGIONS])
-{
-	// For each region: gather its pixels, then try every lsb mode and keep the endpoints
-	// with the smallest optimized error (or the originals when nothing beats orig_err).
-	Vector4 region_pixels[Tile::TILE_TOTAL];
-	float region_weights[Tile::TILE_TOTAL];
-	int scratch_indices[Tile::TILE_TOTAL];
-	IntEndptsRGB_1 candidate, optimized;
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		const RegionPrec &prec = pattern_prec.region_precs[region];
-
-		// collect the pixels in the region, together with their importance
-		int count = 0;
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-		{
-			if (REGION(x, y, shapeindex) != region)
-				continue;
-
-			region_pixels[count] = tile.data[y][x];
-			region_weights[count] = tile.importance_map[y][x];
-			++count;
-		}
-
-		// until an improvement shows up the output is simply the input
-		candidate = orig_endpts[region];
-		opt_endpts[region] = candidate;
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		for (int lsbmode = 0; lsbmode < NLSBMODES; ++lsbmode)
-		{
-			candidate.lsb = lsbmode;
-
-			// FLT_MAX as the cutoff: map_colors must not take its early exit here, the
-			// starting error handed to optimize_one has to be accurate
-			const float candidate_err = map_colors(region_pixels, region_weights, count, candidate, prec, FLT_MAX, scratch_indices);
-
-			// now try to optimize these endpoints
-			const float optimized_err = optimize_one(region_pixels, region_weights, count, candidate_err, candidate, prec, optimized);
-
-			// keep the result only when it beats the best seen so far
-			if (optimized_err < best_err)
-			{
-				best_err = optimized_err;
-				opt_err[region] = optimized_err;
-				opt_endpts[region] = optimized;
-			}
-		}
-	}
-}
-
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-// Quantize, optimize and emit the block for one shape, following the algorithm outlined above.
-//   tile            : pixels to compress
-//   shapeindex_best : shape (partition) to use
-//   endpts          : unquantized endpoints for each region
-//   block           : receives the compressed block
-// Returns the total error (summed over regions, as reported by assign_indices) of the emitted
-// block. The first pattern whose endpoints fit is used; FLT_MAX is returned if none fits.
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGB_1 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
-		if (patterns[sp].transformed)
-			transform_forward(orig_endpts);
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			// undo the forward transform so optimization works on the untransformed endpoints
-			if (patterns[sp].transformed)
-				transform_inverse(orig_endpts);
-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			// NOTE(review): optimize_endpts measures error through map_colors, which multiplies by the
-			// importance weight, while assign_indices does not -- possibly the cause; confirm.
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
-			if (patterns[sp].transformed)
-				transform_forward(opt_endpts);
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			//nvAssert(opt_toterr <= orig_toterr);
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transformed)
-					transform_forward(orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 1).";
-	return FLT_MAX;
-}
-
-static void clamp(Vector4 &v)
-{
-	// Limit x, y and z to [0, 255]; w is always forced to 255.
-	if (v.x < 0.0f)
-		v.x = 0.0f;
-	else if (v.x > 255.0f)
-		v.x = 255.0f;
-
-	if (v.y < 0.0f)
-		v.y = 0.0f;
-	else if (v.y > 255.0f)
-		v.y = 255.0f;
-
-	if (v.z < 0.0f)
-		v.z = 0.0f;
-	else if (v.z > 255.0f)
-		v.z = 255.0f;
-
-	v.w = 255.0f;
-}
-
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
-{
-	// For every region, entry k is Utils::lerp(A, B, k, 0, DENOM) of that region's endpoints.
-	for (int r = 0; r < NREGIONS; ++r)
-	{
-		const FltEndpts &ends = endpts[r];
-		Vector4 *entries = palette[r];
-
-		for (int k = 0; k < NINDICES; ++k)
-		{
-			entries[k] = Utils::lerp(ends.A, ends.B, k, 0, DENOM);
-		}
-	}
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-// Each pixel's error is multiplied by tile.importance_map[y][x]; only pixels inside
-// tile.size_x x tile.size_y contribute.
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	generate_palette_unquantized(endpts, palette);
-
-	float toterr = 0;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float besterr = FLT_MAX;
-
-		// the scan also stops once an exact match (besterr == 0) has been found
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			float err = Utils::metric4(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
-
-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
-				break;
-			if (err < besterr)
-				besterr = err;
-		}
-		toterr += besterr;
-	}
-	return toterr;
-}
-
-// Compute rough (unquantized) endpoints for every region of the given shape and return the
-// resulting total error as measured by map_colors(tile, shapeindex, endpts).
-// Regions with 0, 1 or 2 pixels are handled directly; larger regions use the principal
-// component of their colors, spanning the extreme projections, then clamped.
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[2];			// alpha of the first two pixels only; used by the np == 1 and np == 2 cases
-		Vector4 mean(0,0,0,0);
-
-		// gather the region's pixels and accumulate their sum
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				if (np < 2) alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + minp*Vector4(direction, 0);
-		endpts[region].B = mean + maxp*Vector4(direction, 0);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-static void swap(float *list1, int *list2, int i, int j)
-{
-	// exchange entries i and j in both parallel arrays so that they stay in step
-	const float held_key = list1[i];
-	const int held_value = list2[i];
-
-	list1[i] = list1[j];
-	list2[i] = list2[j];
-
-	list1[j] = held_key;
-	list2[j] = held_value;
-}
-
-float AVPCL::compress_mode1(const Tile &t, char *block)
-{
-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
-	const int NITEMS=NSHAPES/4;
-
-	// rough endpoints and rough error for every shape; `order` tracks which shape each error belongs to
-	FltEndpts candidates[NSHAPES][NREGIONS];
-	float rough_err[NSHAPES];
-	int order[NSHAPES];
-	char scratch[AVPCL::BLOCKSIZE];
-	float best = FLT_MAX;
-
-	for (int shape = 0; shape < NSHAPES; ++shape)
-	{
-		order[shape] = shape;
-		rough_err[shape] = rough(t, shape, candidates[shape]);
-	}
-
-	// partial sort: only the NITEMS smallest rough errors have to end up in front
-	for (int front = 0; front < NITEMS; ++front)
-	{
-		for (int other = front + 1; other < NSHAPES; ++other)
-		{
-			if (rough_err[front] > rough_err[other])
-				swap(rough_err, order, front, other);
-		}
-	}
-
-	// refine the most promising shapes; stop early once an exact encoding (error 0) is found
-	for (int rank = 0; rank < NITEMS && best > 0; ++rank)
-	{
-		const int shape = order[rank];
-		const float err = refine(t, shape, candidates[shape], scratch);
-		if (err < best)
-		{
-			memcpy(block, scratch, sizeof(scratch));
-			best = err;
-		}
-	}
-	return best;
-}
-

+ 0 - 1004
3rdparty/nvtt/bc7/avpcl_mode2.cpp

@@ -1,1004 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x100 555x6 64p 2bi
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-#include "shapes_three.h"
-
-using namespace nv;
-using namespace AVPCL;
-
-// number of palette entries per region
-#define NINDICES	4
-// number of bits used to store one index
-#define	INDEXBITS	2
-// mask selecting the most significant bit of an index
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-// DENOM and BIAS are the last two arguments handed to Utils::lerp when building palettes
-#define	DENOM		(NINDICES-1)
-#define	BIAS		(DENOM/2)
-
-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
-// stop without having to touch all shapes?
-
-// split a linear pixel position into x (bits 0-1) and y (bits 2-3)
-#define	POS_TO_X(pos)	((pos)&3)
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
-
-// number of bit-size entries stored per channel in ChanBits
-#define	NBITSIZES	6
-
-// Bit widths for one color channel. write_header/read_header index this as
-// nbitsizes[region*2+0] for endpoint A and nbitsizes[region*2+1] for endpoint B.
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-// Describes how a block of this mode is laid out in the bitstream.
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-// this mode has a single pattern
-#define	NPATTERNS 1
-
-// Brace-elided aggregate: the values fill the members of Pattern in declaration order
-// (the per-channel nbitsizes arrays first, then transformed, mode, modebits, encoding).
-static Pattern patterns[NPATTERNS] =
-{
-	// red			green			blue			xfm	mode  mb
-	5,5,5,5,5,5,	5,5,5,5,5,5,	5,5,5,5,5,5,	0,	0x4, 3, "",
-};
-
-
-// Per-channel precision values for the two endpoints of one region; these are what gets
-// passed to Utils::quantize / Utils::unquantize for endpoint A and endpoint B.
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGB];
-	int endpt_b_prec[NCHANNELS_RGB];
-};
-
-// Precisions for every region of a pattern.
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS_THREE];
-};
-
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-
-// Brace-elided aggregate: values fill region 0's endpt_a_prec, then its endpt_b_prec,
-// then the same for each following region.
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5, 
-};
-
-// Number of bits required to represent n; zero needs none.
-// Positive n: position of the highest set bit, plus a sign bit when issigned.
-// Negative n: only legal when issigned (asserted); the count includes the sign bit.
-static int nbits(int n, bool issigned)
-{
-	if (n == 0)
-		return 0;	// no bits needed for 0, signed or not
-
-	int count = 0;
-
-	if (n < 0)
-	{
-		nvAssert (issigned);
-		while (n < -1)
-		{
-			n >>= 1;
-			++count;
-		}
-		return count + 1;
-	}
-
-	while (n != 0)
-	{
-		n >>= 1;
-		++count;
-	}
-	return issigned ? count + 1 : count;
-}
-
-// Shorthand for channel i of the endpoints of regions 0 and 1. These expand using the
-// identifiers `ep` and `i`, which must be in scope wherever the macros are used.
-#define	R_0	ep[0].A[i]
-#define	R_1 ep[0].B[i]
-#define	R_2 ep[1].A[i]
-#define	R_3	ep[1].B[i]
-
-// Forward transform: for every channel, subtract R_3 (ep[1].B) from R_0, R_1 and R_2.
-// Only ep[0] and ep[1] are touched.
-// NOTE(review): the parameter is sized NREGIONS while the rest of this file uses NREGIONS_THREE,
-// and ep[2] is left alone; the only pattern in this file has transformed == 0 -- confirm this is unused here.
-static void transform_forward(IntEndptsRGB ep[NREGIONS])
-{
-	for (int i=0; i<NCHANNELS_RGB; ++i)
-	{
-		R_1 -= R_3; R_2 -= R_3; R_0 -= R_3;
-	}
-}
-
-// Inverse of transform_forward: for every channel, add R_3 (ep[1].B) back to R_0, R_1 and R_2.
-// Only ep[0] and ep[1] are touched.
-static void transform_inverse(IntEndptsRGB ep[NREGIONS])
-{
-	for (int i=0; i<NCHANNELS_RGB; ++i)
-	{
-		R_0 += R_3; R_2 += R_3; R_1 += R_3;
-	}
-}
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, IntEndptsRGB q_endpts[NREGIONS_THREE])
-{
-	// Quantize x/y/z of both endpoints of every region with that region's per-channel precision.
-	for (int r = 0; r < NREGIONS_THREE; ++r)
-	{
-		const RegionPrec &prec = pattern_prec.region_precs[r];
-		const FltEndpts &src = endpts[r];
-		IntEndptsRGB &dst = q_endpts[r];
-
-		// endpoint A
-		dst.A[0] = Utils::quantize(src.A.x, prec.endpt_a_prec[0]);
-		dst.A[1] = Utils::quantize(src.A.y, prec.endpt_a_prec[1]);
-		dst.A[2] = Utils::quantize(src.A.z, prec.endpt_a_prec[2]);
-
-		// endpoint B
-		dst.B[0] = Utils::quantize(src.B.x, prec.endpt_b_prec[0]);
-		dst.B[1] = Utils::quantize(src.B.y, prec.endpt_b_prec[1]);
-		dst.B[2] = Utils::quantize(src.B.z, prec.endpt_b_prec[2]);
-	}
-}
-
-// Make sure the index stored at each region's SHAPEINDEX_TO_COMPRESSED_INDICES position has a
-// clear high-order bit: when it does not, exchange the region's A and B endpoints and
-// mirror all of the region's indices (index -> NINDICES - 1 - index).
-static void swap_indices(IntEndptsRGB endpts[NREGIONS_THREE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	for (int region = 0; region < NREGIONS_THREE; ++region)
-	{
-		const int anchor = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-		const int anchor_x = POS_TO_X(anchor);
-		const int anchor_y = POS_TO_Y(anchor);
-		nvAssert(REGION(anchor_x,anchor_y,shapeindex) == region);		// double check the table
-
-		// nothing to do when the high bit is already clear
-		if ((indices[anchor_y][anchor_x] & HIGH_INDEXBIT) == 0)
-			continue;
-
-		// exchange the two endpoints channel by channel
-		for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-		{
-			const int old_a = endpts[region].A[ch];
-			endpts[region].A[ch] = endpts[region].B[ch];
-			endpts[region].B[ch] = old_a;
-		}
-
-		// mirror the indices of every pixel that belongs to this region
-		for (int py = 0; py < Tile::TILE_H; py++)
-		{
-			for (int px = 0; px < Tile::TILE_W; px++)
-			{
-				if (REGION(px,py,shapeindex) == region)
-					indices[py][px] = NINDICES - 1 - indices[py][px];
-			}
-		}
-	}
-}
-
-// Reports whether the endpoints can be stored with pattern p.
-// In this mode the answer is unconditionally true; neither argument is examined.
-static bool endpts_fit(IntEndptsRGB endpts[NREGIONS_THREE], const Pattern &p)
-{
-	return true;
-}
-
-
-static void write_header(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeindex, const Pattern &p, Bits &out)
-{
-	// mode bits first, then the shape index
-	out.write(p.mode, p.modebits);
-	out.write(shapeindex, SHAPEBITS);
-
-	// endpoints are stored channel-major: for each channel, A then B of every region
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		const int *widths = p.chan[ch].nbitsizes;
-
-		for (int region = 0; region < NREGIONS_THREE; ++region)
-		{
-			out.write(endpts[region].A[ch], widths[region*2+0]);
-			out.write(endpts[region].B[ch], widths[region*2+1]);
-		}
-	}
-
-	// the header of this mode is expected to occupy exactly 99 bits
-	nvAssert (out.getptr() == 99);
-}
-
-// Read the block header: mode, shape index and the endpoints of all regions.
-//   in         : bit reader positioned at the start of the block
-//   endpts     : receives the raw endpoint values, read channel by channel (A then B per region)
-//   shapeindex : receives the shape index
-//   p          : receives a copy of the pattern that was used
-//   pat_index  : receives the pattern index (always 0 here)
-static void read_header(Bits &in, IntEndptsRGB endpts[NREGIONS_THREE], int &shapeindex, Pattern &p, int &pat_index)
-{
-	// the returned value is not used; the assert below expects the read position to equal modebits after this call
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	shapeindex = in.read(SHAPEBITS);
-
-	p = patterns[pat_index];
-
-	for (int j=0; j<NCHANNELS_RGB; ++j)
-		for (int i=0; i<NREGIONS_THREE; ++i)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[i*2+0]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[i*2+1]);
-		}
-	// the header of this mode is expected to occupy exactly 99 bits
-	nvAssert (in.getptr() == 99);
-}
-
-
-// WORK PLACEHOLDER -- keep it simple for now
-// Write the indices in position order. The index at each region's
-// SHAPEINDEX_TO_COMPRESSED_INDICES position is written with one bit less than the others.
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	int anchors[NREGIONS_THREE];
-
-	for (int region = 0; region < NREGIONS_THREE; ++region)
-		anchors[region] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		// is this position one of the shortened ones?
-		bool is_anchor = false;
-		for (int region = 0; region < NREGIONS_THREE && !is_anchor; ++region)
-			is_anchor = (anchors[region] == pos);
-
-		const int width = is_anchor ? INDEXBITS - 1 : INDEXBITS;
-		out.write(indices[POS_TO_Y(pos)][POS_TO_X(pos)], width);
-	}
-}
-
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	// Read the indices in position order. The index at each region's
-	// SHAPEINDEX_TO_COMPRESSED_INDICES position is stored with one bit less than the others.
-	int anchors[NREGIONS_THREE];
-
-	for (int region = 0; region < NREGIONS_THREE; ++region)
-		anchors[region] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		// is this position one of the shortened ones?
-		bool is_anchor = false;
-		for (int region = 0; region < NREGIONS_THREE && !is_anchor; ++region)
-			is_anchor = (anchors[region] == pos);
-
-		const int width = is_anchor ? INDEXBITS - 1 : INDEXBITS;
-		indices[POS_TO_Y(pos)][POS_TO_X(pos)] = in.read(width);
-	}
-}
-
-// Serialize one compressed block: header (mode, shape, endpoints) followed by the indices.
-// Asserts that exactly AVPCL::BITSIZE bits were written to `block`.
-static void emit_block(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, out);
-
-	write_indices(indices, shapeindex, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-
-static void generate_palette_quantized(const IntEndptsRGB &endpts, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	// Unquantize both endpoints channel by channel, then fill the palette entry by entry.
-	const int red_a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]);
-	const int red_b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
-
-	const int green_a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]);
-	const int green_b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
-
-	const int blue_a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]);
-	const int blue_b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
-
-	for (int k = 0; k < NINDICES; ++k)
-	{
-		Vector4 &entry = palette[k];
-
-		// interpolate each color channel between its two endpoints
-		entry.x = float(Utils::lerp(red_a, red_b, k, BIAS, DENOM));
-		entry.y = float(Utils::lerp(green_a, green_b, k, BIAS, DENOM));
-		entry.z = float(Utils::lerp(blue_a, blue_b, k, BIAS, DENOM));
-
-		// constant alpha
-		entry.w = 255.0f;
-	}
-}
-
-// sign extend but only if it was transformed
-// Applies SIGN_EXTEND to every stored endpoint value except endpts[0].A, using the bit width
-// recorded for that value in p.chan[i].nbitsizes. Asserts that the pattern is transformed.
-// NOTE(review): SIGN_EXTEND is defined elsewhere; its exact semantics are not visible here.
-static void sign_extend(Pattern &p, IntEndptsRGB endpts[NREGIONS_THREE])
-{
-	nvAssert (p.transformed != 0);
-
-	for (int i=0; i<NCHANNELS_RGB; ++i)
-	{
-		// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);	// always positive here
-		endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[1]);
-		endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[2]);
-		endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[3]);
-		endpts[2].A[i] = SIGN_EXTEND(endpts[2].A[i], p.chan[i].nbitsizes[4]);
-		endpts[2].B[i] = SIGN_EXTEND(endpts[2].B[i], p.chan[i].nbitsizes[5]);
-	}
-}
-
-void AVPCL::decompress_mode2(const char *block, Tile &t)
-{
-	// Decode one block: header -> per-region palettes -> per-pixel indices -> pixel colors in t.data.
-	Bits bitstream(block, AVPCL::BITSIZE);
-
-	Pattern pattern;
-	IntEndptsRGB region_endpts[NREGIONS_THREE];
-	int shape, pattern_index;
-
-	read_header(bitstream, region_endpts, shape, pattern, pattern_index);
-
-	// transformed patterns need sign extension followed by the inverse transform
-	if (pattern.transformed)
-	{
-		sign_extend(pattern, region_endpts);
-		transform_inverse(region_endpts);
-	}
-
-	// one palette per region, built from that region's endpoints and precision
-	Vector4 palettes[NREGIONS_THREE][NINDICES];
-	for (int region = 0; region < NREGIONS_THREE; ++region)
-	{
-		const RegionPrec &prec = pattern_precs[pattern_index].region_precs[region];
-		generate_palette_quantized(region_endpts[region], prec, palettes[region]);
-	}
-
-	int pixel_indices[Tile::TILE_H][Tile::TILE_W];
-	read_indices(bitstream, shape, pixel_indices);
-
-	// every bit of the block must have been consumed by now
-	nvAssert(bitstream.getptr() == AVPCL::BITSIZE);
-
-	// resolve each pixel through the palette of the region it falls in
-	for (int row = 0; row < Tile::TILE_H; row++)
-	{
-		for (int col = 0; col < Tile::TILE_W; col++)
-		{
-			const int region = REGION(col,row,shape);
-			t.data[row][col] = palettes[region][pixel_indices[row][col]];
-		}
-	}
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-//   colors, importance : np colors and the per-color weight each error is multiplied by
-//   endpts, region_prec: quantized endpoints and the precision used to build the palette
-//   current_err        : cutoff; once the running total exceeds it the search is abandoned,
-//                        indices[i..np-1] are set to -1 and FLT_MAX is returned
-//   indices            : receives the chosen palette index for every color
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float besterr = FLT_MAX;
-
-		// the scan also stops once an exact match (besterr == 0) has been found
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-			float err = Utils::metric4(colors[i], palette[j]) * importance[i];
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-//   indices[y][x] receives the palette index chosen for each pixel inside tile.size_x x tile.size_y
-//   toterr[region] receives the summed error of the pixels belonging to that region
-// Unlike map_colors, the per-pixel error here is NOT multiplied by an importance weight.
-// NOTE(review): confirm that this difference from map_colors is intentional.
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_THREE])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS_THREE][NINDICES];
-
-	for (int region = 0; region < NREGIONS_THREE; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
-		toterr[region] = 0;
-	}
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float besterr = FLT_MAX;
-
-		// the scan also stops once an exact match (besterr == 0) has been found
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			float err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[y][x] = i;
-			}
-		}
-		toterr[region] += besterr;
-	}
-}
-
-// Logarithmic search along one channel of one endpoint (A when do_b is 0, otherwise B).
-// Starting from old_endpts, steps of +/- 2^(prec-1), 2^(prec-2), ..., 1 are tried; whenever a step lowers
-// the error, new_endpts moves there and the search continues from the new position.
-// Returns old_err if nothing helped, otherwise the smaller error that was found.
-// indices holds valid values only when the return value is below old_err; otherwise it is all -1's.
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts, 
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGB trial_endpts;				// candidate endpoints handed to map_colors
-	int trial_indices[Tile::TILE_TOTAL];	// indices produced for the candidate
-	float best_err = old_err;				// best error seen so far
-	int best_delta;							// signed step that produced best_err within the current step size
-
-	// until an improvement is found the caller only sees -1's
-	for (int k = 0; k < np; ++k)
-		indices[k] = -1;
-
-	// both the result and the scratch copy start at the caller's endpoints
-	new_endpts = old_endpts;
-	trial_endpts = old_endpts;
-
-	const bool perturb_b = (do_b != 0);
-	const int prec = perturb_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-	const int limit = 1 << prec;			// first value that no longer fits in prec bits
-
-	int step = 1 << (prec-1);
-	while (step != 0)
-	{
-		bool improved = false;
-
-		// try the negative step first, then the positive one
-		for (int pass = 0; pass < 2; ++pass)
-		{
-			const int delta = (pass == 0) ? -step : step;
-			int candidate;
-
-			if (perturb_b)
-				candidate = trial_endpts.B[ch] = new_endpts.B[ch] + delta;
-			else
-				candidate = trial_endpts.A[ch] = new_endpts.A[ch] + delta;
-
-			// skip values that do not fit the endpoint's precision
-			if (candidate < 0 || candidate >= limit)
-				continue;
-
-			const float err = map_colors(colors, importance, np, trial_endpts, region_prec, best_err, trial_indices);
-
-			if (err < best_err)
-			{
-				improved = true;
-				best_err = err;
-				best_delta = delta;
-				for (int k = 0; k < np; ++k)
-					indices[k] = trial_indices[k];
-			}
-		}
-
-		// commit the better of the two steps before halving the step size
-		if (improved)
-		{
-			if (perturb_b)
-				new_endpts.B[ch] += best_delta;
-			else
-				new_endpts.A[ch] += best_delta;
-		}
-
-		step >>= 1;
-	}
-	return best_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// if the value returned from this is less than orig_err, then indices[] will contain valid indices (otherwise -1's)
-// and opt_endpts has been updated for channel ch; otherwise opt_endpts is left untouched
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGB temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// thresholds above are stated for a full tile; scale them to the number of pixels actually present
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	// nothing can beat a perfect fit
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B; all four bounds are inclusive
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// best pair found so far; only written back if the error actually improves
-	int amin = opt_endpts.A[ch];
-	int bmin = opt_endpts.B[ch];
-
-	// the scan keeps the current ordering of A and B, so there's no need to swap them afterwards
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b <= bhigh; ++b)		// bhigh is inclusive, just like ahigh
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b <= bhigh; ++b)				// bhigh is inclusive, just like ahigh
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err; 
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB &opt_endpts)
-{	// optimizes one region's endpoints channel by channel: opt_endpts receives the result and the returned error is never larger than orig_err
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGB new_a, new_b;
-	IntEndptsRGB new_endpt;
-	int do_b;	// which endpoint perturb_one moves next: 0 = A, 1 = B
-	int orig_indices[Tile::TILE_TOTAL];	// indices saved at the first improvement of the current channel
-	int new_indices[Tile::TILE_TOTAL];	// indices of the most recent improvement
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-		float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;	// neither endpoint improved this channel
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;	// neither endpoint improved this channel
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over (the loop's ++ch brings it back to channel 0)
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;	// true until this pass records its first improvement
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-// optimize the endpoints of every region independently
-// orig_err / orig_endpts: per-region starting error and endpoints
-// opt_err / opt_endpts:   per-region results; a region keeps its original values unless optimize_one lowered its error
-// all per-region arrays hold NREGIONS_THREE entries
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_THREE], 
-							const IntEndptsRGB orig_endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, float opt_err[NREGIONS_THREE], IntEndptsRGB opt_endpts[NREGIONS_THREE])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-	float importance[Tile::TILE_TOTAL];
-	IntEndptsRGB temp_in, temp_out;
-
-	for (int region=0; region<NREGIONS_THREE; ++region)
-	{
-		// collect the pixels in the region, together with their importance weights
-		int np = 0;
-
-		for (int y = 0; y < tile.size_y; y++) {
-			for (int x = 0; x < tile.size_x; x++) {
-				if (REGION(x, y, shapeindex) == region) {
-					pixels[np] = tile.data[y][x];
-					importance[np] = tile.importance_map[y][x];
-					np++;
-				}
-			}
-		}
-
-		// start from the caller's values; they are the answer unless the optimizer does better
-		const float start_err = orig_err[region];
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = start_err;
-
-		// now try to optimize these endpoints
-		float temp_out_err = optimize_one(pixels, importance, np, start_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-		// if we find an improvement, correct the output endpoints and errors
-		if (temp_out_err < start_err)
-		{
-			opt_err[region] = temp_out_err;
-			opt_endpts[region] = temp_out;
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-// For the given shape, walk the patterns in order and emit a block for the first one whose quantized
-// endpoints fit. The optimized endpoints are used when they still fit and lower the total error;
-// otherwise the unoptimized ones are emitted. Returns the total error of the emitted block.
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_THREE], char *block)
-{
-	// every per-region array is sized NREGIONS_THREE: optimize_endpts and assign_indices fill that many entries
-	float orig_err[NREGIONS_THREE], opt_err[NREGIONS_THREE], orig_toterr, opt_toterr, expected_opt_err[NREGIONS_THREE];
-	IntEndptsRGB orig_endpts[NREGIONS_THREE], opt_endpts[NREGIONS_THREE];
-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
-		if (patterns[sp].transformed)
-			transform_forward(orig_endpts);
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			// the optimizer works on untransformed endpoints
-			if (patterns[sp].transformed)
-				transform_inverse(orig_endpts);
-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS_THREE; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
-			if (patterns[sp].transformed)
-				transform_forward(opt_endpts);
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS_THREE; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transformed)
-					transform_forward(orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 2).";
-	return FLT_MAX;
-
-}
-
-// Limit x, y and z of v to the range [0, 255] and overwrite w with 255.
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f)			v.x = 0.0f;
-	else if (v.x > 255.0f)	v.x = 255.0f;
-
-	if (v.y < 0.0f)			v.y = 0.0f;
-	else if (v.y > 255.0f)	v.y = 255.0f;
-
-	if (v.z < 0.0f)			v.z = 0.0f;
-	else if (v.z > 255.0f)	v.z = 255.0f;
-
-	v.w = 255.0f;
-}
-
-// Fill each region's palette by interpolating between its unquantized endpoints A and B:
-// entry i is Utils::lerp(A, B, i, 0, DENOM).
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_THREE], Vector4 palette[NREGIONS_THREE][NINDICES])
-{
-	for (int region = 0; region < NREGIONS_THREE; ++region)
-	{
-		const FltEndpts &ep = endpts[region];
-		for (int entry = 0; entry < NINDICES; ++entry)
-		{
-			palette[region][entry] = Utils::lerp(ep.A, ep.B, entry, 0, DENOM);
-		}
-	}
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-// only the tile.size_y x tile.size_x pixels of the tile contribute; no indices are recorded
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_THREE])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS_THREE][NINDICES];
-
-	generate_palette_unquantized(endpts, palette);
-
-	float toterr = 0;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		// stop scanning the palette once an exact match (error 0) has been found
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
-				break;
-			if (err < besterr)
-				besterr = err;
-		}
-		toterr += besterr;
-	}
-	return toterr;
-}
-
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_THREE])
-{
-	for (int region=0; region<NREGIONS_THREE; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[2];
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				if (np < 2) alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + minp*Vector4(direction, 0);
-		endpts[region].B = mean + maxp*Vector4(direction, 0);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-// Exchange entries i and j in both parallel arrays, so each float in list1 stays paired with its int in list2.
-static void swap(float *list1, int *list2, int i, int j)
-{
-	const float held_value = list1[i];
-	const int held_tag = list2[i];
-
-	list1[i] = list1[j];
-	list2[i] = list2[j];
-
-	list1[j] = held_value;
-	list2[j] = held_tag;
-}
-
-float AVPCL::compress_mode2(const Tile &t, char *block)
-{
-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
-	const int NITEMS=NSHAPES/4;
-
-	// pick the best NITEMS shapes and refine these.
-	struct {
-		FltEndpts endpts[NREGIONS_THREE];
-	} all[NSHAPES];
-	float roughmse[NSHAPES];
-	int index[NSHAPES];
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-
-	for (int i=0; i<NSHAPES; ++i)
-	{
-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
-		index[i] = i;
-	}
-
-	// bubble sort -- only need to bubble up the first NITEMS items
-	for (int i=0; i<NITEMS; ++i)
-	for (int j=i+1; j<NSHAPES; ++j)
-		if (roughmse[i] > roughmse[j])
-			swap(roughmse, index, i, j);
-
-	for (int i=0; i<NITEMS && msebest>0; ++i)
-	{
-		int shape = index[i];
-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
-		if (mse < msebest)
-		{
-			memcpy(block, tempblock, sizeof(tempblock));
-			msebest = mse;
-		}
-	}
-	return msebest;
-}
-

+ 0 - 1059
3rdparty/nvtt/bc7/avpcl_mode3.cpp

@@ -1,1059 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x1000 777.1x4 64p 2bi (30b)
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-#include "shapes_two.h"
-
-using namespace nv;
-using namespace AVPCL;
-
-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
-
-#define NINDICES	4	// palette entries per region
-#define	INDEXBITS	2	// bits used to store one palette index
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))	// top bit of an index; swap_indices keeps it clear at each region's anchor position
-#define	DENOM		(NINDICES-1)	// denominator passed to Utils::lerp when interpolating palette entries
-#define	BIAS		(DENOM/2)	// bias passed to Utils::lerp for the quantized palette
-
-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
-// stop without having to touch all shapes?
-
-#define	POS_TO_X(pos)	((pos)&3)	// column of a linear pixel position (low two bits)
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)	// row of a linear pixel position (next two bits)
-
-#define	NBITSIZES	(NREGIONS*2)	// one A and one B bit size per region
-#define	ABITINDEX(region)	(2*(region)+0)	// slot of a region's endpoint A in ChanBits::nbitsizes
-#define	BBITINDEX(region)	(2*(region)+1)	// slot of a region's endpoint B in ChanBits::nbitsizes
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel, indexed with ABITINDEX(region) / BBITINDEX(region)
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel (the widths write_header / read_header use for each endpoint)
-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-	int mode;				// associated mode value; write_header emits it first, using modebits bits
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	NPATTERNS 1	// number of entries in patterns[] and pattern_precs[]
-#define	NREGIONS  2	// regions per block in this mode
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red		green		blue		xfm	mode  mb
-	7,7,7,7,	7,7,7,7,	7,7,7,7,	0,	0x8, 4, "",	// 7 bits for every endpoint channel, untransformed, mode value 0x8 stored in 4 bits, empty encoding string
-};
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGB];	// per-channel precision of endpoint A; the quantize/unquantize calls in this file add 1 for the shared lsb
-	int endpt_b_prec[NCHANNELS_RGB];	// per-channel precision of endpoint B, used the same way
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];	// endpoint precisions of each region for one pattern
-};
-
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	7,7,7, 7,7,7, 7,7,7, 7,7,7,	// in declaration order: region 0 endpoint A (3 channels), region 0 endpoint B, region 1 endpoint A, region 1 endpoint B
-};
-
-// Number of bits needed to hold n. Zero needs no bits at all. A positive value needs one bit per
-// significant binary digit, plus a sign bit when issigned is set. A negative value is expected
-// only with issigned set (asserted) and needs its magnitude bits plus the sign bit.
-static int nbits(int n, bool issigned)
-{
-	if (n == 0)
-		return 0;	// signed or not, zero takes no storage
-
-	int count = 0;
-
-	if (n < 0)
-	{
-		nvAssert (issigned);
-		// shift until only the sign (-1) is left
-		while (n < -1)
-		{
-			n >>= 1;
-			++count;
-		}
-		return count + 1;
-	}
-
-	// positive: count the significant binary digits
-	while (n != 0)
-	{
-		n >>= 1;
-		++count;
-	}
-	if (issigned)
-		++count;	// room for the sign bit
-	return count;
-}
-
-static void transform_forward(IntEndptsRGB_2 ep[NREGIONS])
-{
-	nvUnreachable();	// placeholder: the only pattern defined in this file is untransformed (xfm column is 0), so no forward transform is implemented
-}
-
-static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS])
-{
-	nvUnreachable();	// placeholder: decompress_mode3 calls this only when p.transformed is set, and the only pattern defined in this file has it at 0
-}
-
-// Reduce 888,888 endpoints to 777,777: every channel drops its lowest bit, and the endpoint's
-// shared lsb (a_lsb / b_lsb) is set when at least two of its three dropped bits were 1.
-static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts)
-{
-	// endpoint A
-	int a_ones = 0;
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		a_ones += endpts.A[ch] & 1;
-		compr_endpts.A[ch] = endpts.A[ch] >> 1;
-		nvAssert (compr_endpts.A[ch] < 128);
-	}
-	compr_endpts.a_lsb = a_ones >= 2;
-
-	// endpoint B
-	int b_ones = 0;
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		b_ones += endpts.B[ch] & 1;
-		compr_endpts.B[ch] = endpts.B[ch] >> 1;
-		nvAssert (compr_endpts.B[ch] < 128);
-	}
-	compr_endpts.b_lsb = b_ones >= 2;
-}
-
-// Rebuild full-width endpoints: every channel is shifted up one bit and the endpoint's shared lsb
-// is placed in the vacated low bit.
-static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts)
-{
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		endpts.A[ch] = (compr_endpts.A[ch] << 1) | compr_endpts.a_lsb;
-	}
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		endpts.B[ch] = (compr_endpts.B[ch] << 1) | compr_endpts.b_lsb;
-	}
-}
-
-// Apply uncompress_one to the endpoints of every region.
-static void uncompress_endpoints(const IntEndptsRGB_2 compr_endpts[NREGIONS], IntEndptsRGB endpts[NREGIONS])
-{
-	int region = 0;
-	while (region < NREGIONS)
-	{
-		uncompress_one(compr_endpts[region], endpts[region]);
-		++region;
-	}
-}
-
-// Apply compress_one to the endpoints of every region.
-static void compress_endpoints(const IntEndptsRGB endpts[NREGIONS], IntEndptsRGB_2 compr_endpts[NREGIONS])
-{
-	int region = 0;
-	while (region < NREGIONS)
-	{
-		compress_one(endpts[region], compr_endpts[region]);
-		++region;
-	}
-}
-
-
-// Quantize the floating-point endpoints of every region and store them in compressed form
-// (shortened channels plus one shared lsb per endpoint).
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGB_2 q_endpts[NREGIONS])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		const FltEndpts &src = endpts[region];
-		const RegionPrec &prec = pattern_prec.region_precs[region];
-		IntEndptsRGB full;
-
-		// quantize with one extra bit: at this point the endpoints still carry their own lsb
-		full.A[0] = Utils::quantize(src.A.x, prec.endpt_a_prec[0]+1);
-		full.A[1] = Utils::quantize(src.A.y, prec.endpt_a_prec[1]+1);
-		full.A[2] = Utils::quantize(src.A.z, prec.endpt_a_prec[2]+1);
-		full.B[0] = Utils::quantize(src.B.x, prec.endpt_b_prec[0]+1);
-		full.B[1] = Utils::quantize(src.B.y, prec.endpt_b_prec[1]+1);
-		full.B[2] = Utils::quantize(src.B.z, prec.endpt_b_prec[2]+1);
-
-		// fold the per-channel lsb's into the shared ones
-		compress_one(full, q_endpts[region]);
-	}
-}
-
-// Make sure the index stored at each region's anchor position has a 0 high-order bit.
-// When it does not, the region's endpoints (channels and shared lsb's) are exchanged and every
-// index of the region is mirrored (i -> NINDICES-1-i).
-static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		const int anchor = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-		const int anchor_x = POS_TO_X(anchor);
-		const int anchor_y = POS_TO_Y(anchor);
-		nvAssert(REGION(anchor_x,anchor_y,shapeindex) == region);		// double check the table
-
-		// nothing to do when the anchor's high bit is already clear
-		if ((indices[anchor_y][anchor_x] & HIGH_INDEXBIT) == 0)
-			continue;
-
-		// exchange A and B, channel by channel, then their shared lsb's
-		IntEndptsRGB_2 &ep = endpts[region];
-		for (int ch = 0; ch < NCHANNELS_RGB; ++ch) 
-		{
-			int held = ep.A[ch];
-			ep.A[ch] = ep.B[ch];
-			ep.B[ch] = held;
-		}
-		int held_lsb = ep.a_lsb;
-		ep.a_lsb = ep.b_lsb;
-		ep.b_lsb = held_lsb;
-
-		// mirror the indices of this region so they select the same colours as before
-		for (int py = 0; py < Tile::TILE_H; py++)
-		{
-			for (int px = 0; px < Tile::TILE_W; px++)
-			{
-				if (REGION(px,py,shapeindex) == region)
-					indices[py][px] = NINDICES - 1 - indices[py][px];
-			}
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGB_2 endpts[NREGIONS], const Pattern &p)
-{
-	return true;	// always reports a fit; neither the endpoints nor the pattern are inspected
-}
-
-// Write the block header: mode bits, shape index, then the endpoints (channel by channel, and within
-// a channel region by region, A before B) and finally each region's two shared lsb's.
-// The assert expects the write pointer at bit 98 afterwards.
-static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
-{
-	out.write(p.mode, p.modebits);
-	out.write(shapeindex, SHAPEBITS);
-
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		const ChanBits &widths = p.chan[ch];
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			const IntEndptsRGB_2 &ep = endpts[region];
-			out.write(ep.A[ch], widths.nbitsizes[ABITINDEX(region)]);
-			out.write(ep.B[ch], widths.nbitsizes[BBITINDEX(region)]);
-		}
-	}
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		const IntEndptsRGB_2 &ep = endpts[region];
-		out.write(ep.a_lsb, 1);
-		out.write(ep.b_lsb, 1);
-	}
-
-	nvAssert (out.getptr() == 98);
-}
-
-// Read the block header in the order write_header produces it: mode bits, shape index, the endpoints
-// (channel by channel, region by region, A before B) and each region's two shared lsb's.
-// pat_index is always set to 0 and p to patterns[0].
-static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
-{
-	// the returned mode value is not needed here; the assert below checks how far the read pointer has moved
-	AVPCL::getmode(in);
-
-	pat_index = 0;
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	shapeindex = in.read(SHAPEBITS);
-	p = patterns[pat_index];
-
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		const ChanBits &widths = p.chan[ch];
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			IntEndptsRGB_2 &ep = endpts[region];
-			ep.A[ch] = in.read(widths.nbitsizes[ABITINDEX(region)]);
-			ep.B[ch] = in.read(widths.nbitsizes[BBITINDEX(region)]);
-		}
-	}
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		IntEndptsRGB_2 &ep = endpts[region];
-		ep.a_lsb  = in.read(1);
-		ep.b_lsb  = in.read(1);
-	}
-
-	nvAssert (in.getptr() == 98);
-}
-
-// Write the index of every pixel position in order. The anchor position of each region is written
-// with one bit less than the others (INDEXBITS-1 instead of INDEXBITS).
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	int anchors[NREGIONS];
-	for (int region = 0; region < NREGIONS; ++region)
-		anchors[region] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		// is this position the anchor of one of the regions?
-		bool is_anchor = false;
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			if (anchors[region] == pos)
-			{
-				is_anchor = true;
-				break;
-			}
-		}
-
-		const int width = is_anchor ? (INDEXBITS - 1) : INDEXBITS;
-		out.write(indices[POS_TO_Y(pos)][POS_TO_X(pos)], width);
-	}
-}
-
-// Read the index of every pixel position in order. The anchor position of each region is stored
-// with one bit less than the others (INDEXBITS-1 instead of INDEXBITS).
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	int anchors[NREGIONS];
-	for (int region = 0; region < NREGIONS; ++region)
-		anchors[region] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		// is this position the anchor of one of the regions?
-		bool is_anchor = false;
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			if (anchors[region] == pos)
-			{
-				is_anchor = true;
-				break;
-			}
-		}
-
-		const int width = is_anchor ? (INDEXBITS - 1) : INDEXBITS;
-		indices[POS_TO_Y(pos)][POS_TO_X(pos)] = in.read(width);
-	}
-}
-
-// Serialise a complete block into `block`: the header first, then the indices. The assert checks
-// that exactly AVPCL::BITSIZE bits were written.
-static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits writer(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, writer);
-	write_indices(indices, shapeindex, writer);
-
-	nvAssert(writer.getptr() == AVPCL::BITSIZE);
-}
-
-// Build the NINDICES-entry palette of one region from its compressed endpoints: the shared lsb's are
-// restored, both endpoints are unquantized per channel, and every entry is interpolated between
-// them. The w component of every entry is a constant 255.
-static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	IntEndptsRGB full;
-	uncompress_one(endpts_2, full);
-
-	// scale both endpoints, channel by channel
-	int lo[NCHANNELS_RGB];
-	int hi[NCHANNELS_RGB];
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		lo[ch] = Utils::unquantize(full.A[ch], region_prec.endpt_a_prec[ch]+1);	// +1 since we are in uncompressed space
-		hi[ch] = Utils::unquantize(full.B[ch], region_prec.endpt_b_prec[ch]+1);
-	}
-
-	// interpolate every entry
-	for (int entry = 0; entry < NINDICES; ++entry)
-	{
-		palette[entry].x = float(Utils::lerp(lo[0], hi[0], entry, BIAS, DENOM));
-		palette[entry].y = float(Utils::lerp(lo[1], hi[1], entry, BIAS, DENOM));
-		palette[entry].z = float(Utils::lerp(lo[2], hi[2], entry, BIAS, DENOM));
-		palette[entry].w = 255.0f;	// constant alpha
-	}
-}
-
-static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS])
-{
-	nvUnreachable();	// placeholder: decompress_mode3 calls this only when p.transformed is set, and the only pattern defined in this file has it at 0
-}
-
-// Decode one mode-3 block into tile t: read the header and endpoints, build a palette for each
-// region, read the per-pixel indices and look every pixel's colour up in its region's palette.
-void AVPCL::decompress_mode3(const char *block, Tile &t)
-{
-	Bits bitstream(block, AVPCL::BITSIZE);
-
-	Pattern pattern;
-	IntEndptsRGB_2 region_endpts[NREGIONS];
-	int shape;
-	int pattern_index;
-	read_header(bitstream, region_endpts, shape, pattern, pattern_index);
-
-	// transformed patterns need their endpoints sign-extended and the transform undone first
-	if (pattern.transformed)
-	{
-		sign_extend(pattern, region_endpts);
-		transform_inverse(region_endpts);
-	}
-
-	const PatternPrec &precs = pattern_precs[pattern_index];
-	Vector4 region_palette[NREGIONS][NINDICES];
-	for (int region = 0; region < NREGIONS; ++region)
-		generate_palette_quantized(region_endpts[region], precs.region_precs[region], region_palette[region]);
-
-	int pixel_index[Tile::TILE_H][Tile::TILE_W];
-	read_indices(bitstream, shape, pixel_index);
-
-	// the whole block must have been consumed by now
-	nvAssert(bitstream.getptr() == AVPCL::BITSIZE);
-
-	// palette lookup, pixel by pixel
-	for (int y = 0; y < Tile::TILE_H; ++y)
-	{
-		for (int x = 0; x < Tile::TILE_W; ++x)
-		{
-			const int region = REGION(x,y,shape);
-			t.data[y][x] = region_palette[region][pixel_index[y][x]];
-		}
-	}
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-// colors / importance: the np pixels to map and the weight applied to each pixel's error
-// current_err: the search is abandoned as soon as the running total exceeds this value
-// indices: receives the chosen palette entry per pixel; on early exit the entries from the failing pixel onward are set to -1
-// returns the summed weighted error, or FLT_MAX if the search was abandoned
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float besterr = FLT_MAX;
-
-		// stop scanning the palette once an exact match (error 0) has been found
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-			float err = Utils::metric4(colors[i], palette[j]) * importance[i];
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-// For every pixel of the tile pick the closest entry of its region's quantized palette,
-// store that entry in indices[y][x] and accumulate the pixel's error into toterr[region].
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	Vector4 palette[NREGIONS][NINDICES];
-
-	for (int r = 0; r < NREGIONS; ++r)
-	{
-		toterr[r] = 0;
-		generate_palette_quantized(endpts[r], pattern_prec.region_precs[r], &palette[r][0]);
-	}
-
-	for (int row = 0; row < tile.size_y; ++row)
-	{
-		for (int col = 0; col < tile.size_x; ++col)
-		{
-			const int r = REGION(col,row,shapeindex);
-			float lowest = FLT_MAX;
-			int entry = 0;
-
-			// stop at the first palette entry whose error is larger than the best so far
-			while (entry < NINDICES && lowest > 0)
-			{
-				const float dist = Utils::metric4(tile.data[row][col], palette[r][entry]);
-
-				if (dist > lowest)
-					break;
-				if (dist < lowest)
-				{
-					lowest = dist;
-					indices[row][col] = entry;
-				}
-				++entry;
-			}
-			toterr[r] += lowest;
-		}
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, 
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGB_2 temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;	// only read when 'improved' is true, i.e. after it was assigned in the same pass
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];	// bit precision of the endpoint being moved
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)	// step sizes 2^(prec-1), ..., 2, 1
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)	// try -step, then +step
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))	// candidate outside [0, 2^prec)
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))	// candidate outside [0, 2^prec)
-					continue;
-			}
-
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);	// returns FLT_MAX once min_err is exceeded
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int i=0; i<np; ++i)
-					indices[i] = temp_indices[i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
-//
-// Scans every (A[ch], B[ch]) pair in a window around opt_endpts for channel ch and keeps the
-// pair with the lowest map_colors() error. On improvement opt_endpts, orig_err and indices[]
-// are updated; otherwise indices[] is left filled with -1. Returns the best error found.
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGB_2 temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// thresholds below are stated for a full tile; scale them to the number of pixels in this region
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;	// nothing to improve
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B (all four bounds are inclusive)
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// only read after an improvement has stored them; start from the current endpoints so they are never indeterminate
-	int amin = opt_endpts.A[ch];
-	int bmin = opt_endpts.B[ch];
-
-	// now there's no need to swap the ordering of A and B
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b <= bhigh; ++b)	// bhigh is inclusive, like ahigh
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err)
-			{
-				amin = a;
-				bmin = b;
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b <= bhigh; ++b)	// bhigh is inclusive, like ahigh
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err)
-			{
-				amin = a;
-				bmin = b;
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		orig_err = best_err;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-/* Refine one region's endpoint pair. Starting from orig_endpts/orig_err, each
- * RGB channel is improved by alternately perturbing endpoint A and endpoint B
- * (perturb_one) until neither helps; a small exhaustive() scan per channel
- * follows. The refined endpoints are written to opt_endpts and the final
- * error is returned. */
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGB_2 new_a, new_b;
-	IntEndptsRGB_2 new_endpt;
-	int do_b;
-	int orig_indices[Tile::TILE_TOTAL];
-	int new_indices[Tile::TILE_TOTAL];
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-		float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)	// no improvement on this channel: move on to the next one
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)	// no improvement on this channel: move on to the next one
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);	// opt_err is passed by reference and is lowered by exhaustive() itself on success
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-/* this will return a valid set of endpoints in opt_endpts regardless of whether it improves orig_endpts or not
- *
- * For each region: gather the region's pixels and importance weights, then for
- * every LSB mode (a_lsb/b_lsb combination) re-evaluate and optimize the
- * endpoints, keeping the result with the lowest error. opt_err/opt_endpts
- * start out as copies of orig_err/orig_endpts and are only overwritten by an
- * improvement. */
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
-							const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGB_2 temp_in, temp_out;
-	int temp_indices[Tile::TILE_TOTAL];	// filled by map_colors below; not read afterwards in this function
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
-		{
-			temp_in.a_lsb = lsbmode & 1;	// bit 0 of lsbmode
-			temp_in.b_lsb = (lsbmode >> 1) & 1;	// bit 1 of lsbmode
-
-			// make sure we have a valid error for temp_in
-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
-            float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
-
-			// now try to optimize these endpoints
-            float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-			// if we find an improvement, update the best so far and correct the output endpoints and errors
-			if (temp_out_err < best_err)
-			{
-				best_err = temp_out_err;
-				opt_err[region] = temp_out_err;
-				opt_endpts[region] = temp_out;
-			}
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-/* Turn unquantized endpoints for the chosen shape into an encoded block.
- * Patterns are tried in table order; the first one whose quantized endpoints
- * fit is used. For that pattern the endpoints are optimized, and whichever of
- * the optimized or original endpoint set is better (and still fits) is
- * written to 'block'. Returns the summed per-region error of what was emitted,
- * or FLT_MAX if no pattern fits. */
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
-		if (patterns[sp].transformed)
-			transform_forward(orig_endpts);
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			if (patterns[sp].transformed)
-				transform_inverse(orig_endpts);	// back to untransformed values for the optimizer
-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);	// recomputes opt_err from scratch
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
-			if (patterns[sp].transformed)
-				transform_forward(opt_endpts);
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transformed)
-					transform_forward(orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 3).";
-	return FLT_MAX;
-}
-
-// Limit the x, y and z components of v to the range [0, 255] and force w to 255.
-static void clamp(Vector4 &v)
-{
-	v.x = (v.x < 0.0f) ? 0.0f : ((v.x > 255.0f) ? 255.0f : v.x);
-	v.y = (v.y < 0.0f) ? 0.0f : ((v.y > 255.0f) ? 255.0f : v.y);
-	v.z = (v.z < 0.0f) ? 0.0f : ((v.z > 255.0f) ? 255.0f : v.z);
-	v.w = 255.0f;
-}
-
-// Fill palette[region][0..NINDICES-1] for every region by interpolating between that
-// region's unquantized endpoints A and B (Utils::lerp with bias 0 and denominator DENOM).
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
-{
-	for (int r = 0; r < NREGIONS; ++r)
-	{
-		const FltEndpts &pair = endpts[r];
-
-		for (int entry = 0; entry < NINDICES; ++entry)
-			palette[r][entry] = Utils::lerp(pair.A, pair.B, entry, 0, DENOM);
-	}
-}
-
-// Build one palette per region from the unquantized endpoints, match every pixel of the tile
-// to the closest entry of its region's palette, and return the error summed over all pixels.
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
-{
-	Vector4 palette[NREGIONS][NINDICES];
-	generate_palette_unquantized(endpts, palette);
-
-	float total = 0;
-
-	for (int row = 0; row < tile.size_y; ++row)
-	{
-		for (int col = 0; col < tile.size_x; ++col)
-		{
-			const int r = REGION(col,row,shapeindex);
-			float lowest = FLT_MAX;
-			int entry = 0;
-
-			// stop at the first entry whose error is larger than the best so far. this works for most norms.
-			while (entry < NINDICES && lowest > 0)
-			{
-				const float dist = Utils::metric4(tile.data[row][col], palette[r][entry]);
-
-				if (dist > lowest)
-					break;
-				if (dist < lowest)
-					lowest = dist;
-				++entry;
-			}
-			total += lowest;
-		}
-	}
-	return total;
-}
-/* First-guess endpoints for every region of the given shape. Regions with 0,
- * 1 or 2 pixels get trivial endpoints; otherwise the endpoints are the two
- * extreme projections of the region's colours onto their principal direction
- * (through the mean), clamped by clamp(). The endpoints are written to
- * endpts[]; the return value is the error map_colors() reports for them. */
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[2];	// alpha of the first two pixels only; used by the np == 1 and np == 2 cases
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				if (np < 2) alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);	// 'mean' held the sum up to here
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + minp*Vector4(direction, 0);
-		endpts[region].B = mean + maxp*Vector4(direction, 0);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-// Exchange entries i and j in both parallel arrays (float values in list1, ints in list2).
-static void swap(float *list1, int *list2, int i, int j)
-{
-	const float saved_value = list1[i];
-	const int saved_tag = list2[i];
-
-	list1[i] = list1[j];
-	list2[i] = list2[j];
-
-	list1[j] = saved_value;
-	list2[j] = saved_tag;
-}
-/* Compress tile t as a mode-3 block. Every shape gets a rough endpoint fit;
- * the NITEMS shapes with the lowest rough error are then refined and the
- * encoding with the lowest refined error is copied into 'block'. Returns that
- * lowest error. */
-float AVPCL::compress_mode3(const Tile &t, char *block)
-{
-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
-	const int NITEMS=NSHAPES/4;
-
-	// pick the best NITEMS shapes and refine these.
-	struct {
-		FltEndpts endpts[NREGIONS];
-	} all[NSHAPES];
-	float roughmse[NSHAPES];
-	int index[NSHAPES];	// index[i] is the shape whose rough error is roughmse[i]; permuted together with roughmse by swap()
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-
-	for (int i=0; i<NSHAPES; ++i)
-	{
-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
-		index[i] = i;
-	}
-
-	// bubble sort -- only need to bubble up the first NITEMS items
-	for (int i=0; i<NITEMS; ++i)
-	for (int j=i+1; j<NSHAPES; ++j)
-		if (roughmse[i] > roughmse[j])
-			swap(roughmse, index, i, j);
-
-	for (int i=0; i<NITEMS && msebest>0; ++i)	// stop early once an exact (zero-error) encoding is found
-	{
-		int shape = index[i];
-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
-		if (mse < msebest)
-		{
-			memcpy(block, tempblock, sizeof(tempblock));
-			msebest = mse;
-		}
-	}
-	return msebest;
-}
-

+ 0 - 1214
3rdparty/nvtt/bc7/avpcl_mode4.cpp

@@ -1,1214 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x10000 2r 1i 555x2 6x2 2bi 3bi
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-using namespace nv;
-using namespace AVPCL;
-
-// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits
-// array 0 is always the RGB array and array 1 is always the A array
-#define	NINDEXARRAYS	2
-#define	INDEXARRAY_RGB	0
-#define INDEXARRAY_A	1
-#define INDEXARRAY_2BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)	// which array holds the 2-bit indices
-#define INDEXARRAY_3BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)	// which array holds the 3-bit indices
-
-#define NINDICES3	8	// palette entries addressable by a 3-bit index
-#define	INDEXBITS3	3
-#define	HIGH_INDEXBIT3	(1<<(INDEXBITS3-1))	// mask of the top bit of a 3-bit index
-#define	DENOM3		(NINDICES3-1)
-#define	BIAS3		(DENOM3/2)
-
-#define NINDICES2	4	// palette entries addressable by a 2-bit index
-#define	INDEXBITS2	2
-#define	HIGH_INDEXBIT2	(1<<(INDEXBITS2-1))	// mask of the top bit of a 2-bit index
-#define	DENOM2		(NINDICES2-1)
-#define	BIAS2		(DENOM2/2)
-
-#define	NINDICES_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2)	// when alpha takes the 2-bit array, RGB gets the 3-bit one
-#define	INDEXBITS_RGB(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2)
-#define	HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2)
-#define	DENOM_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2)
-#define	BIAS_RGB(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2)
-
-#define	NINDICES_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3)
-#define	INDEXBITS_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3)
-#define	HIGH_INDEXBIT_A(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3)
-#define	DENOM_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3)
-#define	BIAS_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS2 : BIAS3)
-
-#define	NSHAPES	1
-
-static int shapes[NSHAPES] =
-{
-	0x0000,	// the only shape: no bit set, so REGION() below is 0 for every pixel
-};
-
-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)	// tests bit 15-x-4*y of the shape mask
-
-#define NREGIONS	1			// keep the region stuff in just in case...
-
-// encoded index compression location: region 0 is always at 0,0.
-
-#define	NBITSIZES	2			// one endpoint pair
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
-	int transform_mode;		// x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise.
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	TRANSFORM_MODE_ALPHA	1	// bit 0 of Pattern::transform_mode
-#define	TRANSFORM_MODE_RGB	2	// bit 1 of Pattern::transform_mode
-
-#define	NPATTERNS 1
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red		green		blue		alpha	xfm	mode  mb encoding
-	5,5,		5,5,		5,5,		6,6,	0x0, 0x10, 5, "",
-};
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGBA];	// per-channel precision of endpoint A
-	int endpt_b_prec[NCHANNELS_RGBA];	// per-channel precision of endpoint B
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];
-};
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	5,5,5,6,	5,5,5,6,	// endpoint A r,g,b,a then endpoint B r,g,b,a
-};
-
-
-// Number of bits required to represent n. Zero needs no bits. A positive value needs its
-// magnitude bits plus one sign bit when issigned; a negative value (only legal when
-// issigned) needs enough bits to shift it down to -1, plus one.
-static int nbits(int n, bool issigned)
-{
-	if (n == 0)
-		return 0;
-
-	int count = 0;
-
-	if (n < 0)
-	{
-		nvAssert (issigned);
-		while (n < -1)
-		{
-			n >>= 1;
-			++count;
-		}
-		return count + 1;
-	}
-
-	while (n != 0)
-	{
-		n >>= 1;
-		++count;
-	}
-	return issigned ? count + 1 : count;
-}
-
-#define	R_0	ep[0].A[i]
-#define	R_1 ep[0].B[i]
-
-// Delta-encode endpoint B of region 0 against endpoint A: for the RGB channels when
-// TRANSFORM_MODE_RGB is set in transform_mode, for alpha when TRANSFORM_MODE_ALPHA is set.
-static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS])
-{
-	const bool rgb_transformed = (transform_mode & TRANSFORM_MODE_RGB) != 0;
-	const bool alpha_transformed = (transform_mode & TRANSFORM_MODE_ALPHA) != 0;
-
-	if (rgb_transformed)
-	{
-		for (int c = CHANNEL_R; c < CHANNEL_A; ++c)
-			ep[0].B[c] -= ep[0].A[c];
-	}
-
-	if (alpha_transformed)
-		ep[0].B[CHANNEL_A] -= ep[0].A[CHANNEL_A];
-}
-
-// Undo the delta encoding of region 0's endpoint B by adding endpoint A back: for the RGB
-// channels when TRANSFORM_MODE_RGB is set in transform_mode, for alpha when
-// TRANSFORM_MODE_ALPHA is set.
-static void transform_inverse(int transform_mode, IntEndptsRGBA ep[NREGIONS])
-{
-	const bool rgb_transformed = (transform_mode & TRANSFORM_MODE_RGB) != 0;
-	const bool alpha_transformed = (transform_mode & TRANSFORM_MODE_ALPHA) != 0;
-
-	if (rgb_transformed)
-	{
-		for (int c = CHANNEL_R; c < CHANNEL_A; ++c)
-			ep[0].B[c] += ep[0].A[c];
-	}
-
-	if (alpha_transformed)
-		ep[0].B[CHANNEL_A] += ep[0].A[CHANNEL_A];
-}
-
-// Quantize the x/y/z/w components of every region's floating-point endpoints A and B to the
-// per-channel precisions given by pattern_prec, storing the integers in q_endpts.
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA q_endpts[NREGIONS])
-{
-	for (int r = 0; r < NREGIONS; ++r)
-	{
-		const RegionPrec &prec = pattern_prec.region_precs[r];
-
-		// channel order 0..3 corresponds to x, y, z, w
-		const float a_comp[4] = { endpts[r].A.x, endpts[r].A.y, endpts[r].A.z, endpts[r].A.w };
-		const float b_comp[4] = { endpts[r].B.x, endpts[r].B.y, endpts[r].B.z, endpts[r].B.w };
-
-		for (int c = 0; c < 4; ++c)
-			q_endpts[r].A[c] = Utils::quantize(a_comp[c], prec.endpt_a_prec[c]);
-
-		for (int c = 0; c < 4; ++c)
-			q_endpts[r].B[c] = Utils::quantize(b_comp[c], prec.endpt_b_prec[c]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit
-// index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
-static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
-{
-	int index_positions[NREGIONS];
-
-	index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		int x = index_positions[region] & 3;
-		int y = (index_positions[region] >> 2) & 3;
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-
-		// swap RGB
-		if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode))
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-			for (int y = 0; y < Tile::TILE_H; y++)	// note: these x/y shadow the anchor coordinates declared above
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x];	// mirror the index to match the swapped endpoints
-		}
-
-		// swap A
-		if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode))
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-			for (int y = 0; y < Tile::TILE_H; y++)	// note: these x/y shadow the anchor coordinates declared above
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x];	// mirror the index to match the swapped endpoints
-		}
-	}
-}
-/* Reports whether the endpoints can be stored with pattern p. This version
- * returns true unconditionally; neither argument is inspected. */
-static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p)
-{
-	return true;
-}
-/* Write the block header to 'out': the mode value, the rotate mode, the index
- * mode, then for every region and channel endpoint A followed by endpoint B,
- * each with the bit width pattern p assigns to it. */
-static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out)
-{
-	// ignore shapeindex
-	out.write(p.mode, p.modebits);
-	out.write(rotatemode, ROTATEMODE_BITS);
-	out.write(indexmode, INDEXMODE_BITS);
-	for (int i=0; i<NREGIONS; ++i)
-		for (int j=0; j<NCHANNELS_RGBA; ++j)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]);	// nbitsizes[0] is the width of A
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]);	// nbitsizes[1] is the width of B
-		}
-	nvAssert (out.getptr() == 50);	// the header is expected to end at bit position 50
-}
-/* Read the block header from 'in', mirroring write_header: rotate mode, index
- * mode, then endpoint A and B of every channel. pat_index and shapeindex are
- * always set to 0, and p receives a copy of patterns[0]. */
-static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index)
-{
-	int mode = AVPCL::getmode(in);	// value is not used here; presumably consumes the mode bits (the assert below checks the read position)
-
-	pat_index = 0;
-
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	p = patterns[pat_index];
-
-	shapeindex = 0;		// we don't have any
-
-	rotatemode = in.read(ROTATEMODE_BITS);
-	indexmode = in.read(INDEXMODE_BITS);
-	for (int i=0; i<NREGIONS; ++i)
-		for (int j=0; j<NCHANNELS_RGBA; ++j)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]);
-		}
-	nvAssert (in.getptr() == 50);	// same end-of-header position that write_header asserts
-}
-/* Write both index arrays to 'out': first the array that uses 2-bit indices,
- * then the one that uses 3-bit indices (which is which depends on indexmode).
- * Pixels are visited in order i = 0..TILE_TOTAL-1 with y = i>>2, x = i&3. The
- * first index of each array is written with one bit fewer, since its high bit
- * is asserted to be 0. shapeindex is not used. */
-static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out)
-{
-	// the indices we shorten is always index 0
-
-	// do the 2 bit indices first
-	nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0);
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0));	// write i..[1:0] or i..[0]
-
-	// then the 3 bit indices
-	nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0);
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0));	// write i..[2:0] or i..[1:0]
-}
-/* Read both index arrays from 'in' in the order write_indices emits them:
- * the 2-bit array first, then the 3-bit array. The first index of each array
- * is read with one bit fewer than the rest. shapeindex is not used. */
-static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
-{
-	// the indices we shorten is always index 0
-
-	// do the 2 bit indices first
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0));		// read i..[1:0] or i..[0]
-
-	// then the 3 bit indices
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0));		// read i..[2:0] or i..[1:0]
-}
-/* Serialize one block into 'block': the header followed by the two index
- * arrays. The final write position is asserted to equal AVPCL::BITSIZE. */
-static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, rotatemode, indexmode, out);
-
-	write_indices(indices, shapeindex, indexmode, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-/* Build separate RGB and alpha palettes from one quantized endpoint pair.
- * Each channel's endpoints are unquantized and then interpolated with the
- * entry count, bias and denominator that indexmode selects for RGB or alpha.
- * Only the first NINDICES_RGB(indexmode) entries of palette_rgb and the first
- * NINDICES_A(indexmode) entries of palette_a are written. */
-static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3])
-{
-	// scale endpoints for RGB
-	int a, b;
-
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); 
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
-
-	// interpolate R
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
-
-	// interpolate G
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
-
-	// interpolate B
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); 
-	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]);
-
-	// interpolate A
-	for (int i = 0; i < NINDICES_A(indexmode); ++i)
-		palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)));
-
-}
-
-static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
-{
-	for (int i=0; i<NCHANNELS_RGBA; ++i)
-	{
-		if (p.transform_mode)
-		{
-			// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);	// always positive here
-			endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
-			endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
-			endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
-		}
-	}
-}
-
-static void rotate_tile(const Tile &in, int rotatemode, Tile &out)
-{
-	out.size_x = in.size_x;
-	out.size_y = in.size_y;
-
-	for (int y=0; y<in.size_y; ++y)
-	for (int x=0; x<in.size_x; ++x)
-	{
-		float t;
-		out.data[y][x] = in.data[y][x];
-
-		switch(rotatemode)
-		{
-		case ROTATEMODE_RGBA_RGBA: break;
-		case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
-		case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
-		case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
-		default: nvUnreachable();
-		}
-	}
-}
-
-void AVPCL::decompress_mode4(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGBA endpts[NREGIONS];
-	int shapeindex, pat_index, rotatemode, indexmode;
-
-	read_header(in, endpts, shapeindex, rotatemode, indexmode, p, pat_index);
-	
-	sign_extend(p, endpts);
-
-	if (p.transform_mode)
-		transform_inverse(p.transform_mode, endpts);
-
-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
-
-	for (int region = 0; region < NREGIONS; ++region)
-		generate_palette_quantized_rgb_a(endpts[region], pattern_precs[pat_index].region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
-
-	int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indexmode, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	Tile temp(t.size_x, t.size_y);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-		temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]);
-
-	rotate_tile(temp, rotatemode, t);
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
-// exceeds what we already have
-static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
-{
-	Vector3 palette_rgb[NINDICES3];	// could be nindices2
-	float palette_a[NINDICES3];	// could be nindices2
-	float toterr = 0;
-
-	generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]);
-
-	Vector3 rgb;
-	float a;
-
-	for (int i = 0; i < np; ++i)
-	{
-		float err, besterr;
-		float palette_alpha = 0, tile_alpha = 0;
-
-		if(AVPCL::flag_premult)
-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x :
-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y :
-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).z : (colors[i]).w;
-
-		rgb.x = (colors[i]).x;
-		rgb.y = (colors[i]).y;
-		rgb.z = (colors[i]).z;
-		a = (colors[i]).w;
-
-		// compute the two indices separately
-		// if we're doing premultiplied alpha, we need to choose first the index that
-		// determines the alpha value, and then do the other index
-
-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
-		{
-			// do A index first as it has the alpha
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
-			{
-				err = Utils::metric1(a, palette_a[j], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					palette_alpha = palette_a[j];
-					indices[INDEXARRAY_A][i] = j;
-				}
-			}
-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
-
-			// do RGB index
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][i] = j;
-				}
-			}
-			toterr += besterr;
-			if (toterr > current_besterr)
-			{
-				// fill out bogus index values so it's initialized at least
-				for (int k = i; k < np; ++k)
-				{
-					indices[INDEXARRAY_RGB][k] = -1;
-					indices[INDEXARRAY_A][k] = -1;
-				}
-				return FLT_MAX;
-			}
-		}
-		else
-		{
-			// do RGB index
-			besterr = FLT_MAX;
-			int bestindex;
-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
-											 Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					bestindex = j;
-					indices[INDEXARRAY_RGB][i] = j;
-				}
-			}
-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
-			toterr += besterr;
-
-			// do A index
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) :
-											 Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][i] = j;
-				}
-			}
-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
-			if (toterr > current_besterr)
-			{
-				// fill out bogus index values so it's initialized at least
-				for (int k = i; k < np; ++k)
-				{
-					indices[INDEXARRAY_RGB][k] = -1;
-					indices[INDEXARRAY_A][k] = -1;
-				}
-				return FLT_MAX;
-			}
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
-		toterr[region] = 0;
-	}
-
-	Vector3 rgb;
-	float a;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr;
-		float palette_alpha = 0, tile_alpha = 0;
-
-		rgb.x = (tile.data[y][x]).x;
-		rgb.y = (tile.data[y][x]).y;
-		rgb.z = (tile.data[y][x]).z;
-		a = (tile.data[y][x]).w;
-
-		if(AVPCL::flag_premult)
-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x :
-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y :
-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w;
-
-		// compute the two indices separately
-		// if we're doing premultiplied alpha, we need to choose first the index that
-		// determines the alpha value, and then do the other index
-
-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
-		{
-			// do A index first as it has the alpha
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
-			{
-				err = Utils::metric1(a, palette_a[region][i], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][y][x] = i;
-					palette_alpha = palette_a[region][i];
-				}
-			}
-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
-
-			// do RGB index
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][y][x] = i;
-				}
-			}
-			toterr[region] += besterr;
-		}
-		else
-		{
-			// do RGB index first as it has the alpha
-			besterr = FLT_MAX;
-			int bestindex;
-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
-											 Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][y][x] = i;
-					bestindex = i;
-				}
-			}
-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
-			toterr[region] += besterr;
-
-			// do A index
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) :
-											 Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][y][x] = i;
-				}
-			}
-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
-		}
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, 
-						  float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGBA temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;
-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	for (int j=0; j<NINDEXARRAYS; ++j)
-	for (int i=0; i<np; ++i)
-		indices[j][i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					indices[j][i] = temp_indices[j][i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
-{
-	IntEndptsRGBA temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	for (int j=0; j<NINDEXARRAYS; ++j)
-	for (int i=0; i<np; ++i)
-		indices[j][i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	int amin, bmin;
-
-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b < bhigh; ++b)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					good_indices[j][i] = temp_indices[j][i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b < bhigh; ++b)
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					good_indices[j][i] = temp_indices[j][i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		orig_err = best_err;
-		for (int j=0; j<NINDEXARRAYS; ++j)
-		for (int i=0; i<np; ++i)
-			indices[j][i] = good_indices[j][i];
-	}
-
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGBA new_a, new_b;
-	IntEndptsRGBA new_endpt;
-	int do_b;
-	int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-		float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i];
-				nvAssert (orig_indices[j][i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i];
-				nvAssert (orig_indices[j][i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = temp_indices0[j][i];
-				nvAssert (orig_indices[j][i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[INDEXARRAY_RGB][i] != new_indices[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != new_indices[INDEXARRAY_A][i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[j][i] = temp_indices0[j][i];
-					nvAssert (orig_indices[j][i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[INDEXARRAY_RGB][i] != temp_indices0[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != temp_indices0[INDEXARRAY_A][i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const float orig_err[NREGIONS], 
-							const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGBA temp_in, temp_out;
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		// make sure we have a valid error for temp_in
-		// we didn't change temp_in, so orig_err[region] is still valid
-		float temp_in_err = orig_err[region];
-
-		// now try to optimize these endpoints
-        float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-		// if we find an improvement, update the best so far and correct the output endpoints and errors
-		if (temp_out_err < best_err)
-		{
-			best_err = temp_out_err;
-			opt_err[region] = temp_out_err;
-			opt_endpts[region] = temp_out;
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-
-		assign_indices(tile, shapeindex_best, rotatemode, indexmode, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(shapeindex_best, indexmode, orig_endpts, orig_indices);
-
-		if (patterns[sp].transform_mode)
-			transform_forward(patterns[sp].transform_mode, orig_endpts);
-
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			if (patterns[sp].transform_mode)
-				transform_inverse(patterns[sp].transform_mode, orig_endpts);
-
-			optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-
-			assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices);
-
-			if (patterns[sp].transform_mode)
-				transform_forward(patterns[sp].transform_mode, opt_endpts);
-
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, rotatemode, indexmode, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				if (patterns[sp].transform_mode)
-					transform_forward(patterns[sp].transform_mode, orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, rotatemode, indexmode, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 4).";
-	return FLT_MAX;
-}
-
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f) v.x = 0.0f;
-	if (v.x > 255.0f) v.x = 255.0f;
-	if (v.y < 0.0f) v.y = 0.0f;
-	if (v.y > 255.0f) v.y = 255.0f;
-	if (v.z < 0.0f) v.z = 0.0f;
-	if (v.z > 255.0f) v.z = 255.0f;
-	if (v.w < 0.0f) v.w = 0.0f;
-	if (v.w > 255.0f) v.w = 255.0f;
-}
-
-// compute initial endpoints for the "RGB" portion and the "A" portion. 
-// Note these channels may have been rotated.
-static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[Tile::TILE_TOTAL];
-		Vector4 mean(0,0,0,0);
-
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		float mina = FLT_MAX, maxa = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-
-			dp = alphas[i] - mean.w;
-			if (dp < mina) mina = dp;
-			if (dp > maxa) maxa = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + Vector4(minp*direction, mina);
-		endpts[region].B = mean + Vector4(maxp*direction, maxa);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-}
-
-float AVPCL::compress_mode4(const Tile &t, char *block)
-{
-	FltEndpts endpts[NREGIONS];
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-	int shape = 0;
-	Tile t1;
-
-	// try all rotations. refine tries the 2 different indexings.
-	for (int r = 0; r < NROTATEMODES && msebest > 0; ++r)
-	{
-		rotate_tile(t, r, t1);
-		rough(t1, shape, endpts);
-		for (int i = 0; i < NINDEXMODES && msebest > 0; ++i)
-		{
-			float mse = refine(t1, shape, r, i, endpts, tempblock);
-			if (mse < msebest)
-			{
-				memcpy(block, tempblock, sizeof(tempblock));
-				msebest = mse;
-			}
-		}
-	}
-	return msebest;
-}

+ 0 - 1216
3rdparty/nvtt/bc7/avpcl_mode5.cpp

@@ -1,1216 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x100000 2r 777x2 8x2 2bi 2bi
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-using namespace nv;
-using namespace AVPCL;
-
-// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits -- NOTE: in this file INDEXBITS3 and INDEXBITS2 are both defined as 2, so both arrays are 2 bits wide whichever way indexmode selects
-// array 0 is always the RGB array and array 1 is always the A array
-#define	NINDEXARRAYS	2
-#define	INDEXARRAY_RGB	0
-#define INDEXARRAY_A	1
-#define INDEXARRAY_2BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)	// which array uses the "2" constants
-#define INDEXARRAY_3BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)	// which array uses the "3" constants
-
-#define NINDICES3	4	// despite the "3" in the name, this set describes 4 palette entries...
-#define	INDEXBITS3	2	// ...addressed with 2 bits, identical to the "2" set below
-#define	HIGH_INDEXBIT3	(1<<(INDEXBITS3-1))	// mask of the most significant index bit
-#define	DENOM3		(NINDICES3-1)	// denominator passed to Utils::lerp
-#define	BIAS3		(DENOM3/2)	// bias passed to Utils::lerp
-
-#define NINDICES2	4
-#define	INDEXBITS2	2
-#define	HIGH_INDEXBIT2	(1<<(INDEXBITS2-1))
-#define	DENOM2		(NINDICES2-1)
-#define	BIAS2		(DENOM2/2)
-
-#define	NINDICES_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2)	// RGB takes the "3" set when alpha takes the "2" set, and vice versa
-#define	INDEXBITS_RGB(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2)
-#define	HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2)
-#define	DENOM_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2)
-#define	BIAS_RGB(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2)
-
-#define	NINDICES_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3)	// mirror image of the RGB selectors above
-#define	INDEXBITS_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3)
-#define	HIGH_INDEXBIT_A(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3)
-#define	DENOM_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3)
-#define	BIAS_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS2 : BIAS3)
-
-#define	NSHAPES	1	// a single partition shape is defined in this file
-
-static int shapes[NSHAPES] =
-{
-	0x0000,	// no bits set, so REGION() evaluates to 0 for every (x,y)
-};
-
-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)	// tests bit 15-x-4*y of the shape mask; the result is 0 or 1
-
-#define NREGIONS	1			// keep the region stuff in just in case...
-
-// encoded index compression location: region 0 is always at 0,0.
-
-#define	NBITSIZES	2			// one endpoint pair
-
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel; write_header/read_header use [0] for endpoint A and [1] for endpoint B
-};
-
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
-	int transform_mode;		// x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise. (bit masks: TRANSFORM_MODE_ALPHA / TRANSFORM_MODE_RGB)
-	int mode;				// associated mode value; write_header emits it as the first modebits bits of the block
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	TRANSFORM_MODE_ALPHA	1	// bit tested by transform_forward/transform_inverse for the alpha channel
-#define	TRANSFORM_MODE_RGB	2	// bit tested by transform_forward/transform_inverse for the colour channels
-
-#define	NPATTERNS 1
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red		green		blue		alpha	xfm	mode  mb encoding
-	7,7,		7,7,		7,7,		8,8,	0x0, 0x20, 6, "",	// 7-bit A/B endpoints for R, G, B; 8-bit for alpha; no endpoint transform; 6 mode bits holding 0x20
-};
-
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGBA];	// per-channel precision (bits) of endpoint A, as passed to Utils::quantize/unquantize
-	int endpt_b_prec[NCHANNELS_RGBA];	// per-channel precision (bits) of endpoint B
-};
-
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];	// one precision record per region
-};
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	7,7,7,8,	7,7,7,8,	// first four values fill endpt_a_prec, last four fill endpt_b_prec
-};
-
-
-// return # of bits needed to store n. handle signed or unsigned cases properly
-static int nbits(int n, bool issigned)	// issigned adds one sign bit for positive n; negative n is only accepted when issigned is true
-{
-	int nb;
-	if (n==0)
-		return 0;	// no bits needed for 0, signed or not
-	else if (n > 0)
-	{
-		for (nb=0; n; ++nb, n>>=1) ;	// count right shifts until n becomes 0
-		return nb + (issigned?1:0);
-	}
-	else
-	{
-		nvAssert (issigned);
-		for (nb=0; n<-1; ++nb, n>>=1) ;	// count right shifts until n reaches -1; this relies on >> of a negative int being an arithmetic shift
-		return nb + 1;	// one more bit than the shift count
-	}
-}
-
-#define	R_0	ep[0].A[i]	// region 0 endpoint A, channel i -- expands against the locals named ep and i at the point of use
-#define	R_1 ep[0].B[i]	// region 0 endpoint B, channel i
-
-static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS])	// in place, replaces endpoint B with B-A for the channel groups selected by transform_mode
-{
-	int i;	// channel index consumed by the R_0/R_1 macros
-
-	if (transform_mode & TRANSFORM_MODE_RGB)
-		for (i=CHANNEL_R; i<CHANNEL_A; ++i)	// every channel from CHANNEL_R up to, but not including, CHANNEL_A
-			R_1 -= R_0;
-	if (transform_mode & TRANSFORM_MODE_ALPHA)
-	{
-		i = CHANNEL_A;
-		R_1 -= R_0;
-	}
-}
-
-static void transform_inverse(int transform_mode, IntEndptsRGBA ep[NREGIONS])	// undoes transform_forward: in place, replaces endpoint B with B+A for the selected channel groups
-{
-	int i;	// channel index consumed by the R_0/R_1 macros
-
-	if (transform_mode & TRANSFORM_MODE_RGB)
-		for (i=CHANNEL_R; i<CHANNEL_A; ++i)	// every channel from CHANNEL_R up to, but not including, CHANNEL_A
-			R_1 += R_0;
-	if (transform_mode & TRANSFORM_MODE_ALPHA)
-	{
-		i = CHANNEL_A;
-		R_1 += R_0;
-	}
-}
-
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA q_endpts[NREGIONS])	// fills q_endpts with Utils::quantize of every float endpoint component at its per-channel precision
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);	// components x,y,z,w map to channel slots 0..3
-		q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
-		q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
-		q_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]);
-
-		q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
-		q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
-		q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
-		q_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]);
-	}
-}
-
-// swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit -- in this file there is a single region, so only the index at position 0 is checked
-// index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
-static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])	// the RGB and A arrays are handled independently; endpts and indices are both modified in place
-{
-	int index_positions[NREGIONS];
-
-	index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		int x = index_positions[region] & 3;
-		int y = (index_positions[region] >> 2) & 3;
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-
-		// swap RGB
-		if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode))
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
-
-			for (int y = 0; y < Tile::TILE_H; y++)	// NOTE: these loop variables shadow the outer x and y
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x];	// mirror the index so it selects the same palette colour after the endpoint swap
-		}
-
-		// swap A
-		if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode))
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }	// single-iteration loop: only the alpha channel is swapped
-
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x];
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p)	// placeholder check: neither argument is inspected
-{
-	return true;	// unconditionally reports that the endpoints fit
-}
-
-static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out)	// writes the mode bits, the rotate mode and every endpoint to out; indexmode is accepted but not written
-{
-	// ignore shapeindex
-	out.write(p.mode, p.modebits);
-	out.write(rotatemode, ROTATEMODE_BITS);
-//	out.write(indexmode, INDEXMODE_BITS);
-	for (int i=0; i<NREGIONS; ++i)
-		for (int j=0; j<NCHANNELS_RGBA; ++j)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]);	// per channel: endpoint A first, then endpoint B
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]);
-		}
-	nvAssert (out.getptr() == 66);	// the header is expected to end at bit position 66
-}
-
-static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index)	// mirror of write_header: fills every output argument from the bit stream in
-{
-	int mode = AVPCL::getmode(in);	// the returned value is not used afterwards; presumably the call advances in past the mode bits (the assert on getptr() below checks that position)
-
-	pat_index = 0;	// only one pattern exists in this file
-
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	p = patterns[pat_index];
-
-	shapeindex = 0;		// we don't have any
-
-	rotatemode = in.read(ROTATEMODE_BITS);
-
-	indexmode = 0;		// we don't have any
-
-	for (int i=0; i<NREGIONS; ++i)
-		for (int j=0; j<NCHANNELS_RGBA; ++j)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]);	// same order as write_header: A then B for each channel
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]);
-		}
-	nvAssert (in.getptr() == 66);	// the header is expected to end at bit position 66
-}
-
-static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out)	// writes both index arrays to out; shapeindex is not used
-{
-	// the indices we shorten is always index 0
-
-	// do the 2 bit indices first
-	nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0);	// the first index is stored one bit short, so its high bit must already be 0
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0));	// write i..[1:0] or i..[0]
-
-	// then the 3 bit indices (named "3" by the macros, but INDEXBITS3 is 2 in this file)
-	nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0);
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0));	// write i..[1:0] or i..[0], since INDEXBITS3 is 2 here
-}
-
-static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])	// mirror of write_indices: fills both index arrays from in; shapeindex is not used
-{
-	// the indices we shorten is always index 0
-
-	// do the 2 bit indices first
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0));		// read i..[1:0] or i..[0]
-
-	// then the 3 bit indices (named "3" by the macros, but INDEXBITS3 is 2 in this file)
-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
-		indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0));		// read i..[1:0] or i..[0]
-}
-
-static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block)	// serialises header followed by indices into block
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, rotatemode, indexmode, out);
-
-	write_indices(indices, shapeindex, indexmode, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);	// header plus indices must fill the block exactly
-}
-
-static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3])	// builds the colour palette and the separate alpha palette for one endpoint pair
-{
-	// scale endpoints for RGB
-	int a, b;	// unquantized endpoint A / endpoint B value of the channel currently being interpolated
-
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); 
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
-
-	// interpolate R
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
-
-	// interpolate G
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
-
-	// interpolate B
-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
-		palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
-
-	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); 
-	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]);
-
-	// interpolate A
-	for (int i = 0; i < NINDICES_A(indexmode); ++i)	// alpha uses its own entry count, bias and denominator
-		palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)));
-}
-
-// sign-extend the endpoint values that were read from the bit stream as raw unsigned fields.
-// p:      pattern describing the per-channel bit widths and whether a transform is in use
-// endpts: the NREGIONS endpoint pairs to fix up in place
-// does nothing unless p.transform_mode is non-zero; the only pattern defined in this file has transform_mode 0.
-static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
-{
-	if (!p.transform_mode)
-		return;
-
-	for (int i=0; i<NCHANNELS_RGBA; ++i)
-	{
-		// endpoint A is stored untransformed and is always positive, so only B (which transform_forward
-		// turns into the difference B-A) needs extending. B is written with nbitsizes[1] bits by
-		// write_header, so that is the width to extend from.
-		// NREGIONS is 1: there is no endpts[1], and touching it would access memory past the caller's array.
-		endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[1]);
-	}
-}
-
-static void rotate_tile(const Tile &in, int rotatemode, Tile &out)	// copies in to out, exchanging the w component with x, y or z of every pixel according to rotatemode
-{
-	out.size_x = in.size_x;
-	out.size_y = in.size_y;
-
-	for (int y=0; y<in.size_y; ++y)
-	for (int x=0; x<in.size_x; ++x)
-	{
-		float t;	// swap temporary
-		out.data[y][x] = in.data[y][x];
-
-		switch(rotatemode)
-		{
-		case ROTATEMODE_RGBA_RGBA: break;	// no rotation: plain copy
-		case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break;	// swap x and w
-		case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break;	// swap y and w
-		case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break;	// swap z and w
-		default: nvUnreachable();
-		}
-	}
-}
-
-void AVPCL::decompress_mode5(const char *block, Tile &t)	// decodes one mode-5 block into t: header, palettes, indices, palette lookup, then rotation
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGBA endpts[NREGIONS];
-	int shapeindex, pat_index, rotatemode, indexmode;
-
-	read_header(in, endpts, shapeindex, rotatemode, indexmode, p, pat_index);
-	
-	sign_extend(p, endpts);
-
-	if (p.transform_mode)
-		transform_inverse(p.transform_mode, endpts);
-
-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
-
-	for (int region = 0; region < NREGIONS; ++region)
-		generate_palette_quantized_rgb_a(endpts[region], pattern_precs[pat_index].region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
-
-	int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indexmode, indices);
-
-	nvAssert(in.getptr() == AVPCL::BITSIZE);	// the whole block must have been consumed
-
-	Tile temp(t.size_x, t.size_y);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)	// iterates the full TILE_H x TILE_W grid, not t.size_y x t.size_x
-	for (int x = 0; x < Tile::TILE_W; x++)
-		temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]);
-
-	rotate_tile(temp, rotatemode, t);	// applies the same component swap again, which puts the components back in their original slots
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
-// exceeds what we already have
-static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])	// importance is not read here; returns FLT_MAX on early exit, otherwise the summed error over the np colors
-{
-	Vector3 palette_rgb[NINDICES3];	// could be nindices2
-	float palette_a[NINDICES3];	// could be nindices2
-	float toterr = 0;
-
-	generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]);
-
-	Vector3 rgb;
-	float a;
-
-	for (int i = 0; i < np; ++i)
-	{
-		float err, besterr;
-		float palette_alpha = 0, tile_alpha = 0;
-
-		if(AVPCL::flag_premult)
-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x :	// pick the component that holds alpha after rotation
-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y :
-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).z : (colors[i]).w;
-
-		rgb.x = (colors[i]).x;
-		rgb.y = (colors[i]).y;
-		rgb.z = (colors[i]).z;
-		a = (colors[i]).w;
-
-		// compute the two indices separately
-		// if we're doing premultiplied alpha, we need to choose first the index that
-		// determines the alpha value, and then do the other index
-
-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
-		{
-			// do A index first as it has the alpha
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
-			{
-				err = Utils::metric1(a, palette_a[j], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					palette_alpha = palette_a[j];
-					indices[INDEXARRAY_A][i] = j;
-				}
-			}
-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
-
-			// do RGB index
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][i] = j;
-				}
-			}
-			toterr += besterr;
-			if (toterr > current_besterr)	// already worse than the caller's best: give up on this candidate
-			{
-				// fill out bogus index values so it's initialized at least
-				for (int k = i; k < np; ++k)
-				{
-					indices[INDEXARRAY_RGB][k] = -1;
-					indices[INDEXARRAY_A][k] = -1;
-				}
-				return FLT_MAX;
-			}
-		}
-		else
-		{
-			// do RGB index
-			besterr = FLT_MAX;
-			int bestindex;	// NOTE(review): stays uninitialized if no iteration below takes the err < besterr branch (e.g. a NaN err) -- confirm that cannot happen
-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
-											 Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					bestindex = j;
-					indices[INDEXARRAY_RGB][i] = j;
-				}
-			}
-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :	// alpha lives in one of the rgb slots for these rotate modes
-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
-			toterr += besterr;
-
-			// do A index
-			besterr = FLT_MAX;
-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) :
-											 Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][i] = j;
-				}
-			}
-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
-			if (toterr > current_besterr)	// already worse than the caller's best: give up on this candidate
-			{
-				// fill out bogus index values so it's initialized at least
-				for (int k = i; k < np; ++k)
-				{
-					indices[INDEXARRAY_RGB][k] = -1;
-					indices[INDEXARRAY_A][k] = -1;
-				}
-				return FLT_MAX;
-			}
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])	// indices and toterr are outputs; unlike map_colors there is no early exit
-{
-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
-		toterr[region] = 0;
-	}
-
-	Vector3 rgb;
-	float a;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr;
-		float palette_alpha = 0, tile_alpha = 0;
-
-		rgb.x = (tile.data[y][x]).x;
-		rgb.y = (tile.data[y][x]).y;
-		rgb.z = (tile.data[y][x]).z;
-		a = (tile.data[y][x]).w;
-
-		if(AVPCL::flag_premult)
-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x :	// pick the component that holds alpha after rotation
-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y :
-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w;
-
-		// compute the two indices separately
-		// if we're doing premultiplied alpha, we need to choose first the index that
-		// determines the alpha value, and then do the other index
-
-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
-		{
-			// do A index first as it has the alpha
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
-			{
-				err = Utils::metric1(a, palette_a[region][i], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][y][x] = i;
-					palette_alpha = palette_a[region][i];
-				}
-			}
-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
-
-			// do RGB index
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][y][x] = i;
-				}
-			}
-			toterr[region] += besterr;
-		}
-		else
-		{
-			// do RGB index first as it has the alpha
-			besterr = FLT_MAX;
-			int bestindex;	// NOTE(review): stays uninitialized if no iteration below takes the err < besterr branch (e.g. a NaN err) -- confirm that cannot happen
-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
-											 Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_RGB][y][x] = i;
-					bestindex = i;
-				}
-			}
-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :	// alpha lives in one of the rgb slots for these rotate modes
-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
-			toterr[region] += besterr;
-
-			// do A index
-			besterr = FLT_MAX;
-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
-			{
-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) :
-											 Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode);
-
-				if (err > besterr)	// error increased, so we're done searching
-					break;
-				if (err < besterr)
-				{
-					besterr = err;
-					indices[INDEXARRAY_A][y][x] = i;
-				}
-			}
-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
-		}
-	}
-}
-
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
-						  float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])	// moves one endpoint (A when do_b is 0, otherwise B) of channel ch; new_endpts and indices are outputs
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGBA temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;	// only read after improved has been set, at which point it has been assigned
-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	for (int j=0; j<NINDEXARRAYS; ++j)
-	for (int i=0; i<np; ++i)
-		indices[j][i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)	// step starts at half the value range and halves each pass
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)	// try -step, then +step
-		{
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))	// skip candidates outside [0, 2^prec)
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))	// skip candidates outside [0, 2^prec)
-					continue;
-			}
-
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					indices[j][i] = temp_indices[j][i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-//
-// colors/importance/np: the pixels being fitted (importance is only forwarded to map_colors)
-// ch:                   the channel whose A and B endpoints are scanned
-// orig_err:             error of the mapping for opt_endpts on entry
-// opt_endpts:           in/out; channel ch is overwritten only when a strictly better pair is found
-// indices:              out; holds the indices of the better mapping, or -1's when nothing improved
-// returns the best error found, which is orig_err when nothing improved
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
-{
-	IntEndptsRGBA temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	for (int j=0; j<NINDEXARRAYS; ++j)
-	for (int i=0; i<np; ++i)
-		indices[j][i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B; all four bounds are inclusive
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	// start from the current endpoints so these are never read uninitialized
-	int amin = opt_endpts.A[ch];
-	int bmin = opt_endpts.B[ch];
-
-	if (a_le_b)
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b <= bhigh; ++b)	// bhigh is inclusive, like ahigh; stopping at b < bhigh never tried the top B value
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					good_indices[j][i] = temp_indices[j][i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b <= bhigh; ++b)	// inclusive upper bound, see above
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-		
-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err) 
-			{ 
-				amin = a; 
-				bmin = b; 
-				best_err = err;
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-					good_indices[j][i] = temp_indices[j][i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		// commit the better endpoint pair and hand its indices back to the caller
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		orig_err = best_err;
-		for (int j=0; j<NINDEXARRAYS; ++j)
-		for (int i=0; i<np; ++i)
-			indices[j][i] = good_indices[j][i];
-	}
-
-	return best_err;
-}
-
-// optimize the endpoints of one region, one channel at a time.
-// colors/importance/np: the pixels being fitted (importance is only forwarded)
-// orig_err/orig_endpts: the starting mapping error and endpoints
-// opt_endpts:           out; receives the improved endpoints (a copy of orig_endpts if nothing improved)
-// returns the error of the mapping for opt_endpts
-static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGBA new_a, new_b;
-	IntEndptsRGBA new_endpt;
-	int do_b;
-	int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL];	// indices after the first accepted move on the current channel
-	int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL];	// indices after the latest accepted move on the current channel
-	int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL];
-	int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-        float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i];
-				nvAssert (orig_indices[j][i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i];
-				nvAssert (orig_indices[j][i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int j=0; j<NINDEXARRAYS; ++j)
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[j][i] = temp_indices0[j][i];
-				// validate the value just stored; orig_indices is not modified in this loop
-				nvAssert (new_indices[j][i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[INDEXARRAY_RGB][i] != new_indices[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != new_indices[INDEXARRAY_A][i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over (the loop's ++ch brings it back to channel 0)
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	bool first = true;	// true until the first improving channel has recorded its indices in orig_indices
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int j=0; j<NINDEXARRAYS; ++j)
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[j][i] = temp_indices0[j][i];
-					nvAssert (orig_indices[j][i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[INDEXARRAY_RGB][i] != temp_indices0[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != temp_indices0[INDEXARRAY_A][i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-// Optimize the quantized endpoints of every region of the tile independently.
-// For each region, the pixels (and their importance weights) selected by REGION() are gathered and
-// handed to optimize_one(); its result replaces the starting endpoints only when its error is strictly lower.
-//   orig_err, orig_endpts : per-region error and endpoints to start from
-//   pattern_prec          : per-region endpoint precisions passed through to optimize_one()
-//   opt_err, opt_endpts   : per-region outputs; copies of the inputs when no improvement was found
-static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const float orig_err[NREGIONS], 
-							const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGBA temp_in, temp_out;
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		// default outputs: the unmodified input endpoints and error
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		// make sure we have a valid error for temp_in
-		// we didn't change temp_in, so orig_err[region] is still valid
-		float temp_in_err = orig_err[region];
-
-		// now try to optimize these endpoints
-        float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-		// if we find an improvement, update the best so far and correct the output endpoints and errors
-		if (temp_out_err < best_err)
-		{
-			best_err = temp_out_err;
-			opt_err[region] = temp_out_err;
-			opt_endpts[region] = temp_out;
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-// Turn rough float endpoints into an encoded block.
-// For each candidate pattern the endpoints are quantized, indices are assigned, and the (optionally
-// transformed) endpoints are tested against the pattern. The first pattern that fits is used: its endpoints
-// are optimized, and the optimized version is emitted only if it still fits and lowers the total error;
-// otherwise the unoptimized endpoints are emitted.
-// Returns the summed per-region error of the emitted block, or FLT_MAX if no pattern fit (asserted never to happen).
-static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-
-		assign_indices(tile, shapeindex_best, rotatemode, indexmode, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(shapeindex_best, indexmode, orig_endpts, orig_indices);
-
-		if (patterns[sp].transform_mode)
-			transform_forward(patterns[sp].transform_mode, orig_endpts);
-
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			// undo the transform so the optimizer works on untransformed endpoints
-			if (patterns[sp].transform_mode)
-				transform_inverse(patterns[sp].transform_mode, orig_endpts);
-
-			optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-
-			// recompute indices and per-region errors for the optimized endpoints
-			assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices);
-
-			if (patterns[sp].transform_mode)
-				transform_forward(patterns[sp].transform_mode, opt_endpts);
-
-			// total the per-region errors of both candidates
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, rotatemode, indexmode, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				// (they were inverse-transformed above, so the forward transform is applied again before emitting)
-				if (patterns[sp].transform_mode)
-					transform_forward(patterns[sp].transform_mode, orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, rotatemode, indexmode, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 5).";
-	return FLT_MAX;
-}
-
-// Saturate each of the four components of v to the range [0, 255], in place.
-static void clamp(Vector4 &v)
-{
-	float *const components[4] = { &v.x, &v.y, &v.z, &v.w };
-
-	for (int c = 0; c < 4; ++c)
-	{
-		float &value = *components[c];
-
-		if (value < 0.0f)
-			value = 0.0f;
-		else if (value > 255.0f)
-			value = 255.0f;
-	}
-}
-
-// compute initial endpoints for the "RGB" portion and the "A" portion. 
-// Note these channels may have been rotated.
-// For every region: regions with 0, 1 or 2 pixels are handled directly; otherwise the RGB endpoints are the
-// extremes of the pixels' projections onto the principal direction of the colors (about the mean), and the
-// alpha endpoints are the extremes of alpha about its mean. The results are clamped to [0, 255].
-static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector3 colors[Tile::TILE_TOTAL];
-		float alphas[Tile::TILE_TOTAL];
-		Vector4 mean(0,0,0,0);
-
-		// gather the region's pixels, split into color and alpha, and accumulate their sum
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x].xyz();
-				alphas[np] = tile.data[y][x].w;
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			// empty region: both endpoints are (0, 0, 0, 255)
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			// single pixel: both endpoints are that pixel
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[0], alphas[0]);
-			continue;
-		}
-		else if (np == 2)
-		{
-			// two pixels: one endpoint each
-			endpts[region].A = Vector4(colors[0], alphas[0]);
-			endpts[region].B = Vector4(colors[1], alphas[1]);
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		// (alpha is handled separately: only its min/max offset from the mean alpha is tracked)
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		float mina = FLT_MAX, maxa = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean.xyz(), direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-
-			dp = alphas[i] - mean.w;
-			if (dp < mina) mina = dp;
-			if (dp > maxa) maxa = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + Vector4(minp*direction, mina);
-		endpts[region].B = mean + Vector4(maxp*direction, maxa);
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-}
-
-// Compress tile t into block, trying every rotation of the tile.
-// For each rotation, rough endpoints are computed and refined into a candidate block; the candidate with
-// the lowest error is copied into block. The search stops early once an error of 0 is reached.
-// Returns the lowest error found (FLT_MAX if no candidate was accepted).
-float AVPCL::compress_mode5(const Tile &t, char *block)
-{
-	const int shapeindex = 0;		// the only shape index used here
-	const int indexmodes_tried = 1;	// the original loop over all NINDEXMODES is disabled; only index mode 0 is tried
-
-	FltEndpts rough_endpts[NREGIONS];
-	char candidate[AVPCL::BLOCKSIZE];
-	Tile rotated;
-	float best_mse = FLT_MAX;
-
-	int rot = 0;
-	while (rot < NROTATEMODES && best_mse > 0)
-	{
-		rotate_tile(t, rot, rotated);
-		rough(rotated, shapeindex, rough_endpts);
-
-		for (int im = 0; im < indexmodes_tried && best_mse > 0; ++im)
-		{
-			const float mse = refine(rotated, shapeindex, rot, im, rough_endpts, candidate);
-			if (mse < best_mse)
-			{
-				// keep the best candidate seen so far
-				memcpy(block, candidate, sizeof(candidate));
-				best_mse = mse;
-			}
-		}
-
-		++rot;
-	}
-
-	return best_mse;
-}

+ 0 - 1055
3rdparty/nvtt/bc7/avpcl_mode6.cpp

@@ -1,1055 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x1000000 7777.1x2 4bi
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-using namespace nv;
-using namespace AVPCL;
-
-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
-
-// index/palette parameters: 4-bit indices select one of 16 palette entries
-#define NINDICES	16
-#define	INDEXBITS	4
-// mask of the most significant index bit (tested by swap_indices and write_indices)
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
-// denominator and rounding bias passed to Utils::lerp when interpolating the palette
-#define	DENOM		(NINDICES-1)
-#define	BIAS		(DENOM/2)
-
-#define	NSHAPES	1
-
-// partition bit masks, one per shape; the single shape here is all zeros
-static int shapes[NSHAPES] =
-{
-	0x0000,
-};
-
-// region of pixel (x,y) for a shape: tests bit 15-x-4*y of the shape mask, so (0,0) is the high bit.
-// with the all-zero shape above this evaluates to 0 for every pixel.
-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
-
-#define	NREGIONS	1
-
-// each region stores two bit sizes per channel: one for endpoint A and one for endpoint B
-#define	NBITSIZES	(NREGIONS*2)
-#define	ABITINDEX(region)	(2*(region)+0)
-#define	BBITINDEX(region)	(2*(region)+1)
-
-// Bit widths of the stored endpoint values of one channel,
-// indexed with ABITINDEX(region) / BBITINDEX(region).
-struct ChanBits
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-// Describes one bitstream layout: the per-channel endpoint bit widths plus the mode value
-// that write_header() emits (using modebits bits) at the start of the block.
-struct Pattern
-{
-	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
-	int mode;				// associated mode value
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	NPATTERNS 1
-
-// the single pattern of this mode: 7 bits for endpoints A and B in every channel,
-// mode value 0x40 written in 7 bits, and an empty encoding string
-static Pattern patterns[NPATTERNS] =
-{
-	// red	green	blue	alpha	mode  mb verilog
-	7,7,	7,7,	7,7,	7,7,	0x40, 7, "",
-};
-
-// Per-channel precision (in bits) of the A and B endpoints of one region.
-struct RegionPrec
-{
-	int	endpt_a_prec[NCHANNELS_RGBA];
-	int endpt_b_prec[NCHANNELS_RGBA];
-};
-
-// Endpoint precisions for every region of one pattern.
-struct PatternPrec
-{
-	RegionPrec region_precs[NREGIONS];
-};
-
-// this is the precision for each channel and region
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-// layout: the four endpoint A precisions (endpt_a_prec) followed by the four endpoint B precisions (endpt_b_prec)
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	7,7,7,7,	7,7,7,7,
-};
-
-// Number of bits needed to store n.
-//   n == 0 : 0 bits, signed or not
-//   n  > 0 : position of the highest set bit, plus one sign bit when issigned
-//   n  < 0 : only valid when issigned (asserted); number of shifts needed to reach -1, plus one sign bit
-static int nbits(int n, bool issigned)
-{
-	if (n == 0)
-		return 0;
-
-	int count = 0;
-
-	if (n < 0)
-	{
-		nvAssert (issigned);
-		while (n < -1)
-		{
-			n >>= 1;
-			++count;
-		}
-		return count + 1;
-	}
-
-	while (n != 0)
-	{
-		n >>= 1;
-		++count;
-	}
-	return issigned ? count + 1 : count;
-}
-
-/*
-we're using this table to assign lsbs
-abgr	>=2	correct
-0000	0	0
-0001	0	0
-0010	0	0
-0011	1	x1
-0100	0	0
-0101	1	x1
-0110	1	x1
-0111	1	1
-1000	0	0
-1001	1	x0
-1010	1	x0
-1011	1	1
-1100	1	x0
-1101	1	1
-1110	1	1
-1111	1	1
-
-we need 8 0's and 8 1's. the x's can be either 0 or 1 as long as you get 8/8.
-I choose to assign the lsbs so that the rgb channels are as good as possible.
-*/
-
-// 8888 -> 7777.1: drop the low bit of every channel of both endpoints and replace it with one shared
-// lsb per endpoint. The shared lsb is 1 when at least two of the three color channels (alpha is not
-// counted) had their low bit set.
-static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_endpts)
-{
-	// endpoint A
-	int a_ones = 0;
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		if (ch != CHANNEL_A)
-			a_ones += endpts.A[ch] & 1;
-		compr_endpts.A[ch] = endpts.A[ch] >> 1;
-		nvAssert (compr_endpts.A[ch] < 128);
-	}
-	compr_endpts.a_lsb = a_ones >= 2;
-
-	// endpoint B
-	int b_ones = 0;
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		if (ch != CHANNEL_A)
-			b_ones += endpts.B[ch] & 1;
-		compr_endpts.B[ch] = endpts.B[ch] >> 1;
-		nvAssert (compr_endpts.B[ch] < 128);
-	}
-	compr_endpts.b_lsb = b_ones >= 2;
-}
-
-// 7777.1 -> 8888: rebuild each channel by shifting the stored value up one bit and
-// putting the endpoint's shared lsb into the vacated low bit.
-static void uncompress_one(const IntEndptsRGBA_2& compr_endpts, IntEndptsRGBA& endpts)
-{
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-		endpts.A[ch] = (compr_endpts.A[ch] << 1) | compr_endpts.a_lsb;
-
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-		endpts.B[ch] = (compr_endpts.B[ch] << 1) | compr_endpts.b_lsb;
-}
-
-// Apply uncompress_one() to the endpoints of every region.
-static void uncompress_endpoints(const IntEndptsRGBA_2 compr_endpts[NREGIONS], IntEndptsRGBA endpts[NREGIONS])
-{
-	int region = 0;
-	while (region < NREGIONS)
-	{
-		uncompress_one(compr_endpts[region], endpts[region]);
-		++region;
-	}
-}
-
-// Apply compress_one() to the endpoints of every region.
-static void compress_endpoints(const IntEndptsRGBA endpts[NREGIONS], IntEndptsRGBA_2 compr_endpts[NREGIONS])
-{
-	int region = 0;
-	while (region < NREGIONS)
-	{
-		compress_one(endpts[region], compr_endpts[region]);
-		++region;
-	}
-}
-
-
-
-// Quantize float endpoints to the pattern's precision.
-// Each channel is first quantized with one extra bit (the "uncompressed" form), then compress_one()
-// folds the low bits of each endpoint into its single shared lsb.
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA_2 q_endpts[NREGIONS])
-{
-	IntEndptsRGBA full_endpts[NREGIONS];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		// channel order: x,y,z,w of the float endpoint go to channel indices 0..3
-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
-		full_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]+1);
-
-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
-		full_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]+1);
-
-		compress_one(full_endpts[region], q_endpts[region]);
-	}
-}
-
-// swap endpoints as needed to ensure that the index at each region's anchor position has a 0 high-order bit
-// (write_indices stores that index with one bit less).
-// an anchor position p is 0 at x=0 y=0 and 15 at x=3 y=3, so y = (p >> 2) & 3 and x = p & 3
-// when a region is swapped, its A/B endpoints and lsbs are exchanged and every index i of the region becomes NINDICES-1-i
-static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	int index_positions[NREGIONS];
-
-	index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		int x = index_positions[region] & 3;
-		int y = (index_positions[region] >> 2) & 3;
-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
-		if (indices[y][x] & HIGH_INDEXBIT)
-		{
-			// high bit is set, swap the endpts and indices for this region
-			int t;
-			for (int i=0; i<NCHANNELS_RGBA; ++i) 
-			{
-				t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
-			}
-			t = endpts[region].a_lsb; endpts[region].a_lsb = endpts[region].b_lsb; endpts[region].b_lsb = t;
-
-			// note: these loop variables shadow the anchor x/y declared above
-			for (int y = 0; y < Tile::TILE_H; y++)
-			for (int x = 0; x < Tile::TILE_W; x++)
-				if (REGION(x,y,shapeindex) == region)
-					indices[y][x] = NINDICES - 1 - indices[y][x];
-		}
-	}
-}
-
-// Reports whether the endpoints can be stored with pattern p.
-// This implementation accepts unconditionally; neither argument is examined.
-static bool endpts_fit(IntEndptsRGBA_2 endpts[NREGIONS], const Pattern &p)
-{
-	return true;
-}
-
-// Write the block header: the mode bits, then for each channel the A and B endpoint values of every
-// region, then the shared A and B lsbs of every region. shapeindex is not written.
-static void write_header(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
-{
-	out.write(p.mode, p.modebits);
-
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-
-	for (int i=0; i<NREGIONS; ++i)
-	{
-		out.write(endpts[i].a_lsb, 1);
-		out.write(endpts[i].b_lsb, 1);
-	}
-
-	// with the pattern table of this file: 7 mode bits + 8 endpoint values of 7 bits + 2 lsb bits = 65
-	nvAssert (out.getptr() == 65);
-}
-
-// Read the block header written by write_header(): endpoint values per channel and region, then the
-// shared lsbs. pat_index and shapeindex are always set to 0 and p to patterns[0].
-static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
-{
-	// the returned mode value is not used afterwards
-	// NOTE(review): getmode() presumably consumes the mode bits, since the read position is asserted to equal modebits below -- confirm
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	p = patterns[pat_index];
-
-	shapeindex = 0;		// we don't have any
-
-	for (int j=0; j<NCHANNELS_RGBA; ++j)
-		for (int i=0; i<NREGIONS; ++i)
-		{
-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
-		}
-	
-	for (int i=0; i<NREGIONS; ++i)
-	{
-		endpts[i].a_lsb  = in.read(1);
-		endpts[i].b_lsb  = in.read(1);
-	}
-
-	// same position check as at the end of write_header()
-	nvAssert (in.getptr() == 65);
-}
-
-// Write the tile's indices in position order (position p is row p>>2, column p&3).
-// Position 0 is written with one bit less than the others; its high bit must already be 0
-// (asserted), which swap_indices() arranges. shapeindex is not used.
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	nvAssert ((indices[0][0] & HIGH_INDEXBIT) == 0);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		// the index we shorten is always index 0: bits [2:0] there, bits [3:0] everywhere else
-		const int width = (pos == 0) ? INDEXBITS-1 : INDEXBITS;
-		out.write(indices[pos>>2][pos&3], width);
-	}
-}
-
-// Read the tile's indices in position order (position p is row p>>2, column p&3).
-// Position 0 was stored with one bit less than the others. shapeindex is not used.
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		// the index we shorten is always index 0: bits [2:0] there, bits [3:0] everywhere else
-		const int width = (pos == 0) ? INDEXBITS-1 : INDEXBITS;
-		indices[pos>>2][pos&3] = in.read(width);
-	}
-}
-
-// Serialize one block: header (mode, endpoints, lsbs) followed by the indices.
-// Asserts that exactly AVPCL::BITSIZE bits were written.
-static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits out(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, out);
-
-	write_indices(indices, shapeindex, out);
-
-	nvAssert(out.getptr() == AVPCL::BITSIZE);
-}
-
-// Build the NINDICES-entry palette for one pair of compressed endpoints.
-// The endpoints are expanded with uncompress_one() (shared lsb restored), each channel is unquantized
-// with precision+1 bits, and the palette entries are interpolated per channel with Utils::lerp.
-static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	IntEndptsRGBA endpts;
-
-	uncompress_one(endpts_2, endpts);
-
-	// scale endpoints
-	int a, b;			// really need a IntVec4...
-
-	// channel 0 -> palette x
-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space 
-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	// channel 1 -> palette y
-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	// channel 2 -> palette z
-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
-
-	// channel 3 -> palette w
-	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1); 
-	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1);
-
-	// interpolate
-	for (int i = 0; i < NINDICES; ++i)
-		palette[i].w = float(Utils::lerp(a, b, i, BIAS, DENOM));
-}
-
-// Decode one block into tile t: read the header, build the palette of each region from its
-// endpoints, read the indices, and look every pixel up in the palette of its region.
-void AVPCL::decompress_mode6(const char *block, Tile &t)
-{
-	Bits in(block, AVPCL::BITSIZE);
-
-	Pattern p;
-	IntEndptsRGBA_2 endpts[NREGIONS];
-	int shapeindex, pat_index;
-
-	read_header(in, endpts, shapeindex, p, pat_index);
-	
-	Vector4 palette[NREGIONS][NINDICES];
-	for (int r = 0; r < NREGIONS; ++r)
-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
-
-	int indices[Tile::TILE_H][Tile::TILE_W];
-
-	read_indices(in, shapeindex, indices);
-
-	// the whole block must have been consumed
-	nvAssert(in.getptr() == AVPCL::BITSIZE);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	for (int x = 0; x < Tile::TILE_W; x++)
-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
-}
-
-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
-//   colors, np  : the np colors to map
-//   importance  : accepted for interface compatibility; not used by this function
-//   current_err : early-exit threshold -- as soon as the running total exceeds it, the remaining indices
-//                 (starting with the one just processed) are set to -1 and FLT_MAX is returned
-//   indices     : receives the chosen palette index of each color
-// returns the summed error of all colors, or FLT_MAX on early exit
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float err, besterr = FLT_MAX;
-
-		// walk the palette in order; stop on an exact match or as soon as the error starts to rise
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-			err = !AVPCL::flag_premult ? Utils::metric4(colors[i], palette[j]) :
-									     Utils::metric4premult(colors[i], palette[j]) ;
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
-//   indices : receives, for every pixel inside the tile's size_x by size_y area, the palette index chosen for it
-//   toterr  : receives the summed error of the pixels of each region
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
-		toterr[region] = 0;
-	}
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float err, besterr = FLT_MAX;
-
-		// walk the region's palette in order; stop on an exact match or as soon as the error starts to rise
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			err = !AVPCL::flag_premult ? Utils::metric4(tile.data[y][x], palette[region][i]) :
-										 Utils::metric4premult(tile.data[y][x], palette[region][i]) ;
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[y][x] = i;
-			}
-		}
-		toterr[region] += besterr;
-	}
-}
-
-// Search for a better value of one endpoint (A when do_b == 0, otherwise B) in channel ch.
-// The step size starts at half the endpoint's range and is halved each round; in every round both
-// directions are tried and the endpoint moves by the better one if it lowers the error.
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-// new_endpts receives old_endpts with the searched endpoint moved to the best value found
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	// we have the old endpoints: old_endpts
-	// we have the perturbed endpoints: new_endpts
-	// we have the temporary endpoints: temp_endpts
-
-	IntEndptsRGBA_2 temp_endpts;
-	float min_err = old_err;		// start with the best current error
-	int beststep;					// only read when 'improved' is set, which implies it was assigned
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// copy real endpoints so we can perturb them
-	temp_endpts = new_endpts = old_endpts;
-
-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
-
-	// do a logarithmic search for the best error for this endpoint (which)
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			// candidate = current best value +/- step; skip it if it leaves the range [0, 2^prec)
-			if (do_b == 0)
-			{
-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
-					continue;
-			}
-			else
-			{
-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
-					continue;
-			}
-
-			// min_err doubles as the early-exit threshold, so a worse candidate yields FLT_MAX
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
-
-			if (err < min_err)
-			{
-				improved = true;
-				min_err = err;
-				beststep = sign * step;
-				for (int i=0; i<np; ++i)
-					indices[i] = temp_indices[i];
-			}
-		}
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (do_b == 0)
-				new_endpts.A[ch] += beststep;
-			else
-				new_endpts.B[ch] += beststep;
-		}
-	}
-	return min_err;
-}
-
-// Exhaustively try all (A, B) endpoint pairs of channel ch in a window around the current endpoints.
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// returns the best error found (orig_err if nothing better was found)
-// if the returned value is less than orig_err, opt_endpts has been updated and indices[] contains valid
-// indices; otherwise opt_endpts is unchanged and indices[] contains -1's
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	IntEndptsRGBA_2 temp_endpts;
-	float best_err = orig_err;
-	int aprec = region_prec.endpt_a_prec[ch];
-	int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	// nothing can beat a zero error
-	if (orig_err == 0) return orig_err;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B (all four bounds are inclusive)
-	int alow = max(0, opt_endpts.A[ch] - adelta);
-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	int blow = max(0, opt_endpts.B[ch] - bdelta);
-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// now there's no need to swap the ordering of A and B
-	const bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	// best pair found so far; starts at the current endpoints so it is always defined
-	int amin = opt_endpts.A[ch];
-	int bmin = opt_endpts.B[ch];
-
-	if (a_le_b)
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b <= bhigh; ++b)	// bhigh is inclusive, like ahigh
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-
-			// best_err doubles as the early-exit threshold of map_colors
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err)
-			{
-				amin = a;
-				bmin = b;
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b <= bhigh; ++b)	// bhigh is inclusive, like ahigh
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-
-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err)
-			{
-				amin = a;
-				bmin = b;
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-
-// Optimize the endpoints of one region.
-// Phase 1: per channel, perturb whichever endpoint gives the larger improvement, then alternate between
-//          the two endpoints until no further improvement; if the index assignment changed along the way,
-//          restart from channel 0.
-// Phase 2: per channel, run the exhaustive search around the result, restarting from channel 0 when a
-//          later improvement changes the index assignment.
-// opt_endpts receives the best endpoints found (orig_endpts if nothing improved).
-// Returns the error of opt_endpts (orig_err if nothing improved).
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
-{
-	float opt_err = orig_err;
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGBA_2 new_a, new_b;
-	IntEndptsRGBA_2 new_endpt;
-	int do_b;		// which endpoint to perturb next: 0 = A, 1 = B (always assigned before it is read)
-	int orig_indices[Tile::TILE_TOTAL];
-	int new_indices[Tile::TILE_TOTAL];
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minima
-        float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			// A is the better candidate; skip this channel if it is not an actual improvement
-			if (err0 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			// B is the better (or equal) candidate; skip this channel if it is not an actual improvement
-			if (err1 >= opt_err)
-				continue;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		// ch becomes 0 again after the loop increment
-		if (i<np)
-			ch = -1;	// start over
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minima to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				// first improvement of this pass: remember its indices as the reference
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-// For each region of the shape: gather the region's pixels, then try every lsb combination for the
-// two endpoints, run optimize_one on each, and keep the endpoints with the lowest error.
-// opt_err[]/opt_endpts[] start out as orig_err[]/orig_endpts[] and are only replaced by a strict improvement.
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
-							IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
-{
-	Vector4 region_pixels[Tile::TILE_TOTAL];
-	float region_importance[Tile::TILE_TOTAL];
-	int scratch_indices[Tile::TILE_TOTAL];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		const RegionPrec &prec = pattern_prec.region_precs[region];
-
-		// gather the pixels (and their importance weights) that the shape assigns to this region
-		int count = 0;
-		for (int y = 0; y < tile.size_y; y++)
-		{
-			for (int x = 0; x < tile.size_x; x++)
-			{
-				if (REGION(x, y, shapeindex) != region)
-					continue;
-				region_pixels[count] = tile.data[y][x];
-				region_importance[count] = tile.importance_map[y][x];
-				++count;
-			}
-		}
-
-		// until something better turns up, the answer is the input
-		IntEndptsRGBA_2 candidate = orig_endpts[region];
-		opt_endpts[region] = candidate;
-		opt_err[region] = orig_err[region];
-		float lowest_err = orig_err[region];
-
-		for (int lsbmode = 0; lsbmode < NLSBMODES; ++lsbmode)
-		{
-			// bit 0 of lsbmode is the lsb of endpoint A, bit 1 the lsb of endpoint B
-			candidate.a_lsb = lsbmode & 1;
-			candidate.b_lsb = (lsbmode >> 1) & 1;
-
-			// FLT_MAX as the cutoff so map_colors cannot take its early exit: we need the exact starting error
-			float start_err = map_colors(region_pixels, region_importance, count, candidate, prec, FLT_MAX, scratch_indices);
-
-			// search for better endpoints starting from this lsb choice
-			IntEndptsRGBA_2 improved;
-			float improved_err = optimize_one(region_pixels, region_importance, count, start_err, candidate, prec, improved);
-
-			if (improved_err < lowest_err)
-			{
-				lowest_err = improved_err;
-				opt_err[region] = improved_err;
-				opt_endpts[region] = improved;
-			}
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-
-     simplify the above given that there is no transform now and that endpoints will always fit
-*/
-
-// Quantizes the rough endpoints for the chosen shape, optimizes them, and emits whichever of the
-// two candidates (quantized-only or optimized) has the smaller total error into block.
-// Returns the total error of the emitted candidate. Both branches return inside the first loop
-// iteration, so only pattern 0 is ever used.
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float err_before[NREGIONS], err_after[NREGIONS], err_predicted[NREGIONS];
-	IntEndptsRGBA_2 ep_before[NREGIONS], ep_after[NREGIONS];
-	int idx_before[Tile::TILE_H][Tile::TILE_W], idx_after[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		const PatternPrec &prec = pattern_precs[sp];
-
-		// baseline: straight quantization of the rough endpoints
-		quantize_endpts(endpts, prec, ep_before);
-		assign_indices(tile, shapeindex_best, ep_before, prec, idx_before, err_before);
-		swap_indices(ep_before, idx_before, shapeindex_best);
-
-		// candidate: endpoints after optimization, with indices recomputed from scratch
-		optimize_endpts(tile, shapeindex_best, err_before, ep_before, prec, err_predicted, ep_after);
-		assign_indices(tile, shapeindex_best, ep_after, prec, idx_after, err_after);
-		// (nreed) the original asserted err_predicted[i] == err_after[i] here; it was disabled
-		// because it fired all the time for unknown reasons
-		swap_indices(ep_after, idx_after, shapeindex_best);
-
-		float total_before = 0, total_after = 0;
-		for (int r = 0; r < NREGIONS; ++r)
-		{
-			total_before += err_before[r];
-			total_after += err_after[r];
-		}
-		// a disabled assert in the original expected total_after <= total_before
-
-		if (!(total_after < total_before))
-		{
-			// no strict improvement: keep the baseline
-			emit_block(ep_before, shapeindex_best, patterns[sp], idx_before, block);
-			return total_before;
-		}
-
-		emit_block(ep_after, shapeindex_best, patterns[sp], idx_after, block);
-		return total_after;
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 6).";
-	return FLT_MAX;
-}
-
-// Limits every component of v to the range [0, 255] in place.
-static void clamp(Vector4 &v)
-{
-	v.x = (v.x < 0.0f) ? 0.0f : ((v.x > 255.0f) ? 255.0f : v.x);
-	v.y = (v.y < 0.0f) ? 0.0f : ((v.y > 255.0f) ? 255.0f : v.y);
-	v.z = (v.z < 0.0f) ? 0.0f : ((v.z > 255.0f) ? 255.0f : v.z);
-	v.w = (v.w < 0.0f) ? 0.0f : ((v.w > 255.0f) ? 255.0f : v.w);
-}
-
-// Fills palette[region][0..NINDICES-1] for every region by handing the region's unquantized
-// endpoints A and B to Utils::lerp with index i, a bias of 0 and denominator DENOM.
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		const FltEndpts &ep = endpts[region];
-		Vector4 *entries = palette[region];
-
-		for (int i = 0; i < NINDICES; ++i)
-			entries[i] = Utils::lerp(ep.A, ep.B, i, 0, DENOM);
-	}
-}
-
-// Generates a palette from unquantized endpoints, then picks the best palette entry for every pixel
-// of the tile (using the palette of the pixel's region) and returns the error summed over all regions.
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	generate_palette_unquantized(endpts, palette);
-
-	float toterr = 0;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-
-		float besterr = Utils::metric4(tile.data[y][x], palette[region][0]);
-
-		// walk the palette in order; stop at an exact match (besterr == 0)
-		for (int i = 1; i < NINDICES && besterr > 0; ++i)
-		{
-			float err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
-				break;
-			if (err < besterr)
-				besterr = err;
-		}
-		toterr += besterr;
-	}
-	return toterr;
-}
-
-// Produces a first guess of the endpoints for every region of the given shape and returns the
-// total error of mapping the tile onto the resulting unquantized palettes.
-// Regions with at most two pixels use those pixels directly; larger regions span the projections
-// of their pixels onto the principal direction through the mean, clamped to [0, 255].
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		FltEndpts &ep = endpts[region];
-		Vector4 colors[Tile::TILE_TOTAL];
-		Vector4 mean(0,0,0,0);
-		int np = 0;
-
-		// gather this region's pixels and accumulate their sum
-		for (int y = 0; y < tile.size_y; y++)
-		{
-			for (int x = 0; x < tile.size_x; x++)
-			{
-				if (REGION(x,y,shapeindex) != region)
-					continue;
-				colors[np] = tile.data[y][x];
-				mean += tile.data[y][x];
-				++np;
-			}
-		}
-
-		// handle simple cases: 0, 1 or 2 pixels need no fitting
-		if (np <= 2)
-		{
-			if (np == 0)
-			{
-				Vector4 zero(0,0,0,255.0f);
-				ep.A = zero;
-				ep.B = zero;
-			}
-			else
-			{
-				// one pixel: both endpoints are that pixel; two pixels: one endpoint each
-				ep.A = colors[0];
-				ep.B = colors[np - 1];
-			}
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector4 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// extent of the pixels' projections along the principal direction, relative to the mean
-		float lowest = FLT_MAX;
-		float highest = -FLT_MAX;
-		for (int i = 0; i < np; i++)
-		{
-			float proj = dot(colors[i]-mean, direction);
-			if (proj < lowest) lowest = proj;
-			if (proj > highest) highest = proj;
-		}
-
-		// the endpoints are the two extreme projections
-		ep.A = mean + lowest*direction;
-		ep.B = mean + highest*direction;
-
-		// the final endpoints have to be clamped anyway, so the shape is chosen based on
-		// clamped endpoints as well
-		clamp(ep.A);
-		clamp(ep.B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-// Exchanges entries i and j in both parallel arrays so they stay in step.
-static void swap(float *list1, int *list2, int i, int j)
-{
-	const float held_value = list1[i];
-	const int held_index = list2[i];
-
-	list1[i] = list1[j];
-	list2[i] = list2[j];
-
-	list1[j] = held_value;
-	list2[j] = held_index;
-}
-
-// Compresses tile t into block: makes a rough endpoint estimate for every shape, refines the
-// NITEMS shapes with the lowest rough error, and keeps the refined block with the lowest error.
-// Returns that error.
-float AVPCL::compress_mode6(const Tile &t, char *block)
-{
-	// how many rough candidates to refine. reasonable values are 1, NSHAPES/4, and NSHAPES;
-	// NSHAPES/4 gets nearly all the cases, and a few more (say 3 or 4) squeezes out the last bit
-	const int NITEMS=1;
-
-	struct {
-		FltEndpts endpts[NREGIONS];
-	} candidates[NSHAPES];
-	float rough_err[NSHAPES];
-	int order[NSHAPES];
-	char scratch[AVPCL::BLOCKSIZE];
-	float best = FLT_MAX;
-
-	// rough estimate for every shape
-	for (int shape = 0; shape < NSHAPES; ++shape)
-	{
-		order[shape] = shape;
-		rough_err[shape] = rough(t, shape, &candidates[shape].endpts[0]);
-	}
-
-	// partial sort: only the first NITEMS slots need to hold the smallest errors
-	for (int slot = 0; slot < NITEMS; ++slot)
-	{
-		for (int other = slot + 1; other < NSHAPES; ++other)
-		{
-			if (rough_err[slot] > rough_err[other])
-				swap(rough_err, order, slot, other);
-		}
-	}
-
-	// refine the selected shapes; stop early once the error reaches zero
-	for (int slot = 0; slot < NITEMS && best > 0; ++slot)
-	{
-		const int shape = order[slot];
-		const float err = refine(t, shape, &candidates[shape].endpts[0], scratch);
-		if (err < best)
-		{
-			memcpy(block, scratch, sizeof(scratch));
-			best = err;
-		}
-	}
-	return best;
-}
-

+ 0 - 1094
3rdparty/nvtt/bc7/avpcl_mode7.cpp

@@ -1,1094 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
-
-// x10000000 5555.1x4 64p 2bi (30b)
-
-#include "bits.h"
-#include "tile.h"
-#include "avpcl.h"
-#include "nvcore/debug.h"
-#include "nvmath/vector.inl"
-#include "nvmath/matrix.inl"
-#include "nvmath/fitting.h"
-#include "avpcl_utils.h"
-#include "endpts.h"
-#include <string.h>
-#include <float.h>
-
-#include "shapes_two.h"
-
-using namespace nv;
-using namespace AVPCL;
-
-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
-
-#define NINDICES	4	// palette entries per region
-#define	INDEXBITS	2	// bits stored per index
-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))	// mask of the top bit of an index
-#define	DENOM		(NINDICES-1)	// denominator passed to Utils::lerp when interpolating palette entries
-#define	BIAS		(DENOM/2)	// bias passed to Utils::lerp for the quantized palette
-
-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
-// i.e. can we search shapes in a particular order so we can see the global error minimum easily and
-// stop without having to touch all shapes?
-
-#define	POS_TO_X(pos)	((pos)&3)	// low two bits of a linear position
-#define	POS_TO_Y(pos)	(((pos)>>2)&3)	// next two bits of a linear position
-
-#define	NBITSIZES	(NREGIONS*2)	// two entries (endpoint A and endpoint B) per region
-#define	ABITINDEX(region)	(2*(region)+0)	// slot of a region's endpoint A in ChanBits::nbitsizes
-#define	BBITINDEX(region)	(2*(region)+1)	// slot of a region's endpoint B in ChanBits::nbitsizes
-
-struct ChanBits	// field widths of one channel, indexed with ABITINDEX(region) / BBITINDEX(region)
-{
-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
-};
-
-struct Pattern	// one bit layout of the block; member order matters because patterns[] is brace-initialized positionally
-{
-	ChanBits chan[NCHANNELS_RGBA];//  bit widths used per channel
-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
-	int mode;				// associated mode value (written by write_header using modebits bits)
-	int modebits;			// number of mode bits
-	const char *encoding;			// verilog description of encoding for this mode
-};
-
-#define	NPATTERNS 1	// number of entries in patterns[] and pattern_precs[]
-#define	NREGIONS  2	// regions per shape in this mode (shape tables come from shapes_two.h)
-
-static Pattern patterns[NPATTERNS] =
-{
-	// red		green		blue		alpha		xfm	mode  mb
-	5,5,5,5,	5,5,5,5,	5,5,5,5,	5,5,5,5,	0,	0x80, 8, "",	// 5 bits for every endpoint field; untransformed; mode value 0x80 in 8 mode bits; empty encoding string
-};
-
-struct RegionPrec	// per-channel endpoint precision of one region
-{
-	int	endpt_a_prec[NCHANNELS_RGBA];	// precision of endpoint A, one entry per channel
-	int endpt_b_prec[NCHANNELS_RGBA];	// precision of endpoint B, one entry per channel
-};
-
-struct PatternPrec	// endpoint precisions for all regions of one pattern
-{
-	RegionPrec region_precs[NREGIONS];	// indexed by region
-};
-
-
-// this is the precision for each channel and region (before the shared lsb is appended; users add +1 for uncompressed space)
-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
-static PatternPrec pattern_precs[NPATTERNS] =
-{
-	5,5,5,5,  5,5,5,5,  5,5,5,5,  5,5,5,5,	// every endpoint of every region: 5 bits in all four channels
-};
-
-// Number of bits needed to store n. Zero needs no bits; a positive n needs its bit length plus one
-// sign bit when issigned; a negative n is only legal when issigned and always includes the sign bit.
-static int nbits(int n, bool issigned)
-{
-	if (n == 0)
-		return 0;	// no bits needed for 0, signed or not
-
-	int count = 0;
-
-	if (n > 0)
-	{
-		while (n)
-		{
-			++count;
-			n >>= 1;
-		}
-		return issigned ? count + 1 : count;
-	}
-
-	nvAssert (issigned);
-	while (n < -1)
-	{
-		++count;
-		n >>= 1;
-	}
-	return count + 1;
-}
-
-static void transform_forward(IntEndptsRGBA_2 ep[NREGIONS])	// stub: the only pattern in this file has transformed == 0
-{
-	nvUnreachable();	// marks this path as never expected to execute
-}
-
-static void transform_inverse(IntEndptsRGBA_2 ep[NREGIONS])	// stub: decompress_mode7 calls this only when the pattern's transformed flag is set, and the only pattern has it at 0
-{
-	nvUnreachable();	// marks this path as never expected to execute
-}
-
-/*
-we're using this table to assign lsbs
-abgr	>=2	correct
-0000	0	0
-0001	0	0
-0010	0	0
-0011	1	x1
-0100	0	0
-0101	1	x1
-0110	1	x1
-0111	1	1
-1000	0	0
-1001	1	x0
-1010	1	x0
-1011	1	1
-1100	1	x0
-1101	1	1
-1110	1	1
-1111	1	1
-
-we need 8 0's and 8 1's. the x's can be either 0 or 1 as long as you get 8/8.
-I choose to assign the lsbs so that the rgb channels are as good as possible.
-*/
-
-// 6666 -> 5555.1: drops the low bit of every channel of both endpoints and replaces it by one
-// shared lsb per endpoint. The shared lsb is 1 when at least two of the three color channels
-// (alpha is left out of the count) had their low bit set -- the "correct" column of the table above.
-static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_endpts)
-{
-	int ones_a = 0;
-	int ones_b = 0;
-
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		// ignore the alpha channel in the counts
-		if (ch != CHANNEL_A)
-		{
-			ones_a += endpts.A[ch] & 1;
-			ones_b += endpts.B[ch] & 1;
-		}
-
-		compr_endpts.A[ch] = endpts.A[ch] >> 1;
-		compr_endpts.B[ch] = endpts.B[ch] >> 1;
-		nvAssert (compr_endpts.A[ch] < 32);
-		nvAssert (compr_endpts.B[ch] < 32);
-	}
-
-	compr_endpts.a_lsb = ones_a >= 2;
-	compr_endpts.b_lsb = ones_b >= 2;
-}
-
-// Inverse layout of compress_one: shifts every channel up by one bit and puts the endpoint's
-// shared lsb into the freed bit.
-static void uncompress_one(const IntEndptsRGBA_2& compr_endpts, IntEndptsRGBA& endpts)
-{
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-		endpts.A[ch] = (compr_endpts.A[ch] << 1) | compr_endpts.a_lsb;
-
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-		endpts.B[ch] = (compr_endpts.B[ch] << 1) | compr_endpts.b_lsb;
-}
-// Applies uncompress_one to the endpoints of every region.
-static void uncompress_endpoints(const IntEndptsRGBA_2 compr_endpts[NREGIONS], IntEndptsRGBA endpts[NREGIONS])
-{
-	int region = 0;
-	while (region < NREGIONS)
-	{
-		uncompress_one(compr_endpts[region], endpts[region]);
-		++region;
-	}
-}
-
-// Applies compress_one to the endpoints of every region.
-static void compress_endpoints(const IntEndptsRGBA endpts[NREGIONS], IntEndptsRGBA_2 compr_endpts[NREGIONS])
-{
-	int region = 0;
-	while (region < NREGIONS)
-	{
-		compress_one(endpts[region], compr_endpts[region]);
-		++region;
-	}
-}
-
-// Quantizes the floating-point endpoints of every region to (precision + 1) bits per channel
-// -- the uncompressed space -- and then folds each endpoint's low bits into a shared lsb
-// with compress_one.
-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA_2 q_endpts[NREGIONS])
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		const FltEndpts &src = endpts[region];
-		const RegionPrec &prec = pattern_prec.region_precs[region];
-		IntEndptsRGBA full;
-
-		// +1 on every precision since we are in uncompressed space
-		full.A[0] = Utils::quantize(src.A.x, prec.endpt_a_prec[0]+1);
-		full.A[1] = Utils::quantize(src.A.y, prec.endpt_a_prec[1]+1);
-		full.A[2] = Utils::quantize(src.A.z, prec.endpt_a_prec[2]+1);
-		full.A[3] = Utils::quantize(src.A.w, prec.endpt_a_prec[3]+1);
-
-		full.B[0] = Utils::quantize(src.B.x, prec.endpt_b_prec[0]+1);
-		full.B[1] = Utils::quantize(src.B.y, prec.endpt_b_prec[1]+1);
-		full.B[2] = Utils::quantize(src.B.z, prec.endpt_b_prec[2]+1);
-		full.B[3] = Utils::quantize(src.B.w, prec.endpt_b_prec[3]+1);
-
-		compress_one(full, q_endpts[region]);
-	}
-}
-
-// For every region, looks at the index stored at the region's compressed-index position (the one
-// write_indices stores with one bit less). If its high bit is set, the region's endpoints A and B
-// (including their lsbs) are exchanged and every index of the region is mirrored, so that the
-// high bit at that position ends up 0.
-// Positions are linear: 0 at x=0 y=0 and 15 at x=3 y=3, so y = (pos >> 2) & 3 and x = pos & 3.
-static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
-{
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		const int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-		const int ax = POS_TO_X(position);
-		const int ay = POS_TO_Y(position);
-		nvAssert(REGION(ax,ay,shapeindex) == region);		// double check the table
-
-		if (!(indices[ay][ax] & HIGH_INDEXBIT))
-			continue;	// high bit already clear, nothing to do for this region
-
-		// exchange the two endpoints of this region
-		IntEndptsRGBA_2 &ep = endpts[region];
-		for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-		{
-			int held = ep.A[ch];
-			ep.A[ch] = ep.B[ch];
-			ep.B[ch] = held;
-		}
-		int held_lsb = ep.a_lsb;
-		ep.a_lsb = ep.b_lsb;
-		ep.b_lsb = held_lsb;
-
-		// mirror the region's indices to match the exchanged endpoints
-		for (int row = 0; row < Tile::TILE_H; row++)
-		{
-			for (int col = 0; col < Tile::TILE_W; col++)
-			{
-				if (REGION(col,row,shapeindex) == region)
-					indices[row][col] = NINDICES - 1 - indices[row][col];
-			}
-		}
-	}
-}
-
-static bool endpts_fit(IntEndptsRGBA_2 endpts[NREGIONS], const Pattern &p)	// always reports a fit; neither argument is inspected
-{
-	return true;
-}
-
-// Writes the block header to out: the mode value, the shape index, then channel by channel the
-// A and B endpoint of every region, and finally the two lsbs of every region.
-// The header is expected to end at bit position 98.
-static void write_header(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
-{
-	out.write(p.mode, p.modebits);
-	out.write(shapeindex, SHAPEBITS);
-
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		const ChanBits &widths = p.chan[ch];
-
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			const IntEndptsRGBA_2 &ep = endpts[region];
-			out.write(ep.A[ch], widths.nbitsizes[ABITINDEX(region)]);
-			out.write(ep.B[ch], widths.nbitsizes[BBITINDEX(region)]);
-		}
-	}
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		const IntEndptsRGBA_2 &ep = endpts[region];
-		out.write(ep.a_lsb, 1);
-		out.write(ep.b_lsb, 1);
-	}
-
-	nvAssert (out.getptr() == 98);
-}
-
-// Reads the block header from in, in the order write_header produces it: mode, shape index,
-// per-channel endpoints of every region, then the per-region lsbs.
-// pat_index is always set to 0 and p receives a copy of patterns[0].
-// The header is expected to end at bit position 98.
-static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
-{
-	// the value itself is not used; the assert below relies on the reader position after this call
-	int mode = AVPCL::getmode(in);
-
-	pat_index = 0;
-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
-	nvAssert (in.getptr() == patterns[pat_index].modebits);
-
-	shapeindex = in.read(SHAPEBITS);
-	p = patterns[pat_index];
-
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			IntEndptsRGBA_2 &ep = endpts[region];
-			ep.A[ch] = in.read(p.chan[ch].nbitsizes[ABITINDEX(region)]);
-			ep.B[ch] = in.read(p.chan[ch].nbitsizes[BBITINDEX(region)]);
-		}
-	}
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		IntEndptsRGBA_2 &ep = endpts[region];
-		ep.a_lsb  = in.read(1);
-		ep.b_lsb  = in.read(1);
-	}
-
-	nvAssert (in.getptr() == 98);
-}
-
-// WORK PLACEHOLDER -- keep it simple for now
-// Writes the indices in linear position order. The index at each region's compressed-index
-// position is written with INDEXBITS-1 bits, every other index with INDEXBITS bits.
-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
-{
-	int anchors[NREGIONS];
-	for (int region = 0; region < NREGIONS; ++region)
-		anchors[region] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		// one bit less if this position is one of the regions' compressed-index positions
-		int width = INDEXBITS;
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			if (anchors[region] == pos)
-			{
-				width = INDEXBITS - 1;
-				break;
-			}
-		}
-
-		out.write(indices[POS_TO_Y(pos)][POS_TO_X(pos)], width);
-	}
-}
-
-// Reads the indices in linear position order, mirroring write_indices: INDEXBITS-1 bits at each
-// region's compressed-index position, INDEXBITS bits everywhere else.
-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
-{
-	int anchors[NREGIONS];
-	for (int region = 0; region < NREGIONS; ++region)
-		anchors[region] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
-
-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
-	{
-		// one bit less if this position is one of the regions' compressed-index positions
-		int width = INDEXBITS;
-		for (int region = 0; region < NREGIONS; ++region)
-		{
-			if (anchors[region] == pos)
-			{
-				width = INDEXBITS - 1;
-				break;
-			}
-		}
-
-		indices[POS_TO_Y(pos)][POS_TO_X(pos)] = in.read(width);
-	}
-}
-
-// Serializes one block: header first, then the indices. The writer is expected to end exactly
-// at AVPCL::BITSIZE bits.
-static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
-{
-	Bits writer(block, AVPCL::BITSIZE);
-
-	write_header(endpts, shapeindex, p, writer);
-	write_indices(indices, shapeindex, writer);
-
-	nvAssert(writer.getptr() == AVPCL::BITSIZE);
-}
-
-// Builds the NINDICES-entry palette of one region from its compressed endpoints: the endpoints
-// are expanded with uncompress_one, unquantized at (precision + 1) bits per channel, and every
-// palette entry is produced by Utils::lerp with rounding bias BIAS and denominator DENOM.
-static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
-{
-	IntEndptsRGBA endpts;
-	uncompress_one(endpts_2, endpts);
-
-	// scale endpoints; +1 on every precision since we are in uncompressed space
-	int lo[NCHANNELS_RGBA];
-	int hi[NCHANNELS_RGBA];
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		lo[ch] = Utils::unquantize(endpts.A[ch], region_prec.endpt_a_prec[ch]+1);
-		hi[ch] = Utils::unquantize(endpts.B[ch], region_prec.endpt_b_prec[ch]+1);
-	}
-
-	// interpolate all four channels of each palette entry
-	for (int i = 0; i < NINDICES; ++i)
-	{
-		Vector4 &entry = palette[i];
-		entry.x = float(Utils::lerp(lo[0], hi[0], i, BIAS, DENOM));
-		entry.y = float(Utils::lerp(lo[1], hi[1], i, BIAS, DENOM));
-		entry.z = float(Utils::lerp(lo[2], hi[2], i, BIAS, DENOM));
-		entry.w = float(Utils::lerp(lo[3], hi[3], i, BIAS, DENOM));
-	}
-}
-
-// sign extend but only if it was transformed (stub here: the only pattern in this file has transformed == 0)
-static void sign_extend(Pattern &p, IntEndptsRGBA_2 endpts[NREGIONS])
-{
-	nvUnreachable();	// marks this path as never expected to execute
-}
-
-// Decodes one block into tile t: reads the header, rebuilds the palette of every region from the
-// stored endpoints, reads the indices, and looks every pixel up in the palette of its region.
-void AVPCL::decompress_mode7(const char *block, Tile &t)
-{
-	Bits reader(block, AVPCL::BITSIZE);
-
-	Pattern pattern;
-	IntEndptsRGBA_2 endpts[NREGIONS];
-	int shapeindex, pat_index;
-	read_header(reader, endpts, shapeindex, pattern, pat_index);
-
-	if (pattern.transformed)
-	{
-		sign_extend(pattern, endpts);
-		transform_inverse(endpts);
-	}
-
-	// one palette per region
-	const PatternPrec &prec = pattern_precs[pat_index];
-	Vector4 palette[NREGIONS][NINDICES];
-	for (int region = 0; region < NREGIONS; ++region)
-		generate_palette_quantized(endpts[region], prec.region_precs[region], palette[region]);
-
-	int indices[Tile::TILE_H][Tile::TILE_W];
-	read_indices(reader, shapeindex, indices);
-
-	nvAssert(reader.getptr() == AVPCL::BITSIZE);
-
-	// lookup
-	for (int y = 0; y < Tile::TILE_H; y++)
-	{
-		for (int x = 0; x < Tile::TILE_W; x++)
-		{
-			const int region = REGION(x,y,shapeindex);
-			t.data[y][x] = palette[region][indices[y][x]];
-		}
-	}
-}
-
-// Given a collection of colors and quantized endpoints, generates the palette, chooses the best
-// palette entry for every color (stored in indices[]) and returns the summed error.
-// As soon as the running total exceeds current_err the search is abandoned: the indices from the
-// current color onwards are set to -1 and FLT_MAX is returned.
-// importance[] is accepted for interface compatibility but is not read here.
-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
-{
-	Vector4 palette[NINDICES];
-	float toterr = 0;
-
-	generate_palette_quantized(endpts, region_prec, palette);
-
-	for (int i = 0; i < np; ++i)
-	{
-		float besterr = FLT_MAX;
-
-		// walk the palette in order; stop at an exact match (besterr == 0)
-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
-		{
-			float err = !AVPCL::flag_premult ? Utils::metric4(colors[i], palette[j]) :
-											   Utils::metric4premult(colors[i], palette[j]) ;
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[i] = j;
-			}
-		}
-		toterr += besterr;
-
-		// check for early exit
-		if (toterr > current_err)
-		{
-			// fill out bogus index values so it's initialized at least
-			for (int k = i; k < np; ++k)
-				indices[k] = -1;
-
-			return FLT_MAX;
-		}
-	}
-	return toterr;
-}
-
-// Assigns indices given a tile, shape, and quantized endpoints: every pixel gets the best entry
-// of its region's palette (written to indices[y][x]) and toterr[region] receives the summed
-// error of the region's pixels.
-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	for (int region = 0; region < NREGIONS; ++region)
-	{
-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
-		toterr[region] = 0;
-	}
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float besterr = FLT_MAX;
-
-		// walk the palette in order; stop at an exact match (besterr == 0)
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			float err = !AVPCL::flag_premult ? Utils::metric4(tile.data[y][x], palette[region][i]) :
-											   Utils::metric4premult(tile.data[y][x], palette[region][i]) ;
-
-			if (err > besterr)	// error increased, so we're done searching
-				break;
-			if (err < besterr)
-			{
-				besterr = err;
-				indices[y][x] = i;
-			}
-		}
-		toterr[region] += besterr;
-	}
-}
-
-// Logarithmic search that moves one endpoint (A when do_b is 0, otherwise B) of channel ch.
-// Starting from old_endpts, each step size (1 << (prec-1), halving down to 1) is tried in both
-// directions; a move is accepted only when map_colors reports an error below the best so far,
-// and the search then continues from the moved endpoint.
-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
-// this function returns either old_err or a value smaller (if it was successful in improving the error)
-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
-{
-	const bool move_a = (do_b == 0);
-	const int prec = move_a ? region_prec.endpt_a_prec[ch] : region_prec.endpt_b_prec[ch];
-	const int limit = 1 << prec;		// first value that no longer fits in prec bits
-
-	float lowest_err = old_err;			// start with the best current error
-	int accepted_delta = 0;				// only read after an improvement has set it
-	int trial_indices[Tile::TILE_TOTAL];
-
-	for (int k = 0; k < np; ++k)
-		indices[k] = -1;
-
-	// new_endpts tracks the accepted position, trial is the position currently being tested
-	new_endpts = old_endpts;
-	IntEndptsRGBA_2 trial = old_endpts;
-
-	for (int step = 1 << (prec-1); step; step >>= 1)
-	{
-		bool improved = false;
-
-		for (int sign = -1; sign <= 1; sign += 2)
-		{
-			const int delta = sign * step;
-
-			if (move_a)
-				trial.A[ch] = new_endpts.A[ch] + delta;
-			else
-				trial.B[ch] = new_endpts.B[ch] + delta;
-
-			// skip moves that leave the representable range
-			const int moved = move_a ? trial.A[ch] : trial.B[ch];
-			if (moved < 0 || moved >= limit)
-				continue;
-
-			float err = map_colors(colors, importance, np, trial, region_prec, lowest_err, trial_indices);
-
-			if (err < lowest_err)
-			{
-				improved = true;
-				lowest_err = err;
-				accepted_delta = delta;
-				for (int k = 0; k < np; ++k)
-					indices[k] = trial_indices[k];
-			}
-		}
-
-		// if this was an improvement, move the endpoint and continue search from there
-		if (improved)
-		{
-			if (move_a)
-				new_endpts.A[ch] += accepted_delta;
-			else
-				new_endpts.B[ch] += accepted_delta;
-		}
-	}
-	return lowest_err;
-}
-
-// Exhaustive search over a window around the current endpoints of channel ch.
-// the larger the error the more time it is worth spending on an exhaustive search.
-// perturb the endpoints at least -3 to 3.
-// if err > 5000 perturb endpoints 50% of precision
-// if err > 1000 25%
-// if err > 200 12.5%
-// if err > 40  6.25%
-// for np = 16 -- adjust error thresholds as a function of np
-// always ensure endpoint ordering is preserved (no need to overlap the scan)
-// Returns the best error found (orig_err when nothing improved). If the returned value is less than
-// orig_err, opt_endpts has been updated for channel ch and indices[] contains valid indices;
-// otherwise indices[] is filled with -1.
-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
-{
-	float best_err = orig_err;
-	const int aprec = region_prec.endpt_a_prec[ch];
-	const int bprec = region_prec.endpt_b_prec[ch];
-	int good_indices[Tile::TILE_TOTAL];
-	int temp_indices[Tile::TILE_TOTAL];
-
-	for (int i=0; i<np; ++i)
-		indices[i] = -1;
-
-	// nothing can beat a zero error
-	if (orig_err == 0) return orig_err;
-
-	// thresholds are stated for a full tile; scale them to the number of pixels in this region
-	const float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
-
-	int adelta = 0, bdelta = 0;
-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
-	adelta = max(adelta, 3);
-	bdelta = max(bdelta, 3);
-
-#ifdef	DISABLE_EXHAUSTIVE
-	adelta = bdelta = 3;
-#endif
-
-	IntEndptsRGBA_2 temp_endpts = opt_endpts;
-
-	// ok figure out the range of A and B; all four bounds are inclusive
-	const int alow = max(0, opt_endpts.A[ch] - adelta);
-	const int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
-	const int blow = max(0, opt_endpts.B[ch] - bdelta);
-	const int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
-
-	// the scan keeps the current ordering of A and B, so there's no need to swap them afterwards
-	const bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
-
-	// initialized to the current endpoints; only used when an improvement was found
-	int amin = opt_endpts.A[ch];
-	int bmin = opt_endpts.B[ch];
-
-	if (a_le_b)
-	{
-		// keep a <= b
-		for (int a = alow; a <= ahigh; ++a)
-		for (int b = max(a, blow); b <= bhigh; ++b)	// bhigh is inclusive (was "b < bhigh", which never tried the top value)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err)
-			{
-				amin = a;
-				bmin = b;
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	else
-	{
-		// keep b <= a
-		for (int b = blow; b <= bhigh; ++b)	// bhigh is inclusive (was "b < bhigh", which never tried the top value)
-		for (int a = max(b, alow); a <= ahigh; ++a)
-		{
-			temp_endpts.A[ch] = a;
-			temp_endpts.B[ch] = b;
-
-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
-			if (err < best_err)
-			{
-				amin = a;
-				bmin = b;
-				best_err = err;
-				for (int i=0; i<np; ++i)
-					good_indices[i] = temp_indices[i];
-			}
-		}
-	}
-	if (best_err < orig_err)
-	{
-		opt_endpts.A[ch] = amin;
-		opt_endpts.B[ch] = bmin;
-		// if we actually improved, update the indices
-		for (int i=0; i<np; ++i)
-			indices[i] = good_indices[i];
-	}
-	return best_err;
-}
-
-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
-{	// per-channel greedy endpoint search (perturb_one) followed by a windowed exhaustive search; returns the best error found and leaves the matching endpoints in opt_endpts
-	float opt_err = orig_err;	// best error so far; only ever replaced by a strictly smaller value
-
-	opt_endpts = orig_endpts;
-
-	/*
-		err0 = perturb(rgb0, delta0)
-		err1 = perturb(rgb1, delta1)
-		if (err0 < err1)
-			if (err0 >= initial_error) break
-			rgb0 += delta0
-			next = 1
-		else
-			if (err1 >= initial_error) break
-			rgb1 += delta1
-			next = 0
-		initial_err = map()
-		for (;;)
-			err = perturb(next ? rgb1:rgb0, delta)
-			if (err >= initial_err) break
-			next? rgb1 : rgb0 += delta
-			initial_err = err
-	*/
-	IntEndptsRGBA_2 new_a, new_b;
-	IntEndptsRGBA_2 new_endpt;
-	int do_b;
-	int orig_indices[Tile::TILE_TOTAL];	// indices recorded at the first improvement for the current channel
-	int new_indices[Tile::TILE_TOTAL];	// indices of the most recent improvement
-	int temp_indices0[Tile::TILE_TOTAL];
-	int temp_indices1[Tile::TILE_TOTAL];
-
-	// now optimize each channel separately
-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-		// figure out which endpoint when perturbed gives the most improvement and start there
-		// if we just alternate, we can easily end up in a local minimum
-        float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
-
-		if (err0 < err1)
-		{
-			if (err0 >= opt_err)
-				continue;	// no improvement on this channel
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices0[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.A[ch] = new_a.A[ch];
-			opt_err = err0;
-			do_b = 1;		// do B next
-		}
-		else
-		{
-			if (err1 >= opt_err)
-				continue;	// no improvement on this channel
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = orig_indices[i] = temp_indices1[i];
-				nvAssert (orig_indices[i] != -1);
-			}
-
-			opt_endpts.B[ch] = new_b.B[ch];
-			opt_err = err1;
-			do_b = 0;		// do A next
-		}
-		
-		// now alternate endpoints and keep trying until there is no improvement
-		for (;;)
-		{
-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
-			if (err >= opt_err)
-				break;
-
-			for (int i=0; i<np; ++i)
-			{
-				new_indices[i] = temp_indices0[i];
-				nvAssert (new_indices[i] != -1);
-			}
-
-			if (do_b == 0)
-				opt_endpts.A[ch] = new_endpt.A[ch];
-			else
-				opt_endpts.B[ch] = new_endpt.B[ch];
-			opt_err = err;
-			do_b = 1 - do_b;	// now move the other endpoint
-		}
-
-		// see if the indices have changed
-		int i;
-		for (i=0; i<np; ++i)
-			if (orig_indices[i] != new_indices[i])
-				break;
-
-		if (i<np)
-			ch = -1;	// start over (the loop's ++ch brings it back to channel 0)
-	}
-
-	// finally, do a small exhaustive search around what we think is the global minimum to be sure
-	// note this is independent of the above search, so we don't care about the indices from the above
-	// we don't care about the above because if they differ, so what? we've already started at ch=0
-	bool first = true;	// true until an exhaustive improvement has stored its indices in orig_indices
-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
-	{
-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
-
-		if (new_err < opt_err)
-		{
-			opt_err = new_err;
-
-			if (first)
-			{
-				for (int i=0; i<np; ++i)
-				{
-					orig_indices[i] = temp_indices0[i];
-					nvAssert (orig_indices[i] != -1);
-				}
-				first = false;
-			}
-			else
-			{
-				// see if the indices have changed
-				int i;
-				for (i=0; i<np; ++i)
-					if (orig_indices[i] != temp_indices0[i])
-						break;
-
-				if (i<np)
-				{
-					ch = -1;	// start over (the loop's ++ch brings it back to channel 0)
-					first = true;
-				}
-			}
-		}
-	}
-
-	return opt_err;
-}
-
-// Try to improve the endpoints of every region of the given shape.
-// For each region: gather that region's pixels and importance values from the tile, seed
-// opt_endpts[region]/opt_err[region] with the original values, then for every lsb mode re-map the
-// pixels to get an exact starting error and run optimize_one. A result replaces the output only when
-// its error is strictly lower than the best seen so far for that region.
-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
-							IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
-{
-	Vector4 pixels[Tile::TILE_TOTAL];
-    float importance[Tile::TILE_TOTAL];
-	IntEndptsRGBA_2 temp_in, temp_out;
-	int temp_indices[Tile::TILE_TOTAL];	// filled by map_colors below; not read afterwards in this function
-
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		// collect the pixels in the region
-		int np = 0;
-
-        for (int y = 0; y < tile.size_y; y++) {
-            for (int x = 0; x < tile.size_x; x++) {
-                if (REGION(x, y, shapeindex) == region) {
-                    pixels[np] = tile.data[y][x];
-                    importance[np] = tile.importance_map[y][x];
-                    np++;
-                }
-            }
-        }
-
-		// start from the caller's endpoints; temp_in keeps these channel values and only its lsb fields change below
-		opt_endpts[region] = temp_in = orig_endpts[region];
-		opt_err[region] = orig_err[region];
-
-		float best_err = orig_err[region];
-
-		// try all lsb modes as we search for better endpoints
-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
-		{
-			// bit 0 of lsbmode selects A's lsb, bit 1 selects B's lsb
-			temp_in.a_lsb = lsbmode & 1;
-			temp_in.b_lsb = (lsbmode >> 1) & 1;
-
-			// make sure we have a valid error for temp_in
-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
-			float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
-
-			// now try to optimize these endpoints
-            float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
-
-			// if we find an improvement, update the best so far and correct the output endpoints and errors
-			if (temp_out_err < best_err)
-			{
-				best_err = temp_out_err;
-				opt_err[region] = temp_out_err;
-				opt_endpts[region] = temp_out;
-			}
-		}
-	}
-}
-
-/* optimization algorithm
-	for each pattern
-		convert endpoints using pattern precision
-		assign indices and get initial error
-		compress indices (and possibly reorder endpoints)
-		transform endpoints
-		if transformed endpoints fit pattern
-			get original endpoints back
-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
-			compress new indices
-			transform new endpoints
-			if new endpoints fit pattern AND if error is improved
-				emit compressed block with new data
-			else
-				emit compressed block with original data // to try to preserve maximum endpoint precision
-*/
-
-// Refine the rough endpoints of one shape into an encoded block.
-// Patterns are tried in order; the first pattern whose quantized endpoints fit decides the result:
-// the endpoints are optimized, and the block is emitted from the optimized endpoints if they still fit
-// and lower the total error, otherwise from the original quantized endpoints.
-// Returns the total error of the emitted block, or FLT_MAX (after asserting) if no pattern fits.
-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
-{
-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
-	IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
-
-	for (int sp = 0; sp < NPATTERNS; ++sp)
-	{
-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
-		if (patterns[sp].transformed)
-			transform_forward(orig_endpts);
-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
-		if (endpts_fit(orig_endpts, patterns[sp]))
-		{
-			// undo the forward transform so the optimizer works on untransformed endpoints
-			if (patterns[sp].transformed)
-				transform_inverse(orig_endpts);
-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
-			// recompute indices and per-region errors for the optimized endpoints; opt_err is what is compared below
-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
-			// (nreed) Commented out asserts because they go off all the time...not sure why
-			//for (int i=0; i<NREGIONS; ++i)
-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
-			if (patterns[sp].transformed)
-				transform_forward(opt_endpts);
-			// total the per-region errors of both candidates
-			orig_toterr = opt_toterr = 0;
-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
-			{
-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
-				return opt_toterr;
-			}
-			else
-			{
-				// either it stopped fitting when we optimized it, or there was no improvement
-				// so go back to the unoptimized endpoints which we know will fit
-				// (orig_endpts were inverse-transformed above, so they are transformed forward again before emitting)
-				if (patterns[sp].transformed)
-					transform_forward(orig_endpts);
-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
-				return orig_toterr;
-			}
-		}
-	}
-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 7).";
-	return FLT_MAX;
-}
-
-// Clamp each of the four components of v (x, y, z, w) to the closed range [0, 255], in place.
-static void clamp(Vector4 &v)
-{
-	if (v.x < 0.0f)			{ v.x = 0.0f; }
-	else if (v.x > 255.0f)	{ v.x = 255.0f; }
-
-	if (v.y < 0.0f)			{ v.y = 0.0f; }
-	else if (v.y > 255.0f)	{ v.y = 255.0f; }
-
-	if (v.z < 0.0f)			{ v.z = 0.0f; }
-	else if (v.z > 255.0f)	{ v.z = 255.0f; }
-
-	if (v.w < 0.0f)			{ v.w = 0.0f; }
-	else if (v.w > 255.0f)	{ v.w = 255.0f; }
-}
-
-// Build the palette for every region straight from the unquantized endpoints:
-// palette[r][k] is Utils::lerp between that region's A and B endpoint at step k of DENOM, with zero bias.
-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
-{
-	for (int r = 0; r < NREGIONS; ++r)
-	{
-		const FltEndpts &ep = endpts[r];
-		Vector4 *row = palette[r];
-
-		for (int k = 0; k < NINDICES; ++k)
-		{
-			row[k] = Utils::lerp(ep.A, ep.B, k, 0, DENOM);
-		}
-	}
-}
-
-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
-//   tile       - pixels to map; only the tile.size_x by tile.size_y area is visited
-//   shapeindex - shape used to look up each pixel's region via REGION(x, y, shapeindex)
-//   endpts     - unquantized endpoints, one pair per region
-// returns the sum over all visited pixels of the smallest Utils::metric4 error found for that pixel
-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
-{
-	// build list of possibles
-	Vector4 palette[NREGIONS][NINDICES];
-
-	generate_palette_unquantized(endpts, palette);
-
-	float toterr = 0;
-
-	for (int y = 0; y < tile.size_y; y++)
-	for (int x = 0; x < tile.size_x; x++)
-	{
-		int region = REGION(x,y,shapeindex);
-		float besterr = FLT_MAX;
-
-		// stop early once a palette entry matches exactly (besterr == 0)
-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
-		{
-			float err = Utils::metric4(tile.data[y][x], palette[region][i]);
-
-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
-				break;
-			if (err < besterr)
-				besterr = err;
-		}
-		toterr += besterr;
-	}
-	return toterr;
-}
-
-// Compute rough, unquantized endpoints for every region of the given shape and return the
-// resulting total mapping error (map_colors over the whole tile).
-// Regions with 0, 1 or 2 pixels get their endpoints directly; larger regions get the extremes of the
-// pixel projections along the principal direction through the region mean, clamped afterwards.
-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
-{
-	for (int region=0; region<NREGIONS; ++region)
-	{
-		int np = 0;
-		Vector4 colors[Tile::TILE_TOTAL];
-		Vector4 mean(0,0,0,0);
-
-		// gather this region's pixels and accumulate their sum (divided by np further down)
-		for (int y = 0; y < tile.size_y; y++)
-		for (int x = 0; x < tile.size_x; x++)
-			if (REGION(x,y,shapeindex) == region)
-			{
-				colors[np] = tile.data[y][x];
-				mean += tile.data[y][x];
-				++np;
-			}
-
-		// handle simple cases	
-		if (np == 0)
-		{
-			// empty region: both endpoints become (0, 0, 0, 255)
-			Vector4 zero(0,0,0,255.0f);
-			endpts[region].A = zero;
-			endpts[region].B = zero;
-			continue;
-		}
-		else if (np == 1)
-		{
-			endpts[region].A = colors[0];
-			endpts[region].B = colors[0];
-			continue;
-		}
-		else if (np == 2)
-		{
-			endpts[region].A = colors[0];
-			endpts[region].B = colors[1];
-			continue;
-		}
-
-		mean /= float(np);
-
-		Vector4 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
-
-		// project each pixel value along the principal direction
-		float minp = FLT_MAX, maxp = -FLT_MAX;
-		for (int i = 0; i < np; i++) 
-		{
-			float dp = dot(colors[i]-mean, direction);
-			if (dp < minp) minp = dp;
-			if (dp > maxp) maxp = dp;
-		}
-
-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
-		endpts[region].A = mean + minp*direction;
-		endpts[region].B = mean + maxp*direction;
-
-		// clamp endpoints
-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
-		// shape based on endpoints being clamped
-		clamp(endpts[region].A);
-		clamp(endpts[region].B);
-	}
-
-	return map_colors(tile, shapeindex, endpts);
-}
-
-// Exchange entries i and j in two parallel arrays so that list1[k] and list2[k] stay paired.
-static void swap(float *list1, int *list2, int i, int j)
-{
-	const float saved_value = list1[i];
-	const int saved_tag = list2[i];
-
-	list1[i] = list1[j];
-	list2[i] = list2[j];
-
-	list1[j] = saved_value;
-	list2[j] = saved_tag;
-}
-
-// Compress one tile with mode 7.
-// Computes a rough error for every shape, refines the NITEMS shapes with the lowest rough error, and
-// copies the refined block with the lowest error into `block`. Returns that error (msebest); if no
-// refinement beats FLT_MAX, `block` is left untouched and FLT_MAX is returned.
-float AVPCL::compress_mode7(const Tile &t, char *block)
-{
-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
-	const int NITEMS=NSHAPES/4;
-
-	// pick the best NITEMS shapes and refine these.
-	struct {
-		FltEndpts endpts[NREGIONS];
-	} all[NSHAPES];
-	float roughmse[NSHAPES];
-	int index[NSHAPES];	// index[k] is the shape whose rough error is stored in roughmse[k]
-	char tempblock[AVPCL::BLOCKSIZE];
-	float msebest = FLT_MAX;
-
-	for (int i=0; i<NSHAPES; ++i)
-	{
-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
-		index[i] = i;
-	}
-
-	// bubble sort -- only need to bubble up the first NITEMS items
-	// (after this the first NITEMS slots hold the NITEMS smallest rough errors in ascending order)
-	for (int i=0; i<NITEMS; ++i)
-	for (int j=i+1; j<NSHAPES; ++j)
-		if (roughmse[i] > roughmse[j])
-			swap(roughmse, index, i, j);
-
-	// refine the candidates best-first; stop as soon as a block with zero error has been found
-	for (int i=0; i<NITEMS && msebest>0; ++i)
-	{
-		int shape = index[i];
-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
-		if (mse < msebest)
-		{
-			memcpy(block, tempblock, sizeof(tempblock));
-			msebest = mse;
-		}
-	}
-	return msebest;
-}
-

+ 0 - 389
3rdparty/nvtt/bc7/avpcl_utils.cpp

@@ -1,389 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// Utility and common routines
-
-#include "avpcl_utils.h"
-#include "avpcl.h"
-#include "nvmath/vector.inl"
-#include <math.h>
-
-using namespace nv;
-using namespace AVPCL;
-
-// Fixed-point interpolation weights used by Utils::lerp when USE_ZOH_INTERP is defined.
-// Entry i is the weight of endpoint b at step i; endpoint a uses entry (denom - i). The two always sum to 64.
-static const int denom7_weights[] = {0, 9, 18, 27, 37, 46, 55, 64};										// divided by 64
-static const int denom15_weights[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};		// divided by 64
-
-// Interpolate between integers a and b at step i of denom.
-// With USE_ZOH_INTERP: uses the fixed-point weight tables (result is shifted right by 6, i.e. divided by 64),
-// adding 32 first when USE_ZOH_INTERP_ROUNDED is defined; bias is only range-checked, not used.
-// Otherwise: plain integer interpolation (a*(denom-i) + b*i + bias) / denom.
-int Utils::lerp(int a, int b, int i, int bias, int denom)
-{
-#ifdef	USE_ZOH_INTERP
-	nvAssert (denom == 3 || denom == 7 || denom == 15);
-	nvAssert (i >= 0 && i <= denom);
-	nvAssert (bias >= 0 && bias <= denom/2);
-	nvAssert (a >= 0 && b >= 0);
-
-	int round = 0;
-#ifdef	USE_ZOH_INTERP_ROUNDED
-	round = 32;
-#endif
-
-	switch (denom)
-	{
-	// a 3-step ramp reuses the 15-step table by scaling both denom and i by 5
-	case 3:	denom *= 5; i *= 5;	// fall through to case 15
-	case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i] + round) >> 6;
-	case 7:	return (a*denom7_weights[denom-i] + b*denom7_weights[i] + round) >> 6;
-	default: nvUnreachable(); return 0;
-	}
-#else
-	return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom));		// simple exact interpolation
-#endif
-}
-
-// Vector4 counterpart of the integer lerp: interpolate between a and b at step i of denom.
-// With USE_ZOH_INTERP: weights come from the same tables and the result is divided by 64.0f; bias is
-// only range-checked. Otherwise: (a*(denom-i) + b*i + bias) / denom.
-Vector4 Utils::lerp(Vector4::Arg a, Vector4::Arg b, int i, int bias, int denom)
-{
-#ifdef	USE_ZOH_INTERP
-	nvAssert (denom == 3 || denom == 7 || denom == 15);
-	nvAssert (i >= 0 && i <= denom);
-	nvAssert (bias >= 0 && bias <= denom/2);
-//	nvAssert (a >= 0 && b >= 0);
-
-	// no need to bias these as this is an exact division
-
-	switch (denom)
-	{
-	// a 3-step ramp reuses the 15-step table by scaling both denom and i by 5
-	case 3:	denom *= 5; i *= 5;	// fall through to case 15
-	case 15:return (a*float(denom15_weights[denom-i]) + b*float(denom15_weights[i])) / 64.0f;
-	case 7:	return (a*float(denom7_weights[denom-i]) + b*float(denom7_weights[i])) / 64.0f;
-	default: nvUnreachable(); return Vector4(0);
-	}
-#else
-	return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom));		// simple exact interpolation
-#endif
-}
-
-
-// Expand a prec-bit quantized value q to its unquantized form (prec must be greater than 3).
-// With USE_ZOH_QUANT: q is passed through for prec >= 8, 0 maps to 0, the all-ones code maps to 255,
-// and everything else becomes (q*256 + 128) >> prec.
-// Otherwise: bit replication -- q is shifted up to fill 8 bits and its top bits are repeated in the low bits.
-int Utils::unquantize(int q, int prec)
-{
-	int unq;
-
-	nvAssert (prec > 3);	// we only want to do one replicate
-
-#ifdef USE_ZOH_QUANT
-	if (prec >= 8)
-		unq = q;
-	else if (q == 0) 
-		unq = 0;
-	else if (q == ((1<<prec)-1)) 
-		unq = 255;
-	else
-		unq = (q * 256 + 128) >> prec;
-#else
-	// avpcl unquantizer -- bit replicate
-	unq = (q << (8-prec)) | (q >> (2*prec-8));
-#endif
-
-	return unq;
-}
-
-// quantize to the best value -- i.e., minimize unquantize error
-// value is first rounded to the nearest integer (asserted to be at most 255), then reduced to prec bits
-// (prec must be greater than 3). The result is asserted to lie in [0, 2^prec).
-int Utils::quantize(float value, int prec)
-{
-	int q, unq;
-
-	nvAssert (prec > 3);	// we only want to do one replicate
-
-	unq = (int)floor(value + 0.5f);	// round to nearest
-	nvAssert (unq <= 255);
-
-#ifdef USE_ZOH_QUANT
-	q = (prec >= 8) ? unq : (unq << prec) / 256;
-#else
-	// avpcl quantizer -- scale properly for best possible bit-replicated result
-	q = (unq * ((1<<prec)-1) + 127)/255;
-#endif
-
-	nvAssert (q >= 0 && q < (1 << prec));
-
-	return q;
-}
-
-// Squared length of the difference between two 4-component colors.
-// When AVPCL::flag_nonuniform or AVPCL::flag_nonuniform_ati is set, the x, y and z differences are
-// scaled by per-channel weights first (w is left alone); flag_nonuniform wins if both are set.
-float Utils::metric4(Vector4::Arg a, Vector4::Arg b)
-{
-	Vector4 diff = a - b;
-
-	const bool nonuniform = AVPCL::flag_nonuniform;
-
-	if (nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		// first value of each pair: flag_nonuniform weights; second: flag_nonuniform_ati weights
-		diff.x *= nonuniform ? 0.299f : 0.3086f;
-		diff.y *= nonuniform ? 0.587f : 0.6094f;
-		diff.z *= nonuniform ? 0.114f : 0.0820f;
-	}
-
-	return lengthSquared(diff);
-}
-
-// WORK -- implement rotatemode for the below -- that changes where the rwt, gwt, and bwt's go.
-// Squared length of the difference between two 3-component colors.
-// When AVPCL::flag_nonuniform or AVPCL::flag_nonuniform_ati is set, the differences are scaled by
-// per-channel weights first; flag_nonuniform wins if both are set. For the three rotated modes the
-// weight of one channel is overridden with 1.0 (x for AGBR, y for RABG, z for RGAB).
-float Utils::metric3(Vector3::Arg a, Vector3::Arg b, int rotatemode)
-{
-	Vector3 err = a - b;
-
-	// if nonuniform, select weights and weigh away
-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		float rwt, gwt, bwt;
-		if (AVPCL::flag_nonuniform)
-		{
-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
-		}
-		else /*if (AVPCL::flag_nonuniform_ati)*/
-		{
-			// plain else (as in metric4): the enclosing condition guarantees the ati flag is set here,
-			// and this way the weights are assigned on every path
-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
-		}
-
-		// adjust weights based on rotatemode
-		switch(rotatemode)
-		{
-		case ROTATEMODE_RGBA_RGBA: break;
-		case ROTATEMODE_RGBA_AGBR: rwt = 1.0f; break;
-		case ROTATEMODE_RGBA_RABG: gwt = 1.0f; break;
-		case ROTATEMODE_RGBA_RGAB: bwt = 1.0f; break;
-		default: nvUnreachable();
-		}
-
-		// weigh the components
-		err.x *= rwt;
-		err.y *= gwt;
-		err.z *= bwt;
-	}
-
-	return lengthSquared(err);
-}
-
-// Squared difference between two scalar channel values.
-// When AVPCL::flag_nonuniform or AVPCL::flag_nonuniform_ati is set, the difference is scaled first by
-// a weight chosen from rotatemode: 1.0 for RGBA_RGBA, otherwise the r, g or b weight for AGBR, RABG
-// or RGAB respectively. flag_nonuniform wins if both flags are set.
-float Utils::metric1(const float a, const float b, int rotatemode)
-{
-	float err = a - b;
-
-	// if nonuniform, select weights and weigh away
-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		float rwt, gwt, bwt;
-		float awt = 1.0f;	// defined fallback so an unexpected rotatemode never reads an uninitialized weight
-		if (AVPCL::flag_nonuniform)
-		{
-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
-		}
-		else /*if (AVPCL::flag_nonuniform_ati)*/
-		{
-			// plain else (as in metric4): the enclosing condition guarantees the ati flag is set here
-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
-		}
-
-		// adjust weights based on rotatemode
-		switch(rotatemode)
-		{
-		case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
-		case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
-		case ROTATEMODE_RGBA_RABG: awt = gwt; break;
-		case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
-		default: nvUnreachable();
-		}
-
-		// weigh the components
-		err *= awt;
-	}
-
-	return err * err;
-}
-
-// Multiply a color value by an alpha value in integer arithmetic: returns (r*a + 127) / 255 as a float.
-float Utils::premult(float r, float a)
-{
-	// note that the args are really integers stored in floats
-	const int color = int(r);
-	const int alpha = int(a);
-
-	// converting to int must not have dropped a fractional part
-	nvAssert ((color==r) && (alpha==a));
-
-	const int scaled = (color*alpha + 127)/255;
-	return float(scaled);
-}
-
-// Premultiply the x, y and z components of rgba by its own w component, in place (w is unchanged).
-static void premult4(Vector4& rgba)
-{
-	const float alpha = rgba.w;
-
-	rgba.x = Utils::premult(rgba.x, alpha);
-	rgba.y = Utils::premult(rgba.y, alpha);
-	rgba.z = Utils::premult(rgba.z, alpha);
-}
-
-// Premultiply all three components of rgb by the separately supplied alpha value a, in place.
-static void premult3(Vector3& rgb, float a)
-{
-	const float px = Utils::premult(rgb.x, a);
-	const float py = Utils::premult(rgb.y, a);
-	const float pz = Utils::premult(rgb.z, a);
-
-	rgb.x = px;
-	rgb.y = py;
-	rgb.z = pz;
-}
-
-// Like metric4, but both colors have x, y and z premultiplied by their own w before the difference is taken.
-// When AVPCL::flag_nonuniform or AVPCL::flag_nonuniform_ati is set, the x, y and z differences are
-// weighted first; flag_nonuniform wins if both are set.
-float Utils::metric4premult(Vector4::Arg a, Vector4::Arg b)
-{
-	Vector4 lhs = a;
-	premult4(lhs);
-
-	Vector4 rhs = b;
-	premult4(rhs);
-
-	Vector4 diff = lhs - rhs;
-
-	const bool nonuniform = AVPCL::flag_nonuniform;
-
-	if (nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		// first value of each pair: flag_nonuniform weights; second: flag_nonuniform_ati weights
-		diff.x *= nonuniform ? 0.299f : 0.3086f;
-		diff.y *= nonuniform ? 0.587f : 0.6094f;
-		diff.z *= nonuniform ? 0.114f : 0.0820f;
-	}
-
-	return lengthSquared(diff);
-}
-
-// Squared length of the difference between two 3-component colors after each has been premultiplied
-// by its separately supplied alpha (a0 for rgb0, a1 for rgb1).
-// When AVPCL::flag_nonuniform or AVPCL::flag_nonuniform_ati is set, the differences are weighted
-// per channel first; flag_nonuniform wins if both are set.
-float Utils::metric3premult_alphaout(Vector3::Arg rgb0, float a0, Vector3::Arg rgb1, float a1)
-{
-	Vector3 lhs = rgb0;
-	premult3(lhs, a0);
-
-	Vector3 rhs = rgb1;
-	premult3(rhs, a1);
-
-	Vector3 diff = lhs - rhs;
-
-	const bool nonuniform = AVPCL::flag_nonuniform;
-
-	if (nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		// first value of each pair: flag_nonuniform weights; second: flag_nonuniform_ati weights
-		diff.x *= nonuniform ? 0.299f : 0.3086f;
-		diff.y *= nonuniform ? 0.587f : 0.6094f;
-		diff.z *= nonuniform ? 0.114f : 0.0820f;
-	}
-
-	return lengthSquared(diff);
-}
-
-// Squared length of the difference between two 3-component values where one component acts as the
-// multiplier for the other two. rotatemode selects that component: x for AGBR, y for RABG, z for RGAB.
-// Not meant to be called with ROTATEMODE_RGBA_RGBA.
-// When AVPCL::flag_nonuniform or AVPCL::flag_nonuniform_ati is set, the differences are weighted
-// per channel first; flag_nonuniform wins if both are set.
-float Utils::metric3premult_alphain(Vector3::Arg rgb0, Vector3::Arg rgb1, int rotatemode)
-{
-	Vector3 lhs = rgb0;
-	Vector3 rhs = rgb1;
-
-	switch(rotatemode)
-	{
-	case ROTATEMODE_RGBA_AGBR:
-		// x is the multiplier
-		lhs.y = premult(lhs.y, lhs.x);
-		rhs.y = premult(rhs.y, rhs.x);
-		lhs.z = premult(lhs.z, lhs.x);
-		rhs.z = premult(rhs.z, rhs.x);
-		break;
-	case ROTATEMODE_RGBA_RABG:
-		// y is the multiplier
-		lhs.x = premult(lhs.x, lhs.y);
-		rhs.x = premult(rhs.x, rhs.y);
-		lhs.z = premult(lhs.z, lhs.y);
-		rhs.z = premult(rhs.z, rhs.y);
-		break;
-	case ROTATEMODE_RGBA_RGAB:
-		// z is the multiplier
-		lhs.x = premult(lhs.x, lhs.z);
-		rhs.x = premult(rhs.x, rhs.z);
-		lhs.y = premult(lhs.y, lhs.z);
-		rhs.y = premult(rhs.y, rhs.z);
-		break;
-	case ROTATEMODE_RGBA_RGBA:
-		// this function isn't supposed to be called for this rotatemode
-	default:
-		nvUnreachable();
-		break;
-	}
-
-	Vector3 diff = lhs - rhs;
-
-	const bool nonuniform = AVPCL::flag_nonuniform;
-
-	if (nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		// first value of each pair: flag_nonuniform weights; second: flag_nonuniform_ati weights
-		diff.x *= nonuniform ? 0.299f : 0.3086f;
-		diff.y *= nonuniform ? 0.587f : 0.6094f;
-		diff.z *= nonuniform ? 0.114f : 0.0820f;
-	}
-
-	return lengthSquared(diff);
-}
-
-// Squared difference between two scalar channel values after each has been premultiplied by its alpha
-// (premult(rgb0, a0) - premult(rgb1, a1)).
-// When AVPCL::flag_nonuniform or AVPCL::flag_nonuniform_ati is set, the difference is scaled first by
-// a weight chosen from rotatemode: 1.0 for RGBA_RGBA, otherwise the r, g or b weight for AGBR, RABG
-// or RGAB respectively. flag_nonuniform wins if both flags are set.
-float Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode)
-{
-	float err = premult(rgb0, a0) - premult(rgb1, a1);
-
-	// if nonuniform, select weights and weigh away
-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
-	{
-		float rwt, gwt, bwt;
-		float awt = 1.0f;	// defined fallback so an unexpected rotatemode never reads an uninitialized weight
-		if (AVPCL::flag_nonuniform)
-		{
-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
-		}
-		else /*if (AVPCL::flag_nonuniform_ati)*/
-		{
-			// plain else (as in metric4premult): the enclosing condition guarantees the ati flag is set here
-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
-		}
-
-		// adjust weights based on rotatemode
-		switch(rotatemode)
-		{
-		case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
-		case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
-		case ROTATEMODE_RGBA_RABG: awt = gwt; break;
-		case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
-		default: nvUnreachable();
-		}
-
-		// weigh the components
-		err *= awt;
-	}
-
-	return err * err;
-}

+ 0 - 61
3rdparty/nvtt/bc7/avpcl_utils.h

@@ -1,61 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-// utility class holding common routines
-#ifndef _AVPCL_UTILS_H
-#define _AVPCL_UTILS_H
-
-#include "nvmath/vector.h"
-
-namespace AVPCL {
-
-// Sign-extend the low nb bits of x: if bit (nb-1) is set, every bit from nb upwards is set as well;
-// otherwise x is returned unchanged. The mask is built in unsigned arithmetic because left-shifting
-// the negative value ~0 is undefined behaviour before C++20. nb is expected to be in 1..31.
-inline int SIGN_EXTEND(int x, int nb) { return (x & (1 << (nb - 1))) ? static_cast<int>(static_cast<unsigned int>(x) | (~0u << nb)) : x; }
-
-// Index mode: a 1-bit selector with two values, named after the number of index bits alpha gets.
-static const int INDEXMODE_BITS				= 1;		// 2 different index modes
-static const int NINDEXMODES				= (1<<(INDEXMODE_BITS));
-static const int INDEXMODE_ALPHA_IS_3BITS	= 0;
-static const int INDEXMODE_ALPHA_IS_2BITS	= 1;
-
-// Rotate mode: a 2-bit selector with four values. The Utils metric functions switch on these to
-// decide which channel weight applies.
-static const int ROTATEMODE_BITS		= 2;		// 4 different rotate modes
-static const int NROTATEMODES			= (1<<(ROTATEMODE_BITS));
-static const int ROTATEMODE_RGBA_RGBA	= 0;
-static const int ROTATEMODE_RGBA_AGBR	= 1;
-static const int ROTATEMODE_RGBA_RABG	= 2;
-static const int ROTATEMODE_RGBA_RGAB	= 3;
-
-// Collection of static helpers shared by the AVPCL mode encoders: error metrics, premultiplication,
-// quantization and interpolation. All members are static; the class holds no state.
-class Utils
-{
-public:
-	// error metrics
-	// each returns a squared difference; when one of the AVPCL nonuniform flags is set the per-channel
-	// differences are weighted before squaring
-	static float metric4(nv::Vector4::Arg a, nv::Vector4::Arg b);
-	static float metric3(nv::Vector3::Arg a, nv::Vector3::Arg b, int rotatemode);
-	static float metric1(float a, float b, int rotatemode);
-
-	// same metrics, computed on values premultiplied with premult() first
-	static float metric4premult(nv::Vector4::Arg rgba0, nv::Vector4::Arg rgba1);
-	static float metric3premult_alphaout(nv::Vector3::Arg rgb0, float a0, nv::Vector3::Arg rgb1, float a1);
-	static float metric3premult_alphain(nv::Vector3::Arg rgb0, nv::Vector3::Arg rgb1, int rotatemode);
-	static float metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode);
-
-	// returns (r*a + 127)/255 computed on the integer values of r and a
-	static float premult(float r, float a);
-
-	// quantization and unquantization
-	// prec is the number of bits of the quantized value and must be greater than 3
-	static int unquantize(int q, int prec);
-	static int quantize(float value, int prec);
-
-	// lerping
-	// interpolate between a and b at step i of denom
-	static int lerp(int a, int b, int i, int bias, int denom);
-	static nv::Vector4 lerp(nv::Vector4::Arg a, nv::Vector4::Arg b, int i, int bias, int denom);
-};
-
-}
-
-#endif

+ 0 - 76
3rdparty/nvtt/bc7/bits.h

@@ -1,76 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef _AVPCL_BITS_H
-#define _AVPCL_BITS_H
-
-// read/write a bitstream
-
-#include "nvcore/debug.h"
-
-namespace AVPCL {
-
-// Bit-granular reader/writer over a caller-owned byte buffer.
-// Bits are addressed LSB-first within each byte (bit k lives in byte k>>3, at position k&7).
-// A stream built from a `char *` is writable and starts empty; one built from a `const char *` is
-// read-only and starts with `availdatabits` readable bits. The buffer is never copied or freed.
-class Bits
-{
-public:
-
-	// Both pointer members are set in both constructors so neither is ever left uninitialized:
-	// cbits always points at the buffer, bits is null for a read-only stream.
-	Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; cbits = data; maxbits = maxdatabits; readonly = 0;}
-	Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; bits = 0; cbits = data; maxbits = availdatabits; readonly = 1;}
-
-	// Append the low nbits bits of value, least significant bit first.
-	void write(int value, int nbits) {
-		nvAssert (nbits >= 0 && nbits < 32);
-		nvAssert (sizeof(int)>= 4);
-		for (int i=0; i<nbits; ++i)
-			writeone(value>>i);
-	}
-	// Read nbits bits and return them as an integer, first bit read in bit 0.
-	int read(int nbits) { 
-		nvAssert (nbits >= 0 && nbits < 32);
-		nvAssert (sizeof(int)>= 4);
-		int out = 0;
-		for (int i=0; i<nbits; ++i)
-			out |= readone() << i;
-		return out;
-	}
-	// Current bit position.
-	int getptr() const { return bptr; }
-	// Move the bit position; ptr must lie in [0, maxbits).
-	void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
-	// Number of valid bits in the stream (one past the last written bit).
-	int getsize() const { return bend; }
-
-private:
-	int	bptr;		// next bit to read
-	int bend;		// last written bit + 1
-	char *bits;		// ptr to user bit stream (null when read-only)
-	const char *cbits;	// ptr to const user bit stream (always valid)
-	int maxbits;	// max size of user bit stream
-	char readonly;	// 1 if this is a read-only stream
-
-	// Return the next bit as 0 or 1; returns 0 without advancing when reading past the end.
-	int readone() {
-		nvAssert (bptr < bend);
-		if (bptr >= bend) return 0;
-		int bit = cbits[bptr>>3] & (1 << (bptr & 7));
-		++bptr;
-		return bit != 0;
-	}
-	// Store the low bit of `bit` at the current position; silently ignored once the buffer is full.
-	void writeone(int bit) {
-		nvAssert (!readonly); // "Writing a read-only bit stream"
-		nvAssert (bptr < maxbits);
-		if (bptr >= maxbits) return;
-		if (bit&1)
-			bits[bptr>>3] |= 1 << (bptr & 7);
-		else
-			bits[bptr>>3] &= ~(1 << (bptr & 7));
-		if (bptr++ >= bend) bend = bptr;	// extend the valid range when writing at the end
-	}
-};
-
-}
-
-#endif

+ 0 - 81
3rdparty/nvtt/bc7/endpts.h

@@ -1,81 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef _AVPCL_ENDPTS_H
-#define _AVPCL_ENDPTS_H
-
-// endpoint definitions and routines to search through endpoint space
-
-#include "nvmath/vector.h"
-
-namespace AVPCL {
-
-// channel counts and channel indices into the A[]/B[] arrays below
-static const int NCHANNELS_RGB	= 3;
-static const int NCHANNELS_RGBA	= 4;
-static const int CHANNEL_R		= 0;
-static const int CHANNEL_G		= 1;
-static const int CHANNEL_B		= 2;
-static const int CHANNEL_A		= 3;
-
-// unquantized (floating-point) endpoint pair
-struct FltEndpts
-{
-	nv::Vector4	A;
-	nv::Vector4	B;
-};
-
-// integer RGB endpoint pair
-struct IntEndptsRGB
-{
-	int		A[NCHANNELS_RGB];
-	int		B[NCHANNELS_RGB];
-};
-
-// integer RGB endpoint pair with a single lsb shared by both endpoints
-struct IntEndptsRGB_1
-{
-	int		A[NCHANNELS_RGB];
-	int		B[NCHANNELS_RGB];
-	int		lsb;				// shared lsb for A and B
-};
-
-// integer RGB endpoint pair with one lsb per endpoint
-struct IntEndptsRGB_2
-{
-	int		A[NCHANNELS_RGB];
-	int		B[NCHANNELS_RGB];
-	int		a_lsb;				// lsb for A
-	int		b_lsb;				// lsb for B
-};
-
-
-// integer RGBA endpoint pair
-struct IntEndptsRGBA
-{
-	int		A[NCHANNELS_RGBA];
-	int		B[NCHANNELS_RGBA];
-};
-
-// integer RGBA endpoint pair with one lsb per endpoint
-struct IntEndptsRGBA_2
-{
-	int		A[NCHANNELS_RGBA];
-	int		B[NCHANNELS_RGBA];
-	int		a_lsb;				// lsb for A
-	int		b_lsb;				// lsb for B
-};
-
-// integer RGBA endpoint pair with one lsb per endpoint that applies to the RGB channels only
-struct IntEndptsRGBA_2a
-{
-	int		A[NCHANNELS_RGBA];
-	int		B[NCHANNELS_RGBA];
-	int		a_lsb;				// lsb for RGB channels of A
-	int		b_lsb;				// lsb for RGB channels of B (the comment previously said A; presumably a copy/paste slip)
-};
-
-}
-
-#endif

+ 0 - 132
3rdparty/nvtt/bc7/shapes_three.h

@@ -1,132 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef	_AVPCL_SHAPES_THREE_H
-#define _AVPCL_SHAPES_THREE_H
-
-// shapes for 3 regions
-
-#define NREGIONS 3
-#define NSHAPES 64
-#define SHAPEBITS 6
-
-// Region map for every 3-region shape. Each shape is a 4x4 grid of region numbers (0, 1 or 2).
-// The table is laid out as 16 groups of 4 text rows; each text row holds one grid row for 4 shapes
-// side by side (4 values per shape). Use the REGION macro to index it.
-static int shapes[NSHAPES*16] = 
-{
-0, 0, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   0, 2, 2, 2,   
-0, 0, 1, 1,   0, 0, 1, 1,   2, 0, 0, 1,   0, 0, 2, 2,   
-0, 2, 2, 1,   2, 2, 1, 1,   2, 2, 1, 1,   0, 0, 1, 1,   
-2, 2, 2, 2,   2, 2, 2, 1,   2, 2, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 2, 2,   0, 0, 1, 1,   
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 2, 2,   0, 0, 1, 1,   
-1, 1, 2, 2,   0, 0, 2, 2,   1, 1, 1, 1,   2, 2, 1, 1,   
-1, 1, 2, 2,   0, 0, 2, 2,   1, 1, 1, 1,   2, 2, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 2,   
-0, 0, 0, 0,   1, 1, 1, 1,   1, 1, 1, 1,   0, 0, 1, 2,   
-1, 1, 1, 1,   1, 1, 1, 1,   2, 2, 2, 2,   0, 0, 1, 2,   
-2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2,   0, 0, 1, 2,   
-
-0, 1, 1, 2,   0, 1, 2, 2,   0, 0, 1, 1,   0, 0, 1, 1,   
-0, 1, 1, 2,   0, 1, 2, 2,   0, 1, 1, 2,   2, 0, 0, 1,   
-0, 1, 1, 2,   0, 1, 2, 2,   1, 1, 2, 2,   2, 2, 0, 0,   
-0, 1, 1, 2,   0, 1, 2, 2,   1, 2, 2, 2,   2, 2, 2, 0,   
-
-0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 0, 0,   0, 0, 2, 2,   
-0, 0, 1, 1,   0, 0, 1, 1,   1, 1, 2, 2,   0, 0, 2, 2,   
-0, 1, 1, 2,   2, 0, 0, 1,   1, 1, 2, 2,   0, 0, 2, 2,   
-1, 1, 2, 2,   2, 2, 0, 0,   1, 1, 2, 2,   1, 1, 1, 1,   
-
-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 0, 0,   
-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 1, 1,   1, 1, 0, 0,   
-0, 2, 2, 2,   2, 2, 2, 1,   0, 1, 2, 2,   2, 2, 1, 0,   
-0, 2, 2, 2,   2, 2, 2, 1,   0, 1, 2, 2,   2, 2, 1, 0,   
-
-0, 1, 2, 2,   0, 0, 1, 2,   0, 1, 1, 0,   0, 0, 0, 0,   
-0, 1, 2, 2,   0, 0, 1, 2,   1, 2, 2, 1,   0, 1, 1, 0,   
-0, 0, 1, 1,   1, 1, 2, 2,   1, 2, 2, 1,   1, 2, 2, 1,   
-0, 0, 0, 0,   2, 2, 2, 2,   0, 1, 1, 0,   1, 2, 2, 1,   
-
-0, 0, 2, 2,   0, 1, 1, 0,   0, 0, 1, 1,   0, 0, 0, 0,   
-1, 1, 0, 2,   0, 1, 1, 0,   0, 1, 2, 2,   2, 0, 0, 0,   
-1, 1, 0, 2,   2, 0, 0, 2,   0, 1, 2, 2,   2, 2, 1, 1,   
-0, 0, 2, 2,   2, 2, 2, 2,   0, 0, 1, 1,   2, 2, 2, 1,   
-
-0, 0, 0, 0,   0, 2, 2, 2,   0, 0, 1, 1,   0, 1, 2, 0,   
-0, 0, 0, 2,   0, 0, 2, 2,   0, 0, 1, 2,   0, 1, 2, 0,   
-1, 1, 2, 2,   0, 0, 1, 2,   0, 0, 2, 2,   0, 1, 2, 0,   
-1, 2, 2, 2,   0, 0, 1, 1,   0, 2, 2, 2,   0, 1, 2, 0,   
-
-0, 0, 0, 0,   0, 1, 2, 0,   0, 1, 2, 0,   0, 0, 1, 1,   
-1, 1, 1, 1,   1, 2, 0, 1,   2, 0, 1, 2,   2, 2, 0, 0,   
-2, 2, 2, 2,   2, 0, 1, 2,   1, 2, 0, 1,   1, 1, 2, 2,   
-0, 0, 0, 0,   0, 1, 2, 0,   0, 1, 2, 0,   0, 0, 1, 1,   
-
-0, 0, 1, 1,   0, 1, 0, 1,   0, 0, 0, 0,   0, 0, 2, 2,   
-1, 1, 2, 2,   0, 1, 0, 1,   0, 0, 0, 0,   1, 1, 2, 2,   
-2, 2, 0, 0,   2, 2, 2, 2,   2, 1, 2, 1,   0, 0, 2, 2,   
-0, 0, 1, 1,   2, 2, 2, 2,   2, 1, 2, 1,   1, 1, 2, 2,   
-
-0, 0, 2, 2,   0, 2, 2, 0,   0, 1, 0, 1,   0, 0, 0, 0,   
-0, 0, 1, 1,   1, 2, 2, 1,   2, 2, 2, 2,   2, 1, 2, 1,   
-0, 0, 2, 2,   0, 2, 2, 0,   2, 2, 2, 2,   2, 1, 2, 1,   
-0, 0, 1, 1,   1, 2, 2, 1,   0, 1, 0, 1,   2, 1, 2, 1,   
-
-0, 1, 0, 1,   0, 2, 2, 2,   0, 0, 0, 2,   0, 0, 0, 0,   
-0, 1, 0, 1,   0, 1, 1, 1,   1, 1, 1, 2,   2, 1, 1, 2,   
-0, 1, 0, 1,   0, 2, 2, 2,   0, 0, 0, 2,   2, 1, 1, 2,   
-2, 2, 2, 2,   0, 1, 1, 1,   1, 1, 1, 2,   2, 1, 1, 2,   
-
-0, 2, 2, 2,   0, 0, 0, 2,   0, 1, 1, 0,   0, 0, 0, 0,   
-0, 1, 1, 1,   1, 1, 1, 2,   0, 1, 1, 0,   0, 0, 0, 0,   
-0, 1, 1, 1,   1, 1, 1, 2,   0, 1, 1, 0,   2, 1, 1, 2,   
-0, 2, 2, 2,   0, 0, 0, 2,   2, 2, 2, 2,   2, 1, 1, 2,   
-
-0, 1, 1, 0,   0, 0, 2, 2,   0, 0, 2, 2,   0, 0, 0, 0,   
-0, 1, 1, 0,   0, 0, 1, 1,   1, 1, 2, 2,   0, 0, 0, 0,   
-2, 2, 2, 2,   0, 0, 1, 1,   1, 1, 2, 2,   0, 0, 0, 0,   
-2, 2, 2, 2,   0, 0, 2, 2,   0, 0, 2, 2,   2, 1, 1, 2,   
-
-0, 0, 0, 2,   0, 2, 2, 2,   0, 1, 0, 1,   0, 1, 1, 1,   
-0, 0, 0, 1,   1, 2, 2, 2,   2, 2, 2, 2,   2, 0, 1, 1,   
-0, 0, 0, 2,   0, 2, 2, 2,   2, 2, 2, 2,   2, 2, 0, 1,   
-0, 0, 0, 1,   1, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 0,
-};
-
-// Region number of pixel (x, y) in shape si.
-// (si>>2)*64 selects the group of four shapes, ((si)&3)*4 the shape's column block within a text row,
-// (y)*16 the grid row (16 values per text row) and (x) the column inside the shape.
-#define	REGION(x,y,si)	shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
-
-// Three pixel positions (0..15) per shape, one per region; the entry for region 0 is always 0.
-// NOTE(review): presumably these are the positions whose index is stored with one bit fewer -- confirm against the callers.
-static int shapeindex_to_compressed_indices[NSHAPES*3] = 
-{
-	0, 3,15,  0, 3, 8,  0,15, 8,  0,15, 3,
-	0, 8,15,  0, 3,15,  0,15, 3,  0,15, 8,
-	0, 8,15,  0, 8,15,  0, 6,15,  0, 6,15,
-	0, 6,15,  0, 5,15,  0, 3,15,  0, 3, 8,
-
-	0, 3,15,  0, 3, 8,  0, 8,15,  0,15, 3,
-	0, 3,15,  0, 3, 8,  0, 6,15,  0,10, 8,
-	0, 5, 3,  0, 8,15,  0, 8, 6,  0, 6,10,
-	0, 8,15,  0, 5,15,  0,15,10,  0,15, 8,
-
-	0, 8,15,  0,15, 3,  0, 3,15,  0, 5,10,
-	0, 6,10,  0,10, 8,  0, 8, 9,  0,15,10,
-	0,15, 6,  0, 3,15,  0,15, 8,  0, 5,15,
-	0,15, 3,  0,15, 6,  0,15, 6,  0,15, 8,
-
-	0, 3,15,  0,15, 3,  0, 5,15,  0, 5,15,
-	0, 5,15,  0, 8,15,  0, 5,15,  0,10,15,
-	0, 5,15,  0,10,15,  0, 8,15,  0,13,15,
-	0,15, 3,  0,12,15,  0, 3,15,  0, 3, 8
-
-};
-// Look up the table entry for shape si and region (0..2).
-#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region)  shapeindex_to_compressed_indices[(si)*3+(region)]
-
-#endif

+ 0 - 133
3rdparty/nvtt/bc7/shapes_two.h

@@ -1,133 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef _AVPCL_SHAPES_TWO_H
-#define _AVPCL_SHAPES_TWO_H
-
-// shapes for two regions (the shapes table holds 16 entries per shape)
-
-#define NREGIONS 2	// number of regions per shape
-#define NSHAPES 64	// number of shapes in the table
-#define SHAPEBITS 6	// 2^SHAPEBITS == NSHAPES; presumably the bit width of a shape index -- confirm
-
-static int shapes[NSHAPES*16] = 
-{
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
-0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   0, 0, 0, 1,   
-0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   
-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
-0, 0, 0, 1,   1, 1, 1, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
-0, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 1,   0, 0, 0, 0,   
-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 0,   
-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 0,   0, 1, 1, 1,   
-1, 0, 0, 0,   0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 1, 1,   
-1, 1, 1, 0,   0, 0, 0, 0,   1, 0, 0, 0,   0, 0, 0, 1,   
-1, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 0, 0, 0,   
-
-0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 1, 1, 1,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 1,   
-0, 0, 0, 0,   1, 1, 0, 0,   1, 0, 0, 0,   0, 0, 1, 1,   
-0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   0, 0, 0, 1,   
-
-0, 0, 1, 1,   0, 0, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-0, 0, 0, 0,   1, 1, 0, 0,   0, 1, 1, 0,   1, 1, 0, 0,   
-
-0, 0, 0, 1,   0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 1, 1,   
-0, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 1,   1, 0, 0, 1,   
-1, 1, 1, 0,   1, 1, 1, 1,   1, 0, 0, 0,   1, 0, 0, 1,   
-1, 0, 0, 0,   0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   
-
-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   0, 0, 1, 1,   
-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   0, 0, 1, 1,   
-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   1, 1, 0, 0,   
-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   1, 1, 0, 0,   
-
-0, 0, 1, 1,   0, 1, 0, 1,   0, 1, 1, 0,   0, 1, 0, 1,   
-1, 1, 0, 0,   0, 1, 0, 1,   1, 0, 0, 1,   1, 0, 1, 0,   
-0, 0, 1, 1,   1, 0, 1, 0,   0, 1, 1, 0,   1, 0, 1, 0,   
-1, 1, 0, 0,   1, 0, 1, 0,   1, 0, 0, 1,   0, 1, 0, 1,   
-
-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 1, 1,   0, 0, 1, 1,   
-0, 0, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   1, 0, 1, 1,   
-1, 1, 0, 0,   1, 1, 0, 0,   0, 1, 0, 0,   1, 1, 0, 1,   
-1, 1, 1, 0,   1, 0, 0, 0,   1, 1, 0, 0,   1, 1, 0, 0,   
-
-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 0, 0,   
-1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   0, 1, 1, 0,   
-0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   0, 0, 0, 0,   
-
-0, 1, 0, 0,   0, 0, 1, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
-1, 1, 1, 0,   0, 1, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-0, 1, 0, 0,   0, 0, 1, 0,   0, 1, 1, 1,   1, 1, 1, 0,   
-0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 0,   0, 1, 0, 0,   
-
-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 1, 1,   
-1, 1, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   
-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   1, 1, 0, 0,   
-0, 0, 1, 1,   1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   
-
-0, 1, 1, 0,   0, 1, 1, 0,   0, 1, 1, 1,   0, 0, 0, 1,   
-1, 1, 0, 0,   0, 0, 1, 1,   1, 1, 1, 0,   1, 0, 0, 0,   
-1, 1, 0, 0,   0, 0, 1, 1,   1, 0, 0, 0,   1, 1, 1, 0,   
-1, 0, 0, 1,   1, 0, 0, 1,   0, 0, 0, 1,   0, 1, 1, 1,   
-
-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-1, 1, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 0,   0, 1, 1, 1,   
-0, 0, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 1, 1, 1,   
-
-};
-
-#define	REGION(x,y,si)	shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]	// entry at column x, row y of shape si; shapes sit four abreast (16 ints per table row, 64 ints per group of four shapes)
-
-static int shapeindex_to_compressed_indices[NSHAPES*2] = 
-{
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0,15,  0,15,  0,15,
-
-	0,15,  0, 2,  0, 8,  0, 2,
-	0, 2,  0, 8,  0, 8,  0,15,
-	0, 2,  0, 8,  0, 2,  0, 2,
-	0, 8,  0, 8,  0, 2,  0, 2,
-
-	0,15,  0,15,  0, 6,  0, 8,
-	0, 2,  0, 8,  0,15,  0,15,
-	0, 2,  0, 8,  0, 2,  0, 2,
-	0, 2,  0,15,  0,15,  0, 6,
-
-	0, 6,  0, 2,  0, 6,  0, 8,
-	0,15,  0,15,  0, 2,  0, 2,
-	0,15,  0,15,  0,15,  0,15,
-	0,15,  0, 2,  0, 2,  0,15
-
-};
-#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region)  shapeindex_to_compressed_indices[(si)*2+(region)]	// two consecutive entries per shape, one per region
-
-#endif

+ 0 - 41
3rdparty/nvtt/bc7/tile.h

@@ -1,41 +0,0 @@
-/*
-Copyright 2007 nVidia, Inc.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-
-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-
-See the License for the specific language governing permissions and limitations under the License.
-*/
-
-#ifndef _AVPCL_TILE_H
-#define _AVPCL_TILE_H
-
-#include "nvmath/vector.h"
-#include <math.h>
-#include "avpcl_utils.h"
-
-namespace AVPCL {
-
-// extract a tile of pixels from an array
-//
-// A Tile stores up to TILE_W x TILE_H pixels plus a parallel array of
-// per-pixel floats (importance_map). size_x / size_y record the actual size
-// of the tile.
-
-class Tile
-{
-public:
-	static const int TILE_H = 4;
-	static const int TILE_W = 4;
-	static const int TILE_TOTAL = TILE_H * TILE_W;
-	nv::Vector4 data[TILE_H][TILE_W];		// pixel values; not assigned by the constructors below
-	float importance_map[TILE_H][TILE_W];	// not assigned by the constructors below
-	int	size_x, size_y;			// actual size of tile
-
-	// Default constructor: zero the extents so they never hold indeterminate
-	// values when read before being assigned.
-	Tile() : size_x(0), size_y(0) {}
-	~Tile() {}
-	// Construct a tile whose actual size is xs by ys.
-	Tile(int xs, int ys) : size_x(xs), size_y(ys) {}
-};
-
-}
-
-#endif

+ 0 - 181
3rdparty/nvtt/nvcore/array.h

@@ -1,181 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#ifndef NV_CORE_ARRAY_H
-#define NV_CORE_ARRAY_H
-
-/*
-This array class requires the elements to be relocatable; it uses memmove and realloc. Ideally I should be 
-using swap, but I honestly don't care. The only thing that you should be aware of is that internal pointers
-are not supported.
-
-Note also that push_back and resize do not support inserting elements that are stored in the same 
-container. This is forbidden to prevent an extra copy.
-*/
-
-
-#include "memory.h"
-#include "debug.h"
-#include "foreach.h" // pseudoindex
-
-
-namespace nv 
-{
-    class Stream;
-
-    /**
-    * Replacement for std::vector that is easier to debug and provides
-    * some nice foreach enumerators.
-    *
-    * The array keeps a single buffer (m_buffer) that holds m_size elements
-    * out of m_capacity allocated slots. Sizes and indices are `uint` values.
-    * Element access is bounds-checked with nvDebugCheck only.
-    */
-    template<typename T>
-    class NVCORE_CLASS Array {
-    public:
-        typedef uint size_type;
-
-        // Default constructor: null buffer, zero size and capacity.
-        NV_FORCEINLINE Array() : m_buffer(NULL), m_capacity(0), m_size(0) {}
-
-        // Copy constructor: starts empty, then copies the elements of a.
-        NV_FORCEINLINE Array(const Array & a) : m_buffer(NULL), m_capacity(0), m_size(0) {
-            copy(a.m_buffer, a.m_size);
-        }
-
-        // Constructor that initializes the vector with the given elements (num elements read from ptr).
-        NV_FORCEINLINE Array(const T * ptr, uint num) : m_buffer(NULL), m_capacity(0), m_size(0) {
-            copy(ptr, num);
-        }
-
-        // Allocate array: reserves the given capacity; the size stays 0.
-        NV_FORCEINLINE explicit Array(uint capacity) : m_buffer(NULL), m_capacity(0), m_size(0) {
-            setArrayCapacity(capacity);
-        }
-
-        // Destructor: destroys the elements, then frees the buffer.
-        NV_FORCEINLINE ~Array() {
-            clear();
-            free<T>(m_buffer);
-        }
-
-
-        /// Const element access (index is checked with nvDebugCheck only).
-        NV_FORCEINLINE const T & operator[]( uint index ) const
-        {
-            nvDebugCheck(index < m_size);
-            return m_buffer[index];
-        }
-        NV_FORCEINLINE const T & at( uint index ) const
-        {
-            nvDebugCheck(index < m_size);
-            return m_buffer[index];
-        }
-
-        /// Element access (index is checked with nvDebugCheck only).
-        NV_FORCEINLINE T & operator[] ( uint index )
-        {
-            nvDebugCheck(index < m_size);
-            return m_buffer[index];
-        }
-        NV_FORCEINLINE T & at( uint index )
-        {
-            nvDebugCheck(index < m_size);
-            return m_buffer[index];
-        }
-
-        /// Get vector size.
-        NV_FORCEINLINE uint size() const { return m_size; }
-
-        /// Get vector size (same value as size()).
-        NV_FORCEINLINE uint count() const { return m_size; }
-
-        /// Get vector capacity.
-        NV_FORCEINLINE uint capacity() const { return m_capacity; }
-
-        /// Get const vector pointer.
-        NV_FORCEINLINE const T * buffer() const { return m_buffer; }
-
-        /// Get vector pointer.
-        NV_FORCEINLINE T * buffer() { return m_buffer; }
-
-        /// Provide begin/end pointers for C++11 range-based for loops.
-        NV_FORCEINLINE T * begin() { return m_buffer; }
-        NV_FORCEINLINE T * end() { return m_buffer + m_size; }
-        NV_FORCEINLINE const T * begin() const { return m_buffer; }
-        NV_FORCEINLINE const T * end() const { return m_buffer + m_size; }
-
-        /// Is vector empty.
-        NV_FORCEINLINE bool isEmpty() const { return m_size == 0; }
-
-        /// Is a null vector (no buffer allocated).
-        NV_FORCEINLINE bool isNull() const { return m_buffer == NULL; }
-
-
-        T & append();                                   // grow by one element and return a reference to it
-        void push_back( const T & val );                // append val; val must not be stored in this array
-        void pushBack( const T & val );                 // forwards to push_back()
-        Array<T> & append( const T & val );             // push_back() that returns *this
-        Array<T> & operator<< ( T & t );                // Qt-like push_back() that returns *this
-        void pop_back();                                // remove the last element
-        void popBack(uint count = 1);                   // remove the last count elements
-        void popFront(uint count = 1);                  // remove the first count elements, moving the rest down
-        const T & back() const;                         // last element; the array must not be empty
-        T & back();
-        const T & front() const;                        // first element; the array must not be empty
-        T & front();
-        bool contains(const T & e) const;               // true if find() locates e
-        bool find(const T & element, uint * indexPtr) const;                        // searches the whole array
-        bool find(const T & element, uint begin, uint end, uint * indexPtr) const;  // searches the given range via nv::find
-        void removeAt(uint index);                      // destroy the element at index and move the tail down
-        bool remove(const T & element);                 // remove the first element located by find(); false if none
-        void insertAt(uint index, const T & val = T()); // insert a copy of val at index, moving later elements up
-        void append(const Array<T> & other);            // append copies of all elements of other
-        void append(const T other[], uint count);       // append copies of count elements read from other
-        void replaceWithLast(uint index);               // swap the element at index with the last one, then drop the last
-        void resize(uint new_size);                     // change the size; added elements are default-constructed
-        void resize(uint new_size, const T & elem);     // change the size; added elements are constructed from elem
-        void fill(const T & elem);                      // fill the array with elem
-        void clear();                                   // destroy all elements; the buffer is kept
-        void shrink();                                  // reduce the capacity to the current size
-        void reserve(uint desired_size);                // raise the capacity to at least desired_size
-        void copy(const T * data, uint count);          // replace the contents with count elements read from data
-        Array<T> & operator=( const Array<T> & a );     // replace the contents with the elements of a
-        T * release();                                  // hand the buffer to the caller and reset this array to null
-
-
-        // Array enumerator: the pseudo index is a plain element index running from start() to m_size.
-        typedef uint PseudoIndex;
-
-        NV_FORCEINLINE PseudoIndex start() const { return 0; }
-        NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }
-        NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; }
-
-#if NV_CC_MSVC
-        NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) {
-            return m_buffer[i(this)];
-        }
-        NV_FORCEINLINE const T & operator[]( const PseudoIndexWrapper & i ) const {
-            return m_buffer[i(this)];
-        }
-#endif
-
-        // Friends (both access the private members directly).
-        template <typename Typ> 
-        friend Stream & operator<< ( Stream & s, Array<Typ> & p );
-
-        template <typename Typ>
-        friend void swap(Array<Typ> & a, Array<Typ> & b);
-
-
-    protected:
-
-        void setArraySize(uint new_size);           // sets m_size and grows the buffer when new_size exceeds the capacity
-        void setArrayCapacity(uint new_capacity);   // reallocates (or frees, for 0) the buffer
-
-        T * m_buffer;       // element storage; NULL when nothing is allocated
-        uint m_capacity;    // number of allocated element slots
-        uint m_size;        // number of elements in use
-
-    };
-
-
-} // nv namespace
-
-#endif // NV_CORE_ARRAY_H

+ 0 - 437
3rdparty/nvtt/nvcore/array.inl

@@ -1,437 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#ifndef NV_CORE_ARRAY_INL
-#define NV_CORE_ARRAY_INL
-
-#include "array.h"
-
-#include "stream.h"
-#include "utils.h" // swap
-
-#include <string.h>	// memmove
-#include <new> // for placement new
-
-
-
-namespace nv 
-{
-    // Grow the array by one default-constructed element and return a
-    // reference to that new last element.
-    template <typename T>
-    NV_FORCEINLINE T & Array<T>::append()
-    {
-        const uint last = m_size;
-        const uint grown = m_size + 1;
-
-        setArraySize(grown);
-        construct_range(m_buffer, grown, last);
-
-        return m_buffer[last];
-    }
-
-    // Push an element at the end of the vector. val must not refer to an element of this array (debug-checked below).
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::push_back( const T & val )
-    {
-#if 1 // active implementation: no defensive copy of val
-        nvDebugCheck(&val < m_buffer || &val >= m_buffer+m_size); // val must lie outside [m_buffer, m_buffer+m_size)
-
-        uint old_size = m_size;
-        uint new_size = m_size + 1;
-
-        setArraySize(new_size); // may reallocate the buffer
-
-        construct_range(m_buffer, new_size, old_size, val); // construct the one new slot from val
-#else // disabled alternative: copies val first when the buffer has to grow
-        uint new_size = m_size + 1;
-
-        if (new_size > m_capacity)
-        {
-            // @@ Is there any way to avoid this copy?
-            // @@ Can we create a copy without side effects? Ie. without calls to constructor/destructor. Use alloca + memcpy?
-            // @@ Assert instead of copy?
-            const T copy(val);	// create a copy in case value is inside of this array.
-
-            setArraySize(new_size);
-
-            new (m_buffer+new_size-1) T(copy);
-        }
-        else
-        {
-            m_size = new_size;
-            new(m_buffer+new_size-1) T(val);
-        }
-#endif // 0/1
-    }
-    template <typename T> // camelCase spelling of push_back()
-    NV_FORCEINLINE void Array<T>::pushBack( const T & val )
-    {
-        push_back(val);
-    }
-    template <typename T> // push_back() that returns the array so calls can be chained
-    NV_FORCEINLINE Array<T> & Array<T>::append( const T & val )
-    {
-        push_back(val);
-        return *this;
-    }
-
-    // Qt like push operator: appends t via push_back() and returns the array.
-    template <typename T>
-    NV_FORCEINLINE Array<T> & Array<T>::operator<< ( T & t )
-    {
-        push_back(t);
-        return *this;
-    }
-
-    // Pop the element at the end of the vector. The array must not be empty (debug-checked).
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::pop_back()
-    {
-        nvDebugCheck( m_size > 0 );
-        resize( m_size - 1 ); // resize() destroys the removed element
-    }
-    template <typename T> // pops the last count elements; count must not exceed the size (debug-checked)
-    NV_FORCEINLINE void Array<T>::popBack(uint count)
-    {
-        nvDebugCheck(m_size >= count);
-        resize(m_size - count); // resize() destroys the removed elements
-    }
-
-    // Remove the first `count` elements and slide the remaining ones down to
-    // the start of the buffer.
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::popFront(uint count)
-    {
-        nvDebugCheck(m_size >= count);
-
-        if (count == m_size) {
-            // Nothing survives: clear() destroys every element.
-            clear();
-            return;
-        }
-
-        const uint remaining = m_size - count;
-
-        destroy_range(m_buffer, 0, count);
-        memmove(m_buffer, m_buffer + count, sizeof(T) * remaining);
-        m_size = remaining;
-
-    }
-
-
-    // Last element (read-only). The array must not be empty.
-    template <typename T>
-    NV_FORCEINLINE const T & Array<T>::back() const
-    {
-        nvDebugCheck( m_size > 0 );
-        const uint last = m_size - 1;
-        return m_buffer[last];
-    }
-
-    // Last element (mutable). The array must not be empty.
-    template <typename T>
-    NV_FORCEINLINE T & Array<T>::back()
-    {
-        nvDebugCheck( m_size > 0 );
-        const uint last = m_size - 1;
-        return m_buffer[last];
-    }
-
-    // First element (read-only). The array must not be empty.
-    template <typename T>
-    NV_FORCEINLINE const T & Array<T>::front() const
-    {
-        nvDebugCheck( m_size > 0 );
-        return *m_buffer;
-    }
-
-    // First element (mutable). The array must not be empty.
-    template <typename T>
-    NV_FORCEINLINE T & Array<T>::front()
-    {
-        nvDebugCheck( m_size > 0 );
-        return *m_buffer;
-    }
-
-    // Check if the given element is contained in the array. Passes a NULL index pointer to find().
-    template <typename T>
-    NV_FORCEINLINE bool Array<T>::contains(const T & e) const
-    {
-        return find(e, NULL);
-    }
-
-    // Return true if element found. Searches the whole array, i.e. the range [0, m_size).
-    template <typename T>
-    NV_FORCEINLINE bool Array<T>::find(const T & element, uint * indexPtr) const
-    {
-        return find(element, 0, m_size, indexPtr);
-    }
-
-    // Return true if element found within the given range. Forwards to the free function nv::find.
-    template <typename T>
-    NV_FORCEINLINE bool Array<T>::find(const T & element, uint begin, uint end, uint * indexPtr) const
-    {
-        return ::nv::find(element, m_buffer, begin, end, indexPtr); // explicitly qualified: a plain find() would name this member
-    }
-
-
-    // Erase the element at `index` and close the gap by moving every later
-    // element down one slot. The cost grows with the number of trailing elements.
-    template <typename T>
-    void Array<T>::removeAt(uint index)
-    {
-        nvDebugCheck(index >= 0 && index < m_size);
-
-        if (m_size == 1) {
-            // Removing the only element is the same as clearing the array.
-            clear();
-            return;
-        }
-
-        T * const slot = m_buffer + index;
-
-        slot->~T();
-        memmove(slot, slot + 1, sizeof(T) * (m_size - 1 - index));
-        --m_size;
-    }
-
-    // Look the element up with find() and, when present, erase that
-    // occurrence. Returns whether anything was removed.
-    template <typename T>
-    bool Array<T>::remove(const T & element)
-    {
-        uint where;
-        if (!find(element, &where)) {
-            return false;
-        }
-
-        removeAt(where);
-        return true;
-    }
-
-    // Open a slot at `index` by moving the elements from there on up by one,
-    // then copy-construct `val` into that slot.
-    template <typename T>
-    void Array<T>::insertAt(uint index, const T & val/*=T()*/)
-    {
-        nvDebugCheck( index >= 0 && index <= m_size );
-
-        const uint old_size = m_size;
-        setArraySize(old_size + 1);
-
-        // Nothing to move when inserting at the very end.
-        if (index < old_size) {
-            T * const slot = m_buffer + index;
-            memmove(slot + 1, slot, sizeof(T) * (old_size - index));
-        }
-
-        new(m_buffer+index) T(val);
-    }
-
-    // Append the given data to our vector: forwards other's buffer and size to the pointer/count overload.
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::append(const Array<T> & other)
-    {
-        append(other.m_buffer, other.m_size);
-    }
-
-    // Copy-construct `count` elements read from `other` onto the end of the
-    // array. A count of zero leaves the array untouched.
-    template <typename T>
-    void Array<T>::append(const T other[], uint count)
-    {
-        if (count == 0) {
-            return;
-        }
-
-        const uint base = m_size;
-        setArraySize(base + count);
-
-        for (uint n = 0; n != count; ++n) {
-            new(m_buffer + base + n) T(other[n]);
-        }
-    }
-
-
-    // Unordered removal: swap the element at `index` with the last element,
-    // destroy what is now in the last slot and shrink the size by one.
-    template <typename T> 
-    void Array<T>::replaceWithLast(uint index)
-    {
-        nvDebugCheck( index < m_size );
-
-        T & last = back();
-        nv::swap(m_buffer[index], last);      // @@ Is this OK when index == size-1?
-
-        last.~T();
-        --m_size;
-    }
-
-    // Change the element count to `new_size`, keeping the elements that
-    // remain in range. Elements cut off by shrinking are destroyed; elements
-    // added by growing are default-constructed.
-    template <typename T> 
-    void Array<T>::resize(uint new_size)
-    {
-        const uint previous = m_size;
-
-        // Shrinking: tear down the elements that fall off the end.
-        destroy_range(m_buffer, new_size, previous);
-
-        setArraySize(new_size);
-
-        // Growing: default-construct the added elements.
-        construct_range(m_buffer, new_size, previous);
-    }
-
-
-    // Resize the vector preserving existing elements and initializing the
-    // new ones with the given value.
-    // elem must not refer to an element stored in this array, because the
-    // buffer may be reallocated before elem is read.
-    template <typename T> 
-    void Array<T>::resize(uint new_size, const T & elem)
-    {
-        // The one-past-the-end address is not an element of the array, so the
-        // upper bound is inclusive, exactly as in push_back().
-        nvDebugCheck(&elem < m_buffer || &elem >= m_buffer+m_size);
-
-        uint old_size = m_size;
-
-        // Destruct old elements (if we're shrinking).
-        destroy_range(m_buffer, new_size, old_size);
-
-        setArraySize(new_size);
-
-        // Call copy constructors
-        construct_range(m_buffer, new_size, old_size, elem);
-    }
-
-    // Fill array with the given value.
-    template <typename T>
-    void Array<T>::fill(const T & elem)
-    {
-        // Qualify the call the same way find() does. An unqualified fill() here
-        // names this one-argument member function, so the three-argument call
-        // could not compile once the template is instantiated.
-        // NOTE(review): assumes the included headers provide a free
-        // nv::fill(T *, uint, const T &) helper -- confirm.
-        ::nv::fill(m_buffer, m_size, elem);
-    }
-
-    // Clear the buffer: destroys every element and sets the size to 0. The allocation and capacity are left as they are.
-    template <typename T> 
-    NV_FORCEINLINE void Array<T>::clear()
-    {
-        nvDebugCheck(isValidPtr(m_buffer)); // sanity check on the buffer pointer (NULL is accepted by the x86-64 variant)
-
-        // Destruct old elements
-        destroy_range(m_buffer, 0, m_size);
-
-        m_size = 0;
-    }
-
-    // Give back unused capacity so that the capacity equals the current size.
-    template <typename T> 
-    NV_FORCEINLINE void Array<T>::shrink()
-    {
-        if (m_size >= m_capacity) {
-            return; // no spare capacity
-        }
-
-        setArrayCapacity(m_size);
-    }
-
-    // Make sure the capacity is at least `desired_size`. The capacity is
-    // never reduced here.
-    template <typename T> 
-    NV_FORCEINLINE void Array<T>::reserve(uint desired_size)
-    {
-        if (desired_size <= m_capacity) {
-            return; // already large enough
-        }
-
-        setArrayCapacity(desired_size);
-    }
-
-    // Copy elements to this array. Resizes it if needed. The current elements are destroyed before data is read, so data must not point into this array.
-    template <typename T>
-    NV_FORCEINLINE void Array<T>::copy(const T * data, uint count)
-    {
-#if 1   // More simple, but maybe not be as efficient? (active implementation)
-        destroy_range(m_buffer, 0, m_size); // drop every current element
-
-        setArraySize(count); // may reallocate the buffer
-
-        construct_range(m_buffer, count, 0, data); // construct all count elements from data
-#else // disabled alternative: copies over the existing elements and constructs only the added ones
-        const uint old_size = m_size;
-
-        destroy_range(m_buffer, count, old_size);
-
-        setArraySize(count);
-
-        copy_range(m_buffer, data, old_size);
-
-        construct_range(m_buffer, count, old_size, data);
-#endif
-    }
-
-    // Assignment operator.
-    // Replaces the contents of this array with the elements of a and returns
-    // *this. Self-assignment is a no-op: copy() destroys the current elements
-    // before it constructs the new ones from the source pointer, so copying
-    // from our own buffer would read elements that were already destroyed.
-    template <typename T>
-    NV_FORCEINLINE Array<T> & Array<T>::operator=( const Array<T> & a )
-    {
-        if (this != &a) {
-            copy(a.m_buffer, a.m_size);
-        }
-        return *this;
-    }
-
-    // Detach the buffer from the array and return it to the caller. The
-    // array is left null with zero size and capacity; no element is destroyed
-    // here.
-    template <typename T>
-    T * Array<T>::release() {
-        T * const detached = m_buffer;
-
-        m_size = 0;
-        m_capacity = 0;
-        m_buffer = NULL;
-
-        return detached;
-    }
-
-
-
-    // Record the new element count and enlarge the buffer when it no longer
-    // fits. No element is constructed or destroyed here.
-    template <typename T> 
-    inline void Array<T>::setArraySize(uint new_size) {
-        m_size = new_size;
-
-        if (new_size <= m_capacity) {
-            return; // still fits
-        }
-
-        // The first allocation is exact; later ones add 25% of headroom.
-        const uint headroom = (m_capacity == 0) ? 0 : (new_size >> 2);
-
-        setArrayCapacity( new_size + headroom );
-    }
-
-    // Set the number of allocated slots. A capacity of zero frees the buffer;
-    // any other value reallocates it.
-    template <typename T> 
-    inline void Array<T>::setArrayCapacity(uint new_capacity) {
-        nvDebugCheck(new_capacity >= m_size);
-
-        if (new_capacity != 0) {
-            m_buffer = realloc<T>(m_buffer, new_capacity);
-        }
-        else if (m_buffer != NULL) {
-            free<T>(m_buffer);
-            m_buffer = NULL;
-        }
-
-        m_capacity = new_capacity;
-    }
-
-    // Serialize an array through a Stream: the element count goes first,
-    // followed by each element. When the stream is loading, the count is read
-    // and the array is resized to it before the elements are streamed.
-    template <typename Typ> 
-    inline Stream & operator<< ( Stream & s, Array<Typ> & p )
-    {
-        if (!s.isLoading()) {
-            s << p.m_size;
-        }
-        else {
-            uint size;
-            s << size;
-            p.resize( size );
-        }
-
-        for (uint n = 0; n != p.m_size; ++n) {
-            s << p.m_buffer[n];
-        }
-
-        return s;
-    }
-
-    // Exchange the contents of two arrays by swapping their size, capacity
-    // and buffer pointer; the elements themselves are not touched.
-    template <typename Typ>
-    inline void swap(Array<Typ> & a, Array<Typ> & b)
-    {
-        nv::swap(a.m_size, b.m_size);
-        nv::swap(a.m_capacity, b.m_capacity);
-        nv::swap(a.m_buffer, b.m_buffer);
-    }
-
-
-} // nv namespace
-
-#endif // NV_CORE_ARRAY_INL

+ 0 - 216
3rdparty/nvtt/nvcore/debug.h

@@ -1,216 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#ifndef NV_CORE_DEBUG_H
-#define NV_CORE_DEBUG_H
-
-#include "nvcore.h"
-
-#include <stdarg.h> // va_list
-
-
-// Make sure we are using our assert.
-#undef assert
-
-#define NV_ABORT_DEBUG      1
-#define NV_ABORT_IGNORE     2
-#define NV_ABORT_EXIT       3
-
-#define nvNoAssert(exp) \
-    NV_MULTI_LINE_MACRO_BEGIN \
-    (void)sizeof(exp); \
-    NV_MULTI_LINE_MACRO_END
-
-#if NV_NO_ASSERT
-
-#   define nvAssert(exp) nvNoAssert(exp)
-#   define nvCheck(exp) nvNoAssert(exp)
-#   define nvDebugAssert(exp) nvNoAssert(exp)
-#   define nvDebugCheck(exp) nvNoAssert(exp)
-#   define nvDebugBreak() nvNoAssert(0)
-
-#else // NV_NO_ASSERT
-
-#   if NV_CC_MSVC
-        // @@ Does this work in msvc-6 and earlier?
-#       define nvDebugBreak()       __debugbreak()
-//#       define nvDebugBreak()        __asm { int 3 }
-#   elif NV_OS_ORBIS
-#       define nvDebugBreak()       __debugbreak()
-#   elif NV_CC_GNUC
-#       define nvDebugBreak()       __builtin_trap()
-#   else
-#       error "No nvDebugBreak()!"
-#   endif
-
-/*
-#   elif NV_CC_GNUC || NV_CPU_PPC && NV_OS_DARWIN
-        // @@ Use __builtin_trap() on GCC
-#       define nvDebugBreak()       __asm__ volatile ("trap")
-#   elif (NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64) && NV_OS_DARWIN
-#       define nvDebugBreak()       __asm__ volatile ("int3")
-#   elif NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64
-#       define nvDebugBreak()       __asm__ ( "int %0" : :"I"(3) )
-#   else
-#       include <signal.h>
-#       define nvDebugBreak()       raise(SIGTRAP)
-#   endif
-*/
-
-#define nvDebugBreakOnce() \
-    NV_MULTI_LINE_MACRO_BEGIN \
-    static bool firstTime = true; \
-    if (firstTime) { firstTime = false; nvDebugBreak(); } \
-    NV_MULTI_LINE_MACRO_END
-
-#define nvAssertMacro(exp) \
-    NV_MULTI_LINE_MACRO_BEGIN \
-    if (!(exp)) { \
-        if (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) { \
-            nvDebugBreak(); \
-        } \
-    } \
-    NV_MULTI_LINE_MACRO_END
-
-// GCC, LLVM need "##" before the __VA_ARGS__, MSVC doesn't care
-#define nvAssertMacroWithIgnoreAll(exp,...) \
-    NV_MULTI_LINE_MACRO_BEGIN \
-        static bool ignoreAll = false; \
-        if (!ignoreAll && !(exp)) { \
-            int result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__, ##__VA_ARGS__); \
-            if (result == NV_ABORT_DEBUG) { \
-                nvDebugBreak(); \
-            } else if (result == NV_ABORT_IGNORE) { \
-                ignoreAll = true; \
-            } \
-        } \
-    NV_MULTI_LINE_MACRO_END
-
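-// Interesting assert macro from Insomniac:
-// http://www.gdcvault.com/play/1015319/Developing-Imperfect-Software-How-to
-// nvCheckMacro is an expression: it yields true when the check passes (or when
-// nvAbort asks for the debugger after a failure) and false otherwise, so it can
-// be used as follows:
-// if (nvCheckMacro(i < count)) {
-//     normal path
-// } else {
-//     fixup code.
-// }
-// Note that nvCheck() below is currently defined in terms of nvAssertMacro, not nvCheckMacro.
-// This style of macro could be combined with __builtin_expect to let the compiler know failure is unlikely.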
-#define nvCheckMacro(exp) \
-    (\
-        (exp) ? true : ( \
-            (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) ? (nvDebugBreak(), true) : ( false ) \
-        ) \
-    )
-
-
-#define nvAssert(exp)    nvAssertMacro(exp)
-#define nvCheck(exp)     nvAssertMacro(exp)
-
-#if defined(_DEBUG)
-#   define nvDebugAssert(exp)   nvAssertMacro(exp)
-#   define nvDebugCheck(exp)    nvAssertMacro(exp)
-#else // _DEBUG
-#   define nvDebugAssert(exp)   nvNoAssert(exp)
-#   define nvDebugCheck(exp)    nvNoAssert(exp)
-#endif // _DEBUG
-
-#endif // NV_NO_ASSERT
-
-// Use nvAssume for very simple expressions only: nvAssume(0), nvAssume(value == true), etc.
-/*#if !defined(_DEBUG)
-#   if NV_CC_MSVC
-#       define nvAssume(exp)    __assume(exp)
-#   else
-#       define nvAssume(exp)    nvCheck(exp)
-#   endif
-#else
-#   define nvAssume(exp)    nvCheck(exp)
-#endif*/
-
-// nvUnreachable() marks a code path that must never execute. In _DEBUG builds
-// it raises an assertion first and then emits the compiler's unreachable hint;
-// otherwise it is the hint alone. The two-statement debug form is wrapped in
-// NV_MULTI_LINE_MACRO_BEGIN/END, like the other multi-statement macros in this
-// file, so that both statements stay together when the macro is the body of an
-// unbraced if/else.
-#if defined(_DEBUG)
-#  if NV_CC_MSVC
-#   define nvUnreachable() \
-        NV_MULTI_LINE_MACRO_BEGIN \
-        nvAssert(0 && "unreachable"); \
-        __assume(0); \
-        NV_MULTI_LINE_MACRO_END
-#  else
-#   define nvUnreachable() \
-        NV_MULTI_LINE_MACRO_BEGIN \
-        nvAssert(0 && "unreachable"); \
-        __builtin_unreachable(); \
-        NV_MULTI_LINE_MACRO_END
-#  endif
-#else
-#  if NV_CC_MSVC
-#   define nvUnreachable() __assume(0)
-#  else
-#   define nvUnreachable() __builtin_unreachable()
-#  endif
-#endif
-
-
-#define nvError(x)      nvAbort(x, __FILE__, __LINE__, __FUNC__)
-#define nvWarning(x)    nvDebugPrint("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x))
-
-#ifndef NV_DEBUG_PRINT
-#define NV_DEBUG_PRINT 1 //defined(_DEBUG)
-#endif
-
-#if NV_DEBUG_PRINT
-#define nvDebug(...)    nvDebugPrint(__VA_ARGS__)
-#else
-#if NV_CC_MSVC
-#define nvDebug(...)    __noop(__VA_ARGS__)
-#else
-#define nvDebug(...)    ((void)0) // Non-msvc platforms do not evaluate arguments?
-#endif
-#endif
-
-
-NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...) __attribute__((format (printf, 5, 6)));
-NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
-
-namespace nv
-{
-    // Heuristic sanity check on a pointer value, used by debug assertions.
-    // x86-64: NULL is accepted; otherwise the address must lie in
-    // [0x10000, 0x000007FFFFFEFFFF). Other CPUs: only four specific bit
-    // patterns are rejected (presumably debug fill values -- confirm).
-    inline bool isValidPtr(const void * ptr) {
-    #if NV_CPU_X86_64
-        if (ptr == NULL) return true;
-
-        const uint64 address = reinterpret_cast<uint64>(ptr);
-        const bool tooLow = address < 0x10000ULL;
-        const bool tooHigh = address >= 0x000007FFFFFEFFFFULL;
-
-        return !(tooLow || tooHigh);
-    #else
-        switch (reinterpret_cast<uint32>(ptr)) {
-            case 0xcccccccc:
-            case 0xcdcdcdcd:
-            case 0xdddddddd:
-            case 0xffffffff:
-                return false;
-            default:
-                return true;
-        }
-    #endif
-    }
-
-    // Message handler interface. An implementation is installed with debug::setMessageHandler().
-    struct MessageHandler {
-        virtual void log(const char * str, va_list arg) = 0; // str is presumably a printf-style format for arg -- confirm
-        virtual ~MessageHandler() {}
-    };
-
-    // Assert handler interface. An implementation is installed with debug::setAssertHandler().
-    struct AssertHandler {
-        virtual int assertion(const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg) = 0; // presumably returns one of the NV_ABORT_* codes -- confirm
-        virtual ~AssertHandler() {}
-    };
-
-
-    namespace debug
-    {
-        NVCORE_API void dumpInfo();
-        NVCORE_API void dumpCallstack( MessageHandler *messageHandler, int callstackLevelsToSkip = 0 );
-
-        NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
-        NVCORE_API void resetMessageHandler();
-
-        NVCORE_API void setAssertHandler( AssertHandler * assertHanlder );
-        NVCORE_API void resetAssertHandler();
-
-        NVCORE_API void enableSigHandler(bool interactive);
-        NVCORE_API void disableSigHandler();
-
-        NVCORE_API bool isDebuggerPresent();
-        NVCORE_API bool attachToDebugger();
-
-        NVCORE_API void terminate(int code);
-    }
-
-} // nv namespace
-
-#endif // NV_CORE_DEBUG_H

+ 0 - 57
3rdparty/nvtt/nvcore/defsgnucdarwin.h

@@ -1,57 +0,0 @@
-#ifndef NV_CORE_H
-#error "Do not include this file directly."
-#endif
-
-#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
-#include <stddef.h> // operator new, size_t, NULL
-
-#ifndef __STDC_VERSION__
-#	define __STDC_VERSION__ 0
-#endif // __STDC_VERSION__
-
-// Function linkage
-#define DLL_IMPORT
-#if __GNUC__ >= 4
-#	define DLL_EXPORT __attribute__((visibility("default")))
-#	define DLL_EXPORT_CLASS DLL_EXPORT
-#else
-#	define DLL_EXPORT
-#	define DLL_EXPORT_CLASS
-#endif
-
-// Function calling modes
-#if NV_CPU_X86
-#	define NV_CDECL 	__attribute__((cdecl))
-#	define NV_STDCALL	__attribute__((stdcall))
-#else
-#	define NV_CDECL 
-#	define NV_STDCALL
-#endif
-
-#define NV_FASTCALL		__attribute__((fastcall))
-#define NV_FORCEINLINE	inline
-#define NV_DEPRECATED   __attribute__((deprecated))
-#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX
-
-#if __GNUC__ > 2
-#define NV_PURE     __attribute__((pure))
-#define NV_CONST    __attribute__((const))
-#else
-#define NV_PURE
-#define NV_CONST
-#endif
-
-#define NV_NOINLINE __attribute__((noinline))
-
-// Define __FUNC__ properly.
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
-#	if __GNUC__ >= 2
-#		define __FUNC__ __PRETTY_FUNCTION__	// __FUNCTION__
-#	else
-#		define __FUNC__ "<unknown>"
-#	endif
-#else
-#	define __FUNC__ __PRETTY_FUNCTION__
-#endif
-
-#define restrict    __restrict__

+ 0 - 63
3rdparty/nvtt/nvcore/defsgnuclinux.h

@@ -1,63 +0,0 @@
-#ifndef NV_CORE_H
-#error "Do not include this file directly."
-#endif
-
-#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
-#include <stddef.h> // operator new, size_t, NULL
-
-#ifndef __STDC_VERSION__
-#	define __STDC_VERSION__ 0
-#endif
-
-// Function linkage
-#define DLL_IMPORT
-#if __GNUC__ >= 4
-#   define DLL_EXPORT   __attribute__((visibility("default")))
-#   define DLL_EXPORT_CLASS DLL_EXPORT
-#else
-#   define DLL_EXPORT
-#   define DLL_EXPORT_CLASS
-#endif
-
-// Function calling modes
-#if NV_CPU_X86
-#   define NV_CDECL     __attribute__((cdecl))
-#   define NV_STDCALL   __attribute__((stdcall))
-#else
-#   define NV_CDECL 
-#   define NV_STDCALL
-#endif
-
-#define NV_FASTCALL     __attribute__((fastcall))
-//#if __GNUC__ > 3
-// It seems that GCC does not assume always_inline implies inline. I think this depends on the GCC version :(
-#define NV_FORCEINLINE  inline
-//#else
-// Some compilers complain that inline and always_inline are redundant.
-//#define NV_FORCEINLINE  __attribute__((always_inline))
-//#endif
-#define NV_DEPRECATED   __attribute__((deprecated))
-#define NV_THREAD_LOCAL __thread 
-
-#if __GNUC__ > 2
-#define NV_PURE     __attribute__((pure))
-#define NV_CONST    __attribute__((const))
-#else
-#define NV_PURE
-#define NV_CONST
-#endif
-
-#define NV_NOINLINE __attribute__((noinline))
-
-// Define __FUNC__ properly.
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
-#   if __GNUC__ >= 2
-#       define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
-#   else
-#       define __FUNC__ "<unknown>"
-#   endif
-#else
-#   define __FUNC__ __PRETTY_FUNCTION__
-#endif
-
-#define restrict    __restrict__

+ 0 - 65
3rdparty/nvtt/nvcore/defsgnucwin32.h

@@ -1,65 +0,0 @@
-#ifndef NV_CORE_H
-#error "Do not include this file directly."
-#endif
-
-//#include <cstddef> // size_t, NULL
-
-// Function linkage
-#define DLL_IMPORT	__declspec(dllimport)
-#define DLL_EXPORT	__declspec(dllexport)
-#define DLL_EXPORT_CLASS DLL_EXPORT
-
-// Function calling modes
-#if NV_CPU_X86
-#	define NV_CDECL 	__attribute__((cdecl))
-#	define NV_STDCALL	__attribute__((stdcall))
-#else
-#	define NV_CDECL 
-#	define NV_STDCALL
-#endif
-
-#define NV_FASTCALL		__attribute__((fastcall))
-#define NV_FORCEINLINE	inline
-#define NV_DEPRECATED   __attribute__((deprecated))
-
-#if __GNUC__ > 2
-#define NV_PURE		__attribute__((pure))
-#define NV_CONST	__attribute__((const))
-#else
-#define NV_PURE
-#define NV_CONST
-#endif
-
-#define NV_NOINLINE __attribute__((noinline))
-
-// Define __FUNC__ properly.
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
-#	if __GNUC__ >= 2
-#		define __FUNC__ __PRETTY_FUNCTION__	// __FUNCTION__
-#	else
-#		define __FUNC__ "<unknown>"
-#	endif
-#else
-#	define __FUNC__ __PRETTY_FUNCTION__
-#endif
-
-#define restrict	__restrict__
-
-/*
-// Type definitions
-typedef unsigned char		uint8;
-typedef signed char			int8;
-
-typedef unsigned short		uint16;
-typedef signed short		int16;
-
-typedef unsigned int		uint32;
-typedef signed int			int32;
-
-typedef unsigned long long	uint64;
-typedef signed long long	int64;
-
-// Aliases
-typedef uint32				uint;
-*/
-

+ 0 - 94
3rdparty/nvtt/nvcore/defsvcwin32.h

@@ -1,94 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#ifndef NV_CORE_H
-#error "Do not include this file directly."
-#endif
-
-// Function linkage
-#define DLL_IMPORT __declspec(dllimport)
-#define DLL_EXPORT __declspec(dllexport)
-#define DLL_EXPORT_CLASS DLL_EXPORT
-
-// Function calling modes
-#define NV_CDECL        __cdecl
-#define NV_STDCALL      __stdcall
-#define NV_FASTCALL     __fastcall
-#define NV_DEPRECATED
-
-#define NV_PURE
-#define NV_CONST
-
-// Set standard function names.
-#if _MSC_VER < 1900
-#   define snprintf _snprintf
-#endif
-#if _MSC_VER < 1500
-#   define vsnprintf _vsnprintf
-#endif
-#if _MSC_VER < 1700
-#   define strtoll _strtoi64
-#   define strtoull _strtoui64
-#endif
-#define chdir _chdir
-#define getcwd _getcwd 
-
-#if _MSC_VER < 1800 // Not sure what version introduced this.
-#define va_copy(a, b) (a) = (b)
-#endif
-
-#if !defined restrict
-#define restrict
-#endif
-
-// Ignore gcc attributes.
-#define __attribute__(X)
-
-#if !defined __FUNC__
-#define __FUNC__ __FUNCTION__ 
-#endif
-
-#define NV_NOINLINE __declspec(noinline)
-#define NV_FORCEINLINE inline
-
-#define NV_THREAD_LOCAL __declspec(thread)
-
-/*
-// Type definitions
-typedef unsigned char       uint8;
-typedef signed char         int8;
-
-typedef unsigned short      uint16;
-typedef signed short        int16;
-
-typedef unsigned int        uint32;
-typedef signed int          int32;
-
-typedef unsigned __int64    uint64;
-typedef signed __int64      int64;
-
-// Aliases
-typedef uint32              uint;
-*/
-
-// Unwanted VC++ warnings to disable.
-/*
-#pragma warning(disable : 4244)     // conversion to float, possible loss of data
-#pragma warning(disable : 4245)     // conversion from 'enum ' to 'unsigned long', signed/unsigned mismatch
-#pragma warning(disable : 4100)     // unreferenced formal parameter
-#pragma warning(disable : 4514)     // unreferenced inline function has been removed
-#pragma warning(disable : 4710)     // inline function not expanded
-#pragma warning(disable : 4127)     // Conditional expression is constant
-#pragma warning(disable : 4305)     // truncation from 'const double' to 'float'
-#pragma warning(disable : 4505)     // unreferenced local function has been removed
-
-#pragma warning(disable : 4702)     // unreachable code in inline expanded function
-#pragma warning(disable : 4711)     // function selected for automatic inlining
-#pragma warning(disable : 4725)     // Pentium fdiv bug
-
-#pragma warning(disable : 4786)     // Identifier was truncated and cannot be debugged.
-
-#pragma warning(disable : 4675)     // resolved overload was found by argument-dependent lookup
-*/
-
-#pragma warning(1 : 4705)     // Report unused local variables.
-#pragma warning(1 : 4555)     // Expression has no effect.

+ 0 - 68
3rdparty/nvtt/nvcore/foreach.h

@@ -1,68 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#pragma once
-#ifndef NV_CORE_FOREACH_H
-#define NV_CORE_FOREACH_H
-
-/*
-These foreach macros are very non-standard and somewhat confusing, but I like them.
-*/
-
-#include "nvcore.h"
-
-#if NV_CC_GNUC // If typeof or decltype is available:
-#if !NV_CC_CPP11
-#   define NV_DECLTYPE typeof // Using a non-standard extension over typeof that behaves as C++11 decltype
-#else
-#   define NV_DECLTYPE decltype
-#endif
-
-/*
-Ideally we would like to write this:
-
-#define NV_FOREACH(i, container) \
-    for(NV_DECLTYPE(container)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
-
-But gcc versions prior to 4.7 required an intermediate type. See:
-https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
-*/
-
-#define NV_FOREACH(i, container) \
-    typedef NV_DECLTYPE(container) NV_STRING_JOIN2(cont,__LINE__); \
-    for(NV_STRING_JOIN2(cont,__LINE__)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
-
-#else // If typeof not available:
-
-#include <new> // placement new
-
-struct PseudoIndexWrapper {
-    template <typename T>
-    PseudoIndexWrapper(const T & container) {
-        nvStaticCheck(sizeof(typename T::PseudoIndex) <= sizeof(memory));
-        new (memory) typename T::PseudoIndex(container.start());
-    }
-    // PseudoIndex cannot have a dtor!
-
-    template <typename T> typename T::PseudoIndex & operator()(const T * /*container*/) {
-        return *reinterpret_cast<typename T::PseudoIndex *>(memory);
-    }
-    template <typename T> const typename T::PseudoIndex & operator()(const T * /*container*/) const {
-        return *reinterpret_cast<const typename T::PseudoIndex *>(memory);
-    }
-
-    uint8 memory[4];	// Increase the size if we have bigger enumerators.
-};
-
-#define NV_FOREACH(i, container) \
-    for(PseudoIndexWrapper i(container); !(container).isDone(i(&(container))); (container).advance(i(&(container))))
-
-#endif
-
-// Declare foreach keyword.
-#if !defined NV_NO_USE_KEYWORDS
-#   define foreach NV_FOREACH
-#   define foreach_index NV_FOREACH
-#endif
-
-
-#endif // NV_CORE_FOREACH_H

+ 0 - 83
3rdparty/nvtt/nvcore/hash.h

@@ -1,83 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#pragma once
-#ifndef NV_CORE_HASH_H
-#define NV_CORE_HASH_H
-
-#include "nvcore.h"
-
-namespace nv
-{
-    inline uint sdbmHash(const void * data_in, uint size, uint h = 5381)
-    {
-        const uint8 * data = (const uint8 *) data_in;
-        uint i = 0;
-        while (i < size) {
-            h = (h << 16) + (h << 6) - h + (uint) data[i++];
-        }
-        return h;
-    }
-
-    // Note that this hash does not handle NaN properly.
-    inline uint sdbmFloatHash(const float * f, uint count, uint h = 5381)
-    {
-        for (uint i = 0; i < count; i++) {
-            //nvDebugCheck(nv::isFinite(*f));
-            union { float f; uint32 i; } x = { f[i] };
-            if (x.i == 0x80000000) x.i = 0;
-            h = sdbmHash(&x, 4, h);
-        }
-        return h;
-    }
-
-
-    template <typename T>
-    inline uint hash(const T & t, uint h = 5381)
-    {
-        return sdbmHash(&t, sizeof(T), h);
-    }
-
-    template <>
-    inline uint hash(const float & f, uint h)
-    {
-        return sdbmFloatHash(&f, 1, h);
-    }
-
-
-    // Functors for hash table:
-    template <typename Key> struct Hash 
-    {
-        uint operator()(const Key & k) const {
-            return hash(k);
-        }
-    };
-
-    template <typename Key> struct Equal
-    {
-        bool operator()(const Key & k0, const Key & k1) const {
-            return k0 == k1;
-        }
-    };
-
-
-    // @@ Move to Utils.h?
-    template <typename T1, typename T2>
-    struct Pair {
-        T1 first;
-        T2 second;
-    };
-
-    template <typename T1, typename T2>
-    bool operator==(const Pair<T1,T2> & p0, const Pair<T1,T2> & p1) {
-        return p0.first == p1.first && p0.second == p1.second;
-    }
-
-    template <typename T1, typename T2>
-    uint hash(const Pair<T1,T2> & p, uint h = 5381) {
-        return hash(p.second, hash(p.first));
-    }
-
-
-} // nv namespace
-
-#endif // NV_CORE_HASH_H

+ 0 - 30
3rdparty/nvtt/nvcore/memory.h

@@ -1,30 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#ifndef NV_CORE_MEMORY_H
-#define NV_CORE_MEMORY_H
-
-#include "nvcore.h"
-#include <stdlib.h>
-
-namespace nv {
-
-    // C++ helpers.
-    template <typename T> inline T * malloc(size_t count) {
-        return (T *)::malloc(sizeof(T) * count);
-    }
-
-    template <typename T> inline T * realloc(T * ptr, size_t count) {
-        return (T *)::realloc(ptr, sizeof(T) * count);
-    }
-
-    template <typename T> inline void free(const T * ptr) {
-        ::free((void *)ptr);
-    }
-
-    template <typename T> inline void zero(T & data) {
-        memset(&data, 0, sizeof(T));
-    }
-
-} // nv namespace
-
-#endif // NV_CORE_MEMORY_H

+ 0 - 363
3rdparty/nvtt/nvcore/nvcore.h

@@ -1,363 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#ifndef NV_CORE_H
-#define NV_CORE_H
-
-#define NVCORE_SHARED 0
-#define NV_NO_ASSERT 0
-
-// Function linkage
-#if NVCORE_SHARED
-#ifdef NVCORE_EXPORTS
-#define NVCORE_API DLL_EXPORT
-#define NVCORE_CLASS DLL_EXPORT_CLASS
-#else
-#define NVCORE_API DLL_IMPORT
-#define NVCORE_CLASS DLL_IMPORT
-#endif
-#else // NVCORE_SHARED
-#define NVCORE_API
-#define NVCORE_CLASS
-#endif // NVCORE_SHARED
-
-// Platform definitions
-#include "posh.h"
-
-#define NV_OS_STRING POSH_OS_STRING
-
-#if defined POSH_OS_LINUX
-#   define NV_OS_LINUX 1
-#   define NV_OS_UNIX 1
-#elif defined POSH_OS_ORBIS
-#   define NV_OS_ORBIS 1
-#elif defined POSH_OS_FREEBSD
-#   define NV_OS_FREEBSD 1
-#   define NV_OS_UNIX 1
-#elif defined POSH_OS_OPENBSD
-#   define NV_OS_OPENBSD 1
-#   define NV_OS_UNIX 1
-#elif defined POSH_OS_CYGWIN32
-#   define NV_OS_CYGWIN 1
-#elif defined POSH_OS_MINGW
-#   define NV_OS_MINGW 1
-#   define NV_OS_WIN32 1
-#elif defined POSH_OS_OSX
-#   define NV_OS_DARWIN 1
-#   define NV_OS_UNIX 1
-#elif defined POSH_OS_IOS
-#   define NV_OS_DARWIN 1 //ACS should we keep this on IOS?
-#   define NV_OS_UNIX 1
-#   define NV_OS_IOS 1
-#elif defined POSH_OS_UNIX
-#   define NV_OS_UNIX 1
-#elif defined POSH_OS_WIN64
-#   define NV_OS_WIN32 1
-#   define NV_OS_WIN64 1
-#elif defined POSH_OS_WIN32
-#   define NV_OS_WIN32 1
-#elif defined POSH_OS_XBOX
-#   define NV_OS_XBOX 1
-#else
-#   error "Unsupported OS"
-#endif
-
-#ifndef NV_OS_WIN32
-#	define NV_OS_WIN32  0
-#endif // NV_OS_WIN32
-
-#ifndef NV_OS_WIN64
-#	define NV_OS_WIN64  0
-#endif // NV_OS_WIN64
-
-#ifndef NV_OS_MINGW
-#	define NV_OS_MINGW  0
-#endif // NV_OS_MINGW
-
-#ifndef NV_OS_CYGWIN
-#	define NV_OS_CYGWIN 0
-#endif // NV_OS_CYGWIN
-
-#ifndef NV_OS_LINUX
-#	define NV_OS_LINUX  0
-#endif // NV_OS_LINUX
-
-#ifndef NV_OS_FREEBSD
-#	define NV_OS_FREEBSD 0
-#endif // NV_OS_FREEBSD
-
-#ifndef NV_OS_OPENBSD
-#	define NV_OS_OPENBSD 0
-#endif // NV_OS_OPENBSD
-
-#ifndef NV_OS_UNIX
-#	define NV_OS_UNIX   0
-#endif // NV_OS_UNIX
-
-#ifndef NV_OS_DARWIN
-#	define NV_OS_DARWIN 0
-#endif // NV_OS_DARWIN
-
-#ifndef NV_OS_XBOX
-#	define NV_OS_XBOX   0
-#endif // NV_OS_XBOX
-
-#ifndef NV_OS_ORBIS
-#	define NV_OS_ORBIS  0
-#endif // NV_OS_ORBIS
-
-#ifndef NV_OS_IOS
-#	define NV_OS_IOS    0
-#endif // NV_OS_IOS
-
-// Threading:
-// some platforms don't implement __thread or similar for thread-local-storage
-#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios?
-#   define NV_OS_USE_PTHREAD 1
-#   if NV_OS_DARWIN || NV_OS_IOS
-#       define NV_OS_HAS_TLS_QUALIFIER 0
-#   else
-#       define NV_OS_HAS_TLS_QUALIFIER 1
-#   endif
-#else
-#   define NV_OS_USE_PTHREAD 0
-#   define NV_OS_HAS_TLS_QUALIFIER 1
-#endif
-
-
-// CPUs:
-
-#define NV_CPU_STRING   POSH_CPU_STRING
-
-#if defined POSH_CPU_X86_64
-//#   define NV_CPU_X86 1
-#   define NV_CPU_X86_64 1
-#elif defined POSH_CPU_X86
-#   define NV_CPU_X86 1
-#elif defined POSH_CPU_PPC
-#   define NV_CPU_PPC 1
-#elif defined POSH_CPU_STRONGARM
-#   define NV_CPU_ARM 1
-#elif defined POSH_CPU_AARCH64
-#   define NV_CPU_AARCH64 1
-#else
-#   error "Unsupported CPU"
-#endif
-
-#ifndef NV_CPU_X86
-#	define NV_CPU_X86     0
-#endif // NV_CPU_X86
-
-#ifndef NV_CPU_X86_64
-#	define NV_CPU_X86_64  0
-#endif // NV_CPU_X86_64
-
-#ifndef NV_CPU_PPC
-#	define NV_CPU_PPC     0
-#endif // NV_CPU_PPC
-
-#ifndef NV_CPU_ARM
-#	define NV_CPU_ARM     0
-#endif // NV_CPU_ARM
-
-#ifndef NV_CPU_AARCH64
-#	define NV_CPU_AARCH64 0
-#endif // NV_CPU_AARCH64
-
-// Compiler:
-
-#if defined POSH_COMPILER_CLANG
-#   define NV_CC_CLANG  1
-#   define NV_CC_GNUC   1    // Clang is compatible with GCC.
-#   define NV_CC_STRING "clang"
-#	pragma clang diagnostic ignored "-Wmissing-braces"
-#	pragma clang diagnostic ignored "-Wshadow"
-#	pragma clang diagnostic ignored "-Wunused-local-typedef"
-#	pragma clang diagnostic ignored "-Wunused-function"
-#	pragma clang diagnostic ignored "-Wunused-variable"
-#	pragma clang diagnostic ignored "-Wunused-parameter"
-#	pragma clang diagnostic ignored "-Wsometimes-uninitialized"
-#elif defined POSH_COMPILER_GCC
-#   define NV_CC_GNUC   1
-#   define NV_CC_STRING "gcc"
-#	pragma GCC diagnostic ignored "-Wshadow"
-#	pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
-#	pragma GCC diagnostic ignored "-Wunused-function"
-#	pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#	pragma GCC diagnostic ignored "-Wunused-variable"
-#	pragma GCC diagnostic ignored "-Wunused-parameter"
-#	pragma GCC diagnostic ignored "-Warray-bounds"
-#elif defined POSH_COMPILER_MSVC
-#   define NV_CC_MSVC   1
-#   define NV_CC_STRING "msvc"
-#else
-#   error "Unsupported compiler"
-#endif
-
-#ifndef NV_CC_GNUC
-#	define NV_CC_GNUC  0
-#endif // NV_CC_GNUC
-
-#ifndef NV_CC_MSVC
-#	define NV_CC_MSVC  0
-#endif // NV_CC_MSVC
-
-#ifndef NV_CC_CLANG
-#	define NV_CC_CLANG 0
-#endif // NV_CC_CLANG
-
-#if NV_CC_MSVC
-#define NV_CC_CPP11 (__cplusplus > 199711L || _MSC_VER >= 1800) // Visual Studio 2013 has all the features we use, but doesn't advertise full C++11 support yet.
-#else
-// @@ IC: This works in Clang; what about GCC?
-// @@ ES: Doesn't work in gcc. These 3 features are available in GCC >= 4.4.
-#ifdef __clang__
-#define NV_CC_CPP11 (__has_feature(cxx_deleted_functions) && __has_feature(cxx_rvalue_references) && __has_feature(cxx_static_assert))
-#elif defined __GNUC__ 
-#define NV_CC_CPP11 ( __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
-#endif
-#endif
-
-// Endianness:
-#define NV_LITTLE_ENDIAN    POSH_LITTLE_ENDIAN
-#define NV_BIG_ENDIAN       POSH_BIG_ENDIAN
-#define NV_ENDIAN_STRING    POSH_ENDIAN_STRING
-
-
-// Type definitions:
-typedef posh_u8_t   uint8;
-typedef posh_i8_t   int8;
-
-typedef posh_u16_t  uint16;
-typedef posh_i16_t  int16;
-
-typedef posh_u32_t  uint32;
-typedef posh_i32_t  int32;
-
-typedef posh_u64_t  uint64;
-typedef posh_i64_t  int64;
-
-// Aliases
-typedef uint32      uint;
-
-
-// Version string:
-#define NV_VERSION_STRING \
-    NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \
-    NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__
-
-
-// Disable copy constructor and assignment operator. 
-#if NV_CC_CPP11
-#define NV_FORBID_COPY(C) \
-    C( const C & ) = delete; \
-    C &operator=( const C & ) = delete
-#else
-#define NV_FORBID_COPY(C) \
-    private: \
-    C( const C & ); \
-    C &operator=( const C & )
-#endif
-
-// Disable dynamic allocation on the heap. 
-// See Prohibiting Heap-Based Objects in More Effective C++.
-#define NV_FORBID_HEAPALLOC() \
-    private: \
-    void *operator new(size_t size); \
-    void *operator new[](size_t size)
-
-// String concatenation macros.
-#define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2)
-#define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2
-#define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3)
-#define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3
-#define NV_STRING2(x) #x
-#define NV_STRING(x) NV_STRING2(x)
-
-#if NV_CC_MSVC
-#define NV_MULTI_LINE_MACRO_BEGIN do {  
-#define NV_MULTI_LINE_MACRO_END \
-    __pragma(warning(push)) \
-    __pragma(warning(disable:4127)) \
-    } while(false) \
-    __pragma(warning(pop))  
-#else
-#define NV_MULTI_LINE_MACRO_BEGIN do {
-#define NV_MULTI_LINE_MACRO_END } while(false)
-#endif
-
-#if NV_CC_CPP11
-#define nvStaticCheck(x) static_assert((x), "Static assert "#x" failed")
-#else
-#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
-#endif
-#define NV_COMPILER_CHECK(x) nvStaticCheck(x)   // I like this name best.
-
-// Make sure type definitions are fine.
-NV_COMPILER_CHECK(sizeof(int8) == 1);
-NV_COMPILER_CHECK(sizeof(uint8) == 1);
-NV_COMPILER_CHECK(sizeof(int16) == 2);
-NV_COMPILER_CHECK(sizeof(uint16) == 2);
-NV_COMPILER_CHECK(sizeof(int32) == 4);
-NV_COMPILER_CHECK(sizeof(uint32) == 4);
-NV_COMPILER_CHECK(sizeof(int32) == 4);
-NV_COMPILER_CHECK(sizeof(uint32) == 4);
-
-
-#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
-
-#if 0 // Disabled in The Witness.
-#if NV_CC_MSVC
-#define NV_MESSAGE(x) message(__FILE__ "(" NV_STRING(__LINE__) ") : " x)
-#else
-#define NV_MESSAGE(x) message(x)
-#endif
-#else
-#define NV_MESSAGE(x) 
-#endif
-
-
-// Startup initialization macro.
-#define NV_AT_STARTUP(some_code) \
-    namespace { \
-        static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \
-            NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \
-        } \
-        NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
-    }
-
-// Tell the compiler that the parameter is intentionally unused, to suppress compiler warnings.
-#define NV_UNUSED(a) ((a)=(a))
-
-// Null index. @@ Move this somewhere else... it's only used by nvmesh.
-//const unsigned int NIL = unsigned int(~0);
-//#define NIL uint(~0)
-
-// Null pointer.
-#ifndef NULL
-#define NULL 0
-#endif
-
-// Platform includes
-#if NV_CC_MSVC
-#   if NV_OS_WIN32
-#       include "defsvcwin32.h"
-#   elif NV_OS_XBOX
-#       include "defsvcxbox.h"
-#   else
-#       error "MSVC: Platform not supported"
-#   endif
-#elif NV_CC_GNUC
-#   if NV_OS_LINUX
-#       include "defsgnuclinux.h"
-#   elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD
-#       include "defsgnucdarwin.h"
-#   elif NV_OS_MINGW
-#       include "defsgnucwin32.h"
-#   elif NV_OS_CYGWIN
-#       error "GCC: Cygwin not supported"
-#   else
-#       error "GCC: Platform not supported"
-#   endif
-#endif
-
-#endif // NV_CORE_H

+ 0 - 1030
3rdparty/nvtt/nvcore/posh.h

@@ -1,1030 +0,0 @@
-/**
-@file posh.h
-@author Brian Hook
-@version 1.3.001
-
-Header file for POSH, the Portable Open Source Harness project.
-
-NOTE: Unlike most header files, this one is designed to be included
-multiple times, which is why it does not have the @#ifndef/@#define
-preamble.
-
-POSH relies on environment specified preprocessor symbols in order
-to infer as much as possible about the target OS/architecture and
-the host compiler capabilities.
-
-NOTE: POSH is simple and focused. It attempts to provide basic
-functionality and information, but it does NOT attempt to emulate
-missing functionality.  I am also not willing to make POSH dirty
-and hackish to support truly ancient and/or outmoded and/or bizarre
-technologies such as non-ANSI compilers, systems with non-IEEE
-floating point formats, segmented 16-bit operating systems, etc.
-
-Please refer to the accompanying HTML documentation or visit
-http://www.poshlib.org for more information on how to use POSH.
-
-LICENSE:
-
-Copyright (c) 2004, Brian Hook
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above
-      copyright notice, this list of conditions and the following
-      disclaimer in the documentation and/or other materials provided
-      with the distribution.
-
-    * The names of this package'ss contributors contributors may not
-      be used to endorse or promote products derived from this
-      software without specific prior written permission.
-
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-REVISION:
-
-I've been lax about revision histories, so this starts at, um, 1.3.001.
-Sorry for any inconveniences.
-
-1.3.001 - 2/23/2006 - Incorporated fix for bug reported by Bill Cary,
-                      where I was not detecting Visual Studio
-                      compilation on x86-64 systems.  Added check for
-                      _M_X64 which should fix that.
-
-*/
-/*
-I have yet to find an authoritative reference on preprocessor
-symbols, but so far this is what I've gleaned:
-
-GNU GCC/G++:
-   - __GNUC__: GNU C version
-   - __GNUG__: GNU C++ compiler
-   - __sun__ : on Sun platforms
-   - __svr4__: on Solaris and other SysV R4 platforms
-   - __mips__: on MIPS processor platforms
-   - __sparc_v9__: on Sparc 64-bit CPUs
-   - __sparcv9: 64-bit Solaris
-   - __MIPSEL__: mips processor, compiled for little endian
-   - __MIPSEB__: mips processor, compiled for big endian
-   - _R5900: MIPS/Sony/Toshiba R5900 (PS2)
-   - mc68000: 68K
-   - m68000: 68K
-   - m68k: 68K
-   - __palmos__: PalmOS
-
-Intel C/C++ Compiler:
-   - __ECC      : compiler version, IA64 only
-   - __EDG__
-   - __ELF__
-   - __GXX_ABI_VERSION
-   - __i386     : IA-32 only
-   - __i386__   : IA-32 only
-   - i386       : IA-32 only
-   - __ia64     : IA-64 only
-   - __ia64__   : IA-64 only
-   - ia64       : IA-64 only
-   - __ICC      : IA-32 only
-   - __INTEL_COMPILER : IA-32 or IA-64, newer versions only
-
-Apple's C/C++ Compiler for OS X:
-   - __APPLE_CC__
-   - __APPLE__
-   - __BIG_ENDIAN__
-   - __APPLE__
-   - __ppc__
-   - __MACH__
-
-DJGPP:
-   - __MSDOS__
-   - __unix__
-   - __unix
-   - __GNUC__
-   - __GO32
-   - DJGPP
-   - __i386, __i386__, i386
-
-Cray's C compiler:
-   - _ADDR64: if 64-bit pointers
-   - _UNICOS: 
-   - __unix:
-
-SGI's CC compiler predefines the following (and more) with -ansi:
-   - __sgi
-   - __unix
-   - __host_mips
-   - _SYSTYPE_SVR4
-   - __mips
-   - _MIPSEB
-   - anyone know if there is a predefined symbol for the compiler?!
-
-MinGW:
-   - as GnuC but also defines _WIN32, __WIN32, WIN32, _X86_, __i386, __i386__, and several others
-   - __MINGW32__
-
-Cygwin:
-   - as Gnu C, but also
-   - __unix__
-   - __CYGWIN32__
-
-Microsoft Visual Studio predefines the following:
-   - _MSC_VER
-   - _WIN32: on Win32
-   - _M_IX86 (on x86 systems)
-   - _M_X64: on x86-64 systems
-   - _M_ALPHA (on DEC AXP systems)
-   - _SH3: WinCE, Hitachi SH-3
-   - _MIPS: WinCE, MIPS
-   - _ARM: WinCE, ARM
-
-Sun's C Compiler:
-   - sun and _sun
-   - unix and _unix
-   - sparc and _sparc (SPARC systems only)
-   - i386 and _i386 (x86 systems only)
-   - __SVR4 (Solaris only)
-   - __sparcv9: 64-bit solaris
-   - __SUNPRO_C
-   - _LP64: defined in 64-bit LP64 mode, but only if <sys/types.h> is included
-
-Borland C/C++ predefines the following:
-   - __BORLANDC__:
-
-DEC/Compaq C/C++ on Alpha:
-   - __alpha
-   - __arch64__
-   - __unix__ (on Tru64 Unix)
-   - __osf__
-   - __DECC
-   - __DECCXX (C++ compilation)
-   - __DECC_VER
-   - __DECCXX_VER
-
-IBM's AIX compiler:
-   - __64BIT__ if 64-bit mode
-   - _AIX
-   - __IBMC__: C compiler version
-   - __IBMCPP__: C++ compiler version
-   - _LONG_LONG: compiler allows long long
-
-Watcom:
-   - __WATCOMC__
-   - __DOS__ : if targeting DOS
-   - __386__ : if 32-bit support
-   - __WIN32__ : if targeting 32-bit Windows
-
-HP-UX C/C++ Compiler:
-   - __hpux
-   - __unix
-   - __hppa (on PA-RISC)
-   - __LP64__: if compiled in 64-bit mode
-
-Metrowerks:
-   - __MWERKS__
-   - __powerpc__
-   - _powerc
-   - __MC68K__
-   - macintosh when compiling for MacOS
-   - __INTEL__ for x86 targets
-   - __POWERPC__
-
-LLVM:
-   - __llvm__
-   - __clang__
-*/
-
-/*
-** ----------------------------------------------------------------------------
-** Include <limits.h> optionally
-** ----------------------------------------------------------------------------
-*/
-#ifdef POSH_USE_LIMITS_H
-#  include <limits.h>
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** Determine compilation environment
-** ----------------------------------------------------------------------------
-*/
-#if defined __ECC || defined __ICC || defined __INTEL_COMPILER
-#  define POSH_COMPILER_STRING "Intel C/C++"
-#  define POSH_COMPILER_INTEL 1
-#endif
-
-#if ( defined __host_mips || defined __sgi ) && !defined __GNUC__
-#  define POSH_COMPILER_STRING    "MIPSpro C/C++"
-#  define POSH_COMPILER_MIPSPRO 1 
-#endif
-
-#if defined __hpux && !defined __GNUC__
-#  define POSH_COMPILER_STRING "HP-UX CC"
-#  define POSH_COMPILER_HPCC 1 
-#endif
-
-#if defined __clang__
-#  define POSH_COMPILER_STRING "Clang"
-#  define POSH_COMPILER_CLANG 1
-#endif
-
-#if defined __GNUC__ && !defined __clang__
-#  define POSH_COMPILER_STRING "Gnu GCC"
-#  define POSH_COMPILER_GCC 1
-#endif
-
-#if defined __APPLE_CC__
-   /* we don't define the compiler string here, let it be GNU */
-#  define POSH_COMPILER_APPLECC 1
-#endif
-
-#if defined __IBMC__ || defined __IBMCPP__
-#  define POSH_COMPILER_STRING "IBM C/C++"
-#  define POSH_COMPILER_IBM 1
-#endif
-
-#if defined _MSC_VER
-#  define POSH_COMPILER_STRING "Microsoft Visual C++"
-#  define POSH_COMPILER_MSVC 1
-#endif
-
-#if defined __SUNPRO_C
-#  define POSH_COMPILER_STRING "Sun Pro" 
-#  define POSH_COMPILER_SUN 1
-#endif
-
-#if defined __BORLANDC__
-#  define POSH_COMPILER_STRING "Borland C/C++"
-#  define POSH_COMPILER_BORLAND 1
-#endif
-
-#if defined __MWERKS__
-#  define POSH_COMPILER_STRING     "MetroWerks CodeWarrior"
-#  define POSH_COMPILER_METROWERKS 1
-#endif
-
-#if defined __DECC || defined __DECCXX
-#  define POSH_COMPILER_STRING "Compaq/DEC C/C++"
-#  define POSH_COMPILER_DEC 1
-#endif
-
-#if defined __WATCOMC__
-#  define POSH_COMPILER_STRING "Watcom C/C++"
-#  define POSH_COMPILER_WATCOM 1
-#endif
-
-#if !defined POSH_COMPILER_STRING
-#  define POSH_COMPILER_STRING "Unknown compiler"
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** Determine target operating system
-** ----------------------------------------------------------------------------
-*/
-#if defined linux || defined __linux__
-#  define POSH_OS_LINUX 1 
-#  define POSH_OS_STRING "Linux"
-#endif
-
-#if defined __FreeBSD__
-#  define POSH_OS_FREEBSD 1 
-#  define POSH_OS_STRING "FreeBSD"
-#endif
-
-#if defined __OpenBSD__
-#  define POSH_OS_OPENBSD 1
-#  define POSH_OS_STRING "OpenBSD"
-#endif
-
-#if defined __CYGWIN32__
-#  define POSH_OS_CYGWIN32 1
-#  define POSH_OS_STRING "Cygwin"
-#endif
-
-#if defined GEKKO
-#  define POSH_OS_GAMECUBE
-#  define __powerpc__
-#  define POSH_OS_STRING "GameCube"
-#endif
-
-#if defined __MINGW32__
-#  define POSH_OS_MINGW 1
-#  define POSH_OS_STRING "MinGW"
-#endif
-
-#if defined GO32 && defined DJGPP && defined __MSDOS__ 
-#  define POSH_OS_GO32 1
-#  define POSH_OS_STRING "GO32/MS-DOS"
-#endif
-
-/* NOTE: make sure you use /bt=DOS if compiling for 32-bit DOS,
-   otherwise Watcom assumes host=target */
-#if defined __WATCOMC__  && defined __386__ && defined __DOS__
-#  define POSH_OS_DOS32 1
-#  define POSH_OS_STRING "DOS/32-bit"
-#endif
-
-#if defined _UNICOS
-#  define POSH_OS_UNICOS 1
-#  define POSH_OS_STRING "UNICOS"
-#endif
-
-#if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
-#  define POSH_OS_OSX 1
-#  define POSH_OS_STRING "MacOS X"
-#endif
-
-#if defined __sun__ || defined sun || defined __sun || defined __solaris__
-#  if defined __SVR4 || defined __svr4__ || defined __solaris__
-#     define POSH_OS_STRING "Solaris"
-#     define POSH_OS_SOLARIS 1
-#  endif
-#  if !defined POSH_OS_STRING
-#     define POSH_OS_STRING "SunOS"
-#     define POSH_OS_SUNOS 1
-#  endif
-#endif
-
-#if defined __sgi__ || defined sgi || defined __sgi
-#  define POSH_OS_IRIX 1
-#  define POSH_OS_STRING "Irix"
-#endif
-
-#if defined __hpux__ || defined __hpux
-#  define POSH_OS_HPUX 1
-#  define POSH_OS_STRING "HP-UX"
-#endif
-
-#if defined _AIX
-#  define POSH_OS_AIX 1
-#  define POSH_OS_STRING "AIX"
-#endif
-
-#if ( defined __alpha && defined __osf__ )
-#  define POSH_OS_TRU64 1
-#  define POSH_OS_STRING "Tru64"
-#endif
-
-#if defined __BEOS__ || defined __beos__
-#  define POSH_OS_BEOS 1
-#  define POSH_OS_STRING "BeOS"
-#endif
-
-#if defined amiga || defined amigados || defined AMIGA || defined _AMIGA
-#  define POSH_OS_AMIGA 1
-#  define POSH_OS_STRING "Amiga"
-#endif
-
-#if defined __unix__
-#  define POSH_OS_UNIX 1 
-#  if !defined POSH_OS_STRING
-#     define POSH_OS_STRING "Unix-like(generic)"
-#  endif
-#endif
-
-#if defined _WIN32_WCE
-#  define POSH_OS_WINCE 1
-#  define POSH_OS_STRING "Windows CE"
-#endif
-
-#if defined _XBOX || defined _XBOX_VER
-#  define POSH_OS_XBOX 1
-#  define POSH_OS_STRING "XBOX"
-#endif
-
-#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__
-#  define POSH_OS_WIN32 1
-#  if !defined POSH_OS_XBOX
-#     if defined _WIN64
-#        define POSH_OS_WIN64 1
-#        if !defined POSH_OS_STRING
-#           define POSH_OS_STRING "Win64"
-#        endif // !defined POSH_OS_STRING
-#     else
-#        if !defined POSH_OS_STRING
-#           define POSH_OS_STRING "Win32"
-#        endif
-#     endif
-#  endif
-#endif
-
-#if defined __palmos__
-#  define POSH_OS_PALM 1
-#  define POSH_OS_STRING "PalmOS"
-#endif
-
-#if defined THINK_C || defined macintosh
-#  define POSH_OS_MACOS 1
-#  define POSH_OS_STRING "MacOS"
-#endif
-
-/*
-** -----------------------------------------------------------------------------
-** Determine target CPU
-** -----------------------------------------------------------------------------
-*/
-
-#if defined GEKKO
-#  define POSH_CPU_PPC750 1
-#  define POSH_CPU_STRING "IBM PowerPC 750 (NGC)"
-#endif
-
-#if defined mc68000 || defined m68k || defined __MC68K__ || defined m68000
-#  define POSH_CPU_68K 1
-#  define POSH_CPU_STRING "MC68000"
-#endif
-
-#if defined __PPC__ || defined __POWERPC__  || defined powerpc || defined _POWER || defined __ppc__ || defined __powerpc__ || defined _M_PPC
-#  define POSH_CPU_PPC 1
-#  if !defined POSH_CPU_STRING
-#    if defined __powerpc64__
-#       define POSH_CPU_STRING "PowerPC64"
-#    else
-#       define POSH_CPU_STRING "PowerPC"
-#    endif
-#  endif
-#endif
-
-#if defined _CRAYT3E || defined _CRAYMPP
-#  define POSH_CPU_CRAYT3E 1 /* target processor is a DEC Alpha 21164 used in a Cray T3E*/
-#  define POSH_CPU_STRING "Cray T3E (Alpha 21164)"
-#endif
-
-#if defined CRAY || defined _CRAY && !defined _CRAYT3E
-#  error Non-AXP Cray systems not supported
-#endif
-
-#if defined _SH3
-#  define POSH_CPU_SH3 1
-#  define POSH_CPU_STRING "Hitachi SH-3"
-#endif
-
-#if defined __sh4__ || defined __SH4__
-#  define POSH_CPU_SH3 1
-#  define POSH_CPU_SH4 1
-#  define POSH_CPU_STRING "Hitachi SH-4"
-#endif
-
-#if defined __sparc__ || defined __sparc
-#  if defined __arch64__ || defined __sparcv9 || defined __sparc_v9__
-#     define POSH_CPU_SPARC64 1 
-#     define POSH_CPU_STRING "Sparc/64"
-#  else
-#     define POSH_CPU_STRING "Sparc/32"
-#  endif
-#  define POSH_CPU_SPARC 1
-#endif
-
-#if defined ARM || defined __arm__ || defined _ARM
-#  define POSH_CPU_STRONGARM 1
-#  define POSH_CPU_STRING "ARM"
-#endif
-
-#if defined __aarch64__
-#  define POSH_CPU_AARCH64 1
-#  define POSH_CPU_STRING "ARM64"
-#endif
-
-#if defined mips || defined __mips__ || defined __MIPS__ || defined _MIPS
-#  define POSH_CPU_MIPS 1 
-#  if defined _R5900
-#    define POSH_CPU_STRING "MIPS R5900 (PS2)"
-#  else
-#    define POSH_CPU_STRING "MIPS"
-#  endif
-#endif
-
-#if defined __ia64 || defined _M_IA64 || defined __ia64__ 
-#  define POSH_CPU_IA64 1
-#  define POSH_CPU_STRING "IA64"
-#endif
-
-#if defined __X86__ || defined __i386__ || defined i386 || defined _M_IX86 || defined __386__ || defined __x86_64__ || defined _M_X64
-#  define POSH_CPU_X86 1
-#  if defined __x86_64__ || defined _M_X64
-#     define POSH_CPU_X86_64 1 
-#  endif
-#  if defined POSH_CPU_X86_64
-#     define POSH_CPU_STRING "AMD x86-64"
-#  else
-#     define POSH_CPU_STRING "Intel 386+"
-#  endif
-#endif
-
-#if defined __alpha || defined alpha || defined _M_ALPHA || defined __alpha__
-#  define POSH_CPU_AXP 1
-#  define POSH_CPU_STRING "AXP"
-#endif
-
-#if defined __hppa || defined hppa
-#  define POSH_CPU_HPPA 1
-#  define POSH_CPU_STRING "PA-RISC"
-#endif
-
-#if !defined POSH_CPU_STRING
-#  error POSH cannot determine target CPU
-#  define POSH_CPU_STRING "Unknown" /* this is here for Doxygen's benefit */
-#endif
-
-/*
-** -----------------------------------------------------------------------------
-** Attempt to autodetect building for embedded on Sony PS2
-** -----------------------------------------------------------------------------
-*/
-#if !defined POSH_OS_STRING
-#  if !defined FORCE_DOXYGEN
-#    define POSH_OS_EMBEDDED 1 
-#  endif
-#  if defined _R5900
-#     define POSH_OS_STRING "Sony PS2(embedded)"
-#  else
-#     define POSH_OS_STRING "Embedded/Unknown"
-#  endif
-#endif
-
-/*
-** ---------------------------------------------------------------------------
-** Handle cdecl, stdcall, fastcall, etc.
-** ---------------------------------------------------------------------------
-*/
-#if defined POSH_CPU_X86 && !defined POSH_CPU_X86_64
-#  if defined __GNUC__
-#     define POSH_CDECL __attribute__((cdecl))
-#     define POSH_STDCALL __attribute__((stdcall))
-#     define POSH_FASTCALL __attribute__((fastcall))
-#  elif ( defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__ || defined __MWERKS__ )
-#     define POSH_CDECL    __cdecl
-#     define POSH_STDCALL  __stdcall
-#     define POSH_FASTCALL __fastcall
-#  endif
-#else
-#  define POSH_CDECL    
-#  define POSH_STDCALL  
-#  define POSH_FASTCALL 
-#endif
-
-/*
-** ---------------------------------------------------------------------------
-** Define POSH_IMPORTEXPORT signature based on POSH_DLL and POSH_BUILDING_LIB
-** ---------------------------------------------------------------------------
-*/
-
-/*
-** We undefine this so that multiple inclusions will work
-*/
-#if defined POSH_IMPORTEXPORT
-#  undef POSH_IMPORTEXPORT
-#endif
-
-#if defined POSH_DLL
-#   if defined POSH_OS_WIN32
-#      if defined _MSC_VER 
-#         if ( _MSC_VER >= 800 )
-#            if defined POSH_BUILDING_LIB
-#               define POSH_IMPORTEXPORT __declspec( dllexport )
-#            else
-#               define POSH_IMPORTEXPORT __declspec( dllimport )
-#            endif
-#         else
-#            if defined POSH_BUILDING_LIB
-#               define POSH_IMPORTEXPORT __export
-#            else
-#               define POSH_IMPORTEXPORT 
-#            endif
-#         endif
-#      endif  /* defined _MSC_VER */
-#      if defined __BORLANDC__
-#         if ( __BORLANDC__ >= 0x500 )
-#            if defined POSH_BUILDING_LIB 
-#               define POSH_IMPORTEXPORT __declspec( dllexport )
-#            else
-#               define POSH_IMPORTEXPORT __declspec( dllimport )
-#            endif
-#         else
-#            if defined POSH_BUILDING_LIB
-#               define POSH_IMPORTEXPORT __export
-#            else
-#               define POSH_IMPORTEXPORT 
-#            endif
-#         endif
-#      endif /* defined __BORLANDC__ */
-       /* for all other compilers, we're just making a blanket assumption */
-#      if defined __GNUC__ || defined __WATCOMC__ || defined __MWERKS__
-#         if defined POSH_BUILDING_LIB
-#            define POSH_IMPORTEXPORT __declspec( dllexport )
-#         else
-#            define POSH_IMPORTEXPORT __declspec( dllimport )
-#         endif
-#      endif /* all other compilers */
-#      if !defined POSH_IMPORTEXPORT
-#         error Building DLLs not supported on this compiler ([email protected] if you know how)
-#      endif
-#   endif /* defined POSH_OS_WIN32 */
-#endif
-
-/* On pretty much everything else, we can thankfully just ignore this */
-#if !defined POSH_IMPORTEXPORT
-#  define POSH_IMPORTEXPORT
-#endif
-
-#if defined FORCE_DOXYGEN
-#  define POSH_DLL    
-#  define POSH_BUILDING_LIB
-#  undef POSH_DLL
-#  undef POSH_BUILDING_LIB
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** (Re)define POSH_PUBLIC_API export signature 
-** ----------------------------------------------------------------------------
-*/
-#ifdef POSH_PUBLIC_API
-#  undef POSH_PUBLIC_API
-#endif
-
-#if ( ( defined _MSC_VER ) && ( _MSC_VER < 800 ) ) || ( defined __BORLANDC__ && ( __BORLANDC__ < 0x500 ) )
-#  define POSH_PUBLIC_API(rtype) extern rtype POSH_IMPORTEXPORT 
-#else
-#  define POSH_PUBLIC_API(rtype) extern POSH_IMPORTEXPORT rtype
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** Try to infer endianness.  Basically we just go through the CPUs we know are
-** little endian, and assume anything that isn't one of those is big endian.
-** As a sanity check, we also do this with operating systems we know are
-** little endian, such as Windows.  Some processors are bi-endian, such as 
-** the MIPS series, so we have to be careful about those.
-** ----------------------------------------------------------------------------
-*/
-#if defined POSH_CPU_X86 || defined POSH_CPU_AXP || defined POSH_CPU_STRONGARM || defined POSH_CPU_AARCH64 || defined POSH_OS_WIN32 || defined POSH_OS_WINCE || defined __MIPSEL__
-#  define POSH_ENDIAN_STRING "little"
-#  define POSH_LITTLE_ENDIAN 1
-#else
-#  define POSH_ENDIAN_STRING "big"
-#  define POSH_BIG_ENDIAN 1
-#endif
-
-#if defined FORCE_DOXYGEN
-#  define POSH_LITTLE_ENDIAN
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** Cross-platform compile time assertion macro
-** ----------------------------------------------------------------------------
-*/
-#define POSH_COMPILE_TIME_ASSERT(name, x) typedef int _POSH_dummy_ ## name[(x) ? 1 : -1 ]
-
-/*
-** ----------------------------------------------------------------------------
-** 64-bit Integer
-**
-** We don't require 64-bit support, nor do we emulate its functionality, we
-** simply export it if it's available.  Since we can't count on <limits.h>
-** for 64-bit support, we ignore the POSH_USE_LIMITS_H directive.
-** ----------------------------------------------------------------------------
-*/
-/* Pick a native 64-bit integer type for this compiler/target and define the
-   literal helpers POSH_I64()/POSH_U64() plus the printf length prefix.
-   POSH_64BIT_INTEGER stays undefined when none of the branches matches. */
-#if defined ( __LP64__ ) || defined ( __powerpc64__ ) || defined POSH_CPU_SPARC64
-   /* targets on which plain long is used as the 64-bit type */
-#  define POSH_64BIT_INTEGER 1
-typedef long posh_i64_t;
-typedef unsigned long posh_u64_t;
-   /* the argument is parenthesised so the cast applies to the whole expression */
-#  define POSH_I64( x ) ((posh_i64_t)(x))
-#  define POSH_U64( x ) ((posh_u64_t)(x))
-#  define POSH_I64_PRINTF_PREFIX "l"
-#elif defined _MSC_VER || defined __BORLANDC__ || defined __WATCOMC__ || ( defined __alpha && defined __DECC )
-   /* compilers providing the __int64 extension and the i64/ui64 literal suffixes */
-#  define POSH_64BIT_INTEGER 1
-typedef __int64 posh_i64_t;
-typedef unsigned __int64 posh_u64_t;
-#  define POSH_I64( x ) ((posh_i64_t)(x##i64))
-#  define POSH_U64( x ) ((posh_u64_t)(x##ui64))
-#  define POSH_I64_PRINTF_PREFIX "I64"
-#elif defined __GNUC__ || defined __MWERKS__ || defined __SUNPRO_C || defined __SUNPRO_CC || defined __APPLE_CC__ || defined POSH_OS_IRIX || defined _LONG_LONG || defined _CRAYC
-   /* compilers providing long long */
-#  define POSH_64BIT_INTEGER 1
-typedef long long posh_i64_t;
-typedef unsigned long long posh_u64_t;
-   /* unsigned literals get the ULL suffix so values above the signed maximum
-      are typed as unsigned from the start */
-#  define POSH_U64( x ) ((posh_u64_t)(x##ULL))
-#  define POSH_I64( x ) ((posh_i64_t)(x##LL))
-#  define POSH_I64_PRINTF_PREFIX "ll"
-#endif
-
-/* hack */
-/*#ifdef __MINGW32__
-#undef POSH_I64
-#undef POSH_U64
-#undef POSH_I64_PRINTF_PREFIX
-#define POSH_I64( x ) ((posh_i64_t)x)
-#define POSH_U64( x ) ((posh_u64_t)x)
-#define POSH_I64_PRINTF_PREFIX "I64"
-#endif*/
-
-#ifdef FORCE_DOXYGEN
-/* Placeholder declarations used only when generating documentation, so the
-   64-bit types and macros are always visible to Doxygen. */
-typedef long long posh_i64_t;
-typedef unsigned long long posh_u64_t;
-#  define POSH_64BIT_INTEGER
-#  define POSH_I64_PRINTF_PREFIX
-#  define POSH_I64(x)
-#  define POSH_U64(x)
-#endif
-
-/** Minimum value for a 64-bit signed integer */
-#define POSH_I64_MIN  POSH_I64(0x8000000000000000)
-/** Maximum value for a 64-bit signed integer */
-#define POSH_I64_MAX  POSH_I64(0x7FFFFFFFFFFFFFFF)
-/** Minimum value for a 64-bit unsigned integer */
-#define POSH_U64_MIN  POSH_U64(0)
-/** Maximum value for a 64-bit unsigned integer */
-#define POSH_U64_MAX  POSH_U64(0xFFFFFFFFFFFFFFFF)
-
-/* ----------------------------------------------------------------------------
-** Basic Sized Types
-**
-** These types are expected to be EXACTLY sized so you can use them for
-** serialization.
-** ----------------------------------------------------------------------------
-*/
-#define POSH_FALSE 0 
-#define POSH_TRUE  1 
-
-typedef int            posh_bool_t;
-typedef unsigned char  posh_byte_t;
-
-/* NOTE: These assume that CHAR_BIT is 8!! */
-typedef unsigned char  posh_u8_t;
-typedef signed char    posh_i8_t;
-
-#if defined POSH_USE_LIMITS_H
-/* Derive the exact-width types from the <limits.h> constants.
-   NOTE(review): assumes <limits.h> was already included earlier in this
-   header when POSH_USE_LIMITS_H is defined -- confirm. */
-#  if CHAR_BIT > 8
-#    error This machine uses 9-bit characters.  This is a warning, you can comment this out now.
-#  endif /* CHAR_BIT > 8 */
-
-/* 16-bit */
-#  if ( USHRT_MAX == 65535 ) 
-   typedef unsigned short posh_u16_t;
-   typedef short          posh_i16_t;
-#  else
-   /* Yes, in theory there could still be a 16-bit character type and shorts are
-      32-bits in size...if you find such an architecture, let me know =P */
-#    error No 16-bit type found
-#  endif
-
-/* 32-bit: prefer int, fall back to long when int is not 32 bits wide */
-#  if ( INT_MAX == 2147483647 )
-  typedef unsigned       posh_u32_t;
-  typedef int            posh_i32_t;
-#  elif ( LONG_MAX == 2147483647 )
-  typedef unsigned long  posh_u32_t;
-  typedef long           posh_i32_t;
-#  else
-#    error No 32-bit type found
-#  endif
-
-#else /* POSH_USE_LIMITS_H */
-
-/* Without <limits.h>: short is taken as the 16-bit type and int as the 32-bit
-   type, except on PalmOS where long is used for 32 bits.  The
-   POSH_COMPILE_TIME_ASSERT size checks later in this header catch a wrong guess. */
-  typedef unsigned short posh_u16_t;
-  typedef short          posh_i16_t;
-
-#  if !defined POSH_OS_PALM
-  typedef unsigned       posh_u32_t;
-  typedef int            posh_i32_t;
-#  else
-  typedef unsigned long  posh_u32_t;
-  typedef long           posh_i32_t;
-#  endif
-#endif
-
-/** Minimum value for a byte */
-#define POSH_BYTE_MIN    0
-/** Maximum value for an 8-bit unsigned value */
-#define POSH_BYTE_MAX    255
-/** Minimum value for a 16-bit signed value */
-#define POSH_I16_MIN     ( ( posh_i16_t ) 0x8000 )
-/** Maximum value for a 16-bit signed value */
-#define POSH_I16_MAX     ( ( posh_i16_t ) 0x7FFF ) 
-/** Minimum value for a 16-bit unsigned value */
-#define POSH_U16_MIN     0
-/** Maximum value for a 16-bit unsigned value */
-#define POSH_U16_MAX     ( ( posh_u16_t ) 0xFFFF )
-/** Minimum value for a 32-bit signed value */
-#define POSH_I32_MIN     ( ( posh_i32_t ) 0x80000000 )
-/** Maximum value for a 32-bit signed value */
-#define POSH_I32_MAX     ( ( posh_i32_t ) 0x7FFFFFFF )
-/** Minimum value for a 32-bit unsigned value */
-#define POSH_U32_MIN     0
-/** Maximum value for a 32-bit unsigned value */
-#define POSH_U32_MAX     ( ( posh_u32_t ) 0xFFFFFFFF )
-
-/*
-** ----------------------------------------------------------------------------
-** Sanity checks on expected sizes
-** ----------------------------------------------------------------------------
-*/
-#if !defined FORCE_DOXYGEN
-
-POSH_COMPILE_TIME_ASSERT(posh_byte_t, sizeof(posh_byte_t) == 1);
-POSH_COMPILE_TIME_ASSERT(posh_u8_t, sizeof(posh_u8_t) == 1);
-POSH_COMPILE_TIME_ASSERT(posh_i8_t, sizeof(posh_i8_t) == 1);
-POSH_COMPILE_TIME_ASSERT(posh_u16_t, sizeof(posh_u16_t) == 2);
-POSH_COMPILE_TIME_ASSERT(posh_i16_t, sizeof(posh_i16_t) == 2);
-POSH_COMPILE_TIME_ASSERT(posh_u32_t, sizeof(posh_u32_t) == 4);
-POSH_COMPILE_TIME_ASSERT(posh_i32_t, sizeof(posh_i32_t) == 4);
-
-#if !defined POSH_NO_FLOAT
-   POSH_COMPILE_TIME_ASSERT(posh_testfloat_t, sizeof(float)==4 );
-   POSH_COMPILE_TIME_ASSERT(posh_testdouble_t, sizeof(double)==8);
-#endif
-
-#if defined POSH_64BIT_INTEGER
-   POSH_COMPILE_TIME_ASSERT(posh_u64_t, sizeof(posh_u64_t) == 8);
-   POSH_COMPILE_TIME_ASSERT(posh_i64_t, sizeof(posh_i64_t) == 8);
-#endif
-
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** 64-bit pointer support
-** ----------------------------------------------------------------------------
-*/
-#if defined POSH_CPU_AXP && ( defined POSH_OS_TRU64 || defined POSH_OS_LINUX )
-#  define POSH_64BIT_POINTER 1
-#endif
-
-#if defined POSH_CPU_X86_64 && defined POSH_OS_LINUX
-#  define POSH_64BIT_POINTER 1
-#endif
-
-#if defined POSH_CPU_SPARC64 || defined POSH_OS_WIN64 || defined __64BIT__ || defined __LP64 || defined _LP64 || defined __LP64__ || defined _ADDR64 || defined _CRAYC
-#   define POSH_64BIT_POINTER 1
-#endif
-
-#if defined POSH_64BIT_POINTER
-   POSH_COMPILE_TIME_ASSERT( posh_64bit_pointer, sizeof( void * ) == 8 );
-#elif !defined FORCE_DOXYGEN
-/* if this assertion is hit then you're on a system that either has 64-bit
-   addressing and we didn't catch it, or you're on a system with 16-bit
-   pointers.  In the latter case, POSH doesn't actually care, we're just
-   triggering this assertion to make sure you're aware of the situation,
-   so feel free to delete it.
-
-   If this assertion is triggered on a known 32 or 64-bit platform, 
-   please let us know ([email protected]) */
-   POSH_COMPILE_TIME_ASSERT( posh_32bit_pointer, sizeof( void * ) == 4 );
-#endif
-
-#if defined FORCE_DOXYGEN
-#  define POSH_64BIT_POINTER
-#endif
-
-/*
-** ----------------------------------------------------------------------------
-** POSH Utility Functions
-**
-** These are optional POSH utility functions that are not required if you don't
-** need anything except static checking of your host and target environment.
-** 
-** These functions are NOT wrapped with POSH_PUBLIC_API because I didn't want
-** to enforce their export if your own library is only using them internally.
-** ----------------------------------------------------------------------------
-*/
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-const char *POSH_GetArchString( void );
-
-#if !defined POSH_NO_FLOAT
-
-posh_u32_t  POSH_LittleFloatBits( float f );
-posh_u32_t  POSH_BigFloatBits( float f );
-float       POSH_FloatFromLittleBits( posh_u32_t bits );
-float       POSH_FloatFromBigBits( posh_u32_t bits );
-
-void        POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] );
-double      POSH_DoubleFromBits( const posh_byte_t src[ 8 ] );
-
-/* unimplemented
-float      *POSH_WriteFloatToLittle( void *dst, float f );
-float      *POSH_WriteFloatToBig( void *dst, float f );
-float       POSH_ReadFloatFromLittle( const void *src );
-float       POSH_ReadFloatFromBig( const void *src );
-
-double     *POSH_WriteDoubleToLittle( void *dst, double d );
-double     *POSH_WriteDoubleToBig( void *dst, double d );
-double      POSH_ReadDoubleFromLittle( const void *src );
-double      POSH_ReadDoubleFromBig( const void *src );
-*/
-#endif /* !defined POSH_NO_FLOAT */
-
-#if defined FORCE_DOXYGEN
-#  define POSH_NO_FLOAT
-#  undef  POSH_NO_FLOAT
-#endif
-
-extern posh_u16_t  POSH_SwapU16( posh_u16_t u );
-extern posh_i16_t  POSH_SwapI16( posh_i16_t u );
-extern posh_u32_t  POSH_SwapU32( posh_u32_t u );
-extern posh_i32_t  POSH_SwapI32( posh_i32_t u );
-
-#if defined POSH_64BIT_INTEGER
-
-extern posh_u64_t  POSH_SwapU64( posh_u64_t u );
-extern posh_i64_t  POSH_SwapI64( posh_i64_t u );
-
-#endif /*POSH_64BIT_INTEGER */
-
-extern posh_u16_t *POSH_WriteU16ToLittle( void *dst, posh_u16_t value );
-extern posh_i16_t *POSH_WriteI16ToLittle( void *dst, posh_i16_t value );
-extern posh_u32_t *POSH_WriteU32ToLittle( void *dst, posh_u32_t value );
-extern posh_i32_t *POSH_WriteI32ToLittle( void *dst, posh_i32_t value );
-
-extern posh_u16_t *POSH_WriteU16ToBig( void *dst, posh_u16_t value );
-extern posh_i16_t *POSH_WriteI16ToBig( void *dst, posh_i16_t value );
-extern posh_u32_t *POSH_WriteU32ToBig( void *dst, posh_u32_t value );
-extern posh_i32_t *POSH_WriteI32ToBig( void *dst, posh_i32_t value );
-
-extern posh_u16_t  POSH_ReadU16FromLittle( const void *src );
-extern posh_i16_t  POSH_ReadI16FromLittle( const void *src );
-extern posh_u32_t  POSH_ReadU32FromLittle( const void *src );
-extern posh_i32_t  POSH_ReadI32FromLittle( const void *src );
-
-extern posh_u16_t  POSH_ReadU16FromBig( const void *src );
-extern posh_i16_t  POSH_ReadI16FromBig( const void *src );
-extern posh_u32_t  POSH_ReadU32FromBig( const void *src );
-extern posh_i32_t  POSH_ReadI32FromBig( const void *src );
-
-#if defined POSH_64BIT_INTEGER
-extern posh_u64_t *POSH_WriteU64ToLittle( void *dst, posh_u64_t value );
-extern posh_i64_t *POSH_WriteI64ToLittle( void *dst, posh_i64_t value );
-extern posh_u64_t *POSH_WriteU64ToBig( void *dst, posh_u64_t value );
-extern posh_i64_t *POSH_WriteI64ToBig( void *dst, posh_i64_t value );
-
-extern posh_u64_t  POSH_ReadU64FromLittle( const void *src );
-extern posh_i64_t  POSH_ReadI64FromLittle( const void *src );
-extern posh_u64_t  POSH_ReadU64FromBig( const void *src );
-extern posh_i64_t  POSH_ReadI64FromBig( const void *src );
-#endif /* POSH_64BIT_INTEGER */
-
-#if defined POSH_LITTLE_ENDIAN
-
-#  define POSH_LittleU16(x) (x)
-#  define POSH_LittleU32(x) (x)
-#  define POSH_LittleI16(x) (x)
-#  define POSH_LittleI32(x) (x)
-#  if defined POSH_64BIT_INTEGER
-#    define POSH_LittleU64(x) (x)
-#    define POSH_LittleI64(x) (x)
-#  endif /* defined POSH_64BIT_INTEGER */
-
-#  define POSH_BigU16(x) POSH_SwapU16(x)
-#  define POSH_BigU32(x) POSH_SwapU32(x)
-#  define POSH_BigI16(x) POSH_SwapI16(x)
-#  define POSH_BigI32(x) POSH_SwapI32(x)
-#  if defined POSH_64BIT_INTEGER
-#    define POSH_BigU64(x) POSH_SwapU64(x)
-#    define POSH_BigI64(x) POSH_SwapI64(x)
-#  endif /* defined POSH_64BIT_INTEGER */
-
-#else
-
-#  define POSH_BigU16(x) (x)
-#  define POSH_BigU32(x) (x)
-#  define POSH_BigI16(x) (x)
-#  define POSH_BigI32(x) (x)
-
-#  if defined POSH_64BIT_INTEGER
-#    define POSH_BigU64(x) (x)
-#    define POSH_BigI64(x) (x)
-#  endif /* POSH_64BIT_INTEGER */
-
-#  define POSH_LittleU16(x) POSH_SwapU16(x)
-#  define POSH_LittleU32(x) POSH_SwapU32(x)
-#  define POSH_LittleI16(x) POSH_SwapI16(x)
-#  define POSH_LittleI32(x) POSH_SwapI32(x)
-
-#  if defined POSH_64BIT_INTEGER
-#    define POSH_LittleU64(x) POSH_SwapU64(x)
-#    define POSH_LittleI64(x) POSH_SwapI64(x)
-#  endif /* POSH_64BIT_INTEGER */
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif

+ 0 - 459
3rdparty/nvtt/nvcore/stdstream.h

@@ -1,459 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#include "nvcore.h"
-#include "stream.h"
-#include "array.h"
-
-#include <stdio.h> // fopen
-#include <string.h> // memcpy
-
-namespace nv
-{
-
-    /// Open @p fileName with the stdio @p mode string.
-    /// Uses fopen_s when building with MSVC and _MSC_VER >= 1400, plain fopen otherwise.
-    /// On the fopen_s path NULL is returned when the call reports a non-zero error code.
-    inline FILE * fileOpen(const char * fileName, const char * mode)
-    {
-        nvCheck(fileName != NULL);
-#if NV_CC_MSVC && _MSC_VER >= 1400
-        FILE * handle;
-        const bool opened = (fopen_s(&handle, fileName, mode) == 0);
-        return opened ? handle : NULL;
-#else
-        return fopen(fileName, mode);
-#endif
-    }
-
-
-    /// Stream backed by a stdio FILE handle.
-    /// Implements positioning and status queries; serialize() is left to derived classes.
-    class NVCORE_CLASS StdStream : public Stream
-    {
-        NV_FORBID_COPY(StdStream);
-    public:
-
-        /// Wrap @p fp; when @p autoclose is true the destructor closes the handle.
-        StdStream( FILE * fp, bool autoclose ) : m_fp(fp), m_autoclose(autoclose) { }
-
-        /// Close the handle when autoclose was requested and a handle is present.
-        virtual ~StdStream()
-        {
-            if( m_autoclose && m_fp != NULL ) {
-#if NV_OS_WIN32
-                _fclose_nolock( m_fp );
-#else
-                fclose( m_fp );
-#endif
-            }
-        }
-
-
-        /** @name Stream implementation. */
-        //@{
-        /// Move the file cursor to the absolute offset @p pos.
-        virtual void seek( uint pos )
-        {
-            nvDebugCheck(m_fp != NULL);
-            nvDebugCheck(pos <= size());
-#if NV_OS_WIN32
-            _fseek_nolock(m_fp, pos, SEEK_SET);
-#else
-            fseek(m_fp, pos, SEEK_SET);
-#endif
-        }
-
-        /// Current offset of the file cursor.
-        virtual uint tell() const
-        {
-            nvDebugCheck(m_fp != NULL);
-#if NV_OS_WIN32
-            return _ftell_nolock(m_fp);
-#else
-            return (uint)ftell(m_fp);
-#endif
-        }
-
-        /// File length, measured by seeking to the end and then restoring the cursor.
-        virtual uint size() const
-        {
-            nvDebugCheck(m_fp != NULL);
-#if NV_OS_WIN32
-            const uint saved = _ftell_nolock(m_fp);
-            _fseek_nolock(m_fp, 0, SEEK_END);
-            const uint length = _ftell_nolock(m_fp);
-            _fseek_nolock(m_fp, saved, SEEK_SET);
-#else
-            const uint saved = (uint)ftell(m_fp);
-            fseek(m_fp, 0, SEEK_END);
-            const uint length = (uint)ftell(m_fp);
-            fseek(m_fp, saved, SEEK_SET);
-#endif
-            return length;
-        }
-
-        /// True when there is no handle or stdio reports an error on it.
-        virtual bool isError() const
-        {
-            if (m_fp == NULL) {
-                return true;
-            }
-            return ferror( m_fp ) != 0;
-        }
-
-        /// Reset the stdio error and end-of-file indicators.
-        virtual void clearError()
-        {
-            nvDebugCheck(m_fp != NULL);
-            clearerr(m_fp);
-        }
-
-        // @@ feof is deliberately not used here: it only reports true after an attempt to read
-        // *past* the end, so it would still say false right after the last byte was read.
-        // Comparing the cursor against the end offset (via ftell and fseek) reports the end as
-        // soon as the cursor reaches it, which is consistent with MemoryInputStream.
-        virtual bool isAtEnd() const
-        {
-            // A stream without a handle is reported as exhausted.
-            if (m_fp == NULL) {
-                return true;
-            }
-#if NV_OS_WIN32
-            const uint saved = _ftell_nolock(m_fp);
-            _fseek_nolock(m_fp, 0, SEEK_END);
-            const uint length = _ftell_nolock(m_fp);
-            _fseek_nolock(m_fp, saved, SEEK_SET);
-#else
-            const uint saved = (uint)ftell(m_fp);
-            fseek(m_fp, 0, SEEK_END);
-            const uint length = (uint)ftell(m_fp);
-            fseek(m_fp, saved, SEEK_SET);
-#endif
-            return saved == length;
-        }
-
-        /// Always true.
-        virtual bool isSeekable() const { return true; }
-        //@}
-
-    protected:
-
-        FILE * m_fp;        // wrapped stdio handle, may be NULL
-        bool m_autoclose;   // close m_fp in the destructor when true
-
-    };
-
-
-    /// Standard output stream: writes the serialized bytes to a stdio FILE.
-    class NVCORE_CLASS StdOutputStream : public StdStream
-    {
-        NV_FORBID_COPY(StdOutputStream);
-    public:
-
-        /// Construct stream by file name. The file is opened in "wb" mode and closed on destruction.
-        StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb"), /*autoclose=*/true) { }
-
-        /// Construct stream by file handle.
-        StdOutputStream( FILE * fp, bool autoclose ) : StdStream(fp, autoclose)
-        {
-        }
-
-        /** @name Stream implementation. */
-        //@{
-        /// Write @p len bytes from @p data.
-        /// @return the number of bytes actually written.
-        virtual uint serialize( void * data, uint len )
-        {
-            nvDebugCheck(data != NULL);
-            nvDebugCheck(m_fp != NULL);
-#if NV_OS_WIN32
-            return (uint)_fwrite_nolock(data, 1, len, m_fp);
-#elif NV_OS_LINUX
-            return (uint)fwrite_unlocked(data, 1, len, m_fp);
-#elif NV_OS_DARWIN
-            // Written byte by byte; stop at the first failed write so the result is the
-            // number of bytes written, matching the fwrite-based branches.
-            const char * bytes = (const char *)data;
-            for (uint i = 0; i < len; i++) {
-                if (putc_unlocked(bytes[i], m_fp) == EOF) {
-                    return i;
-                }
-            }
-            return len;
-#else
-            return (uint)fwrite(data, 1, len, m_fp);
-#endif
-        }
-
-        /// Output streams never load.
-        virtual bool isLoading() const
-        {
-            return false;
-        }
-
-        /// Output streams always save.
-        virtual bool isSaving() const
-        {
-            return true;
-        }
-        //@}
-
-    };
-
-
-    /// Standard input stream: fills the serialized bytes from a stdio FILE.
-    class NVCORE_CLASS StdInputStream : public StdStream
-    {
-        NV_FORBID_COPY(StdInputStream);
-    public:
-
-        /// Construct stream by file name. The file is opened in "rb" mode and closed on destruction.
-        StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb"), /*autoclose=*/true) { }
-
-        /// Construct stream by file handle.
-        StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
-        {
-        }
-
-        /** @name Stream implementation. */
-        //@{
-        /// Read up to @p len bytes into @p data.
-        /// @return the number of bytes actually read.
-        virtual uint serialize( void * data, uint len )
-        {
-            nvDebugCheck(data != NULL);
-            nvDebugCheck(m_fp != NULL);
-#if NV_OS_WIN32
-            return (uint)_fread_nolock(data, 1, len, m_fp);
-#elif NV_OS_LINUX
-            return (uint)fread_unlocked(data, 1, len, m_fp);
-#elif NV_OS_DARWIN
-            // Read byte by byte; stop at end of file or on error instead of storing EOF
-            // into the buffer, and report how many bytes were really read.
-            char * bytes = (char *)data;
-            for (uint i = 0; i < len; i++) {
-                const int c = getc_unlocked(m_fp);
-                if (c == EOF) {
-                    return i;
-                }
-                bytes[i] = (char)c;
-            }
-            return len;
-#else
-            return (uint)fread(data, 1, len, m_fp);
-#endif
-        }
-
-        /// Input streams always load.
-        virtual bool isLoading() const
-        {
-            return true;
-        }
-
-        /// Input streams never save.
-        virtual bool isSaving() const
-        {
-            return false;
-        }
-        //@}
-    };
-
-
-
-    /// Memory input stream: reads from a caller-supplied byte range without copying it.
-    class NVCORE_CLASS MemoryInputStream : public Stream
-    {
-        NV_FORBID_COPY(MemoryInputStream);
-    public:
-
-        /// Ctor. Reads start at @p mem and cover @p size bytes; the memory is not owned.
-        MemoryInputStream( const uint8 * mem, uint size ) : m_mem(mem), m_ptr(mem), m_size(size) { }
-
-        /** @name Stream implementation. */
-        //@{
-        /// Copy up to @p len bytes from the cursor into @p data and advance the cursor.
-        /// @return the number of bytes copied; 0 when the stream is in the error state.
-        virtual uint serialize( void * data, uint len )
-        {
-            nvDebugCheck(data != NULL);
-            nvDebugCheck(!isError());
-
-            // With a cursor outside the buffer (or a NULL buffer) the subtraction below would
-            // wrap around and memcpy would read out of bounds, so refuse to read instead.
-            // NOTE(review): presumably nvDebugCheck is inactive in release builds -- confirm.
-            if (isError()) {
-                return 0;
-            }
-
-            uint left = m_size - tell();
-            if (len > left) len = left;
-
-            memcpy( data, m_ptr, len );
-            m_ptr += len;
-
-            return len;
-        }
-
-        /// Place the cursor @p pos bytes after the start of the buffer.
-        virtual void seek( uint pos )
-        {
-            nvDebugCheck(!isError());
-            m_ptr = m_mem + pos;
-            nvDebugCheck(!isError());
-        }
-
-        /// Offset of the cursor from the start of the buffer.
-        virtual uint tell() const
-        {
-            nvDebugCheck(m_ptr >= m_mem);
-            return uint(m_ptr - m_mem);
-        }
-
-        /// Size in bytes given at construction.
-        virtual uint size() const
-        {
-            return m_size;
-        }
-
-        /// True when there is no buffer or the cursor lies outside [m_mem, m_mem + m_size].
-        virtual bool isError() const
-        {
-            return m_mem == NULL || m_ptr > m_mem + m_size || m_ptr < m_mem;
-        }
-
-        virtual void clearError()
-        {
-            // Nothing to do.
-        }
-
-        /// True when the cursor sits exactly one past the last byte.
-        virtual bool isAtEnd() const
-        {
-            return m_ptr == m_mem + m_size;
-        }
-
-        /// Always true.
-        virtual bool isSeekable() const
-        {
-            return true;
-        }
-
-        /// Always true: this is an input stream.
-        virtual bool isLoading() const
-        {
-            return true;
-        }
-
-        /// Always false: this is an input stream.
-        virtual bool isSaving() const
-        {
-            return false;
-        }
-        //@}
-
-        /// Pointer to the byte at the current cursor position.
-        const uint8 * ptr() const { return m_ptr; }
-
-
-    private:
-
-        const uint8 * m_mem;   // start of the buffer
-        const uint8 * m_ptr;   // read cursor
-        uint m_size;           // buffer length in bytes
-
-    };
-
-
-    /// Output stream that appends every serialized byte to a referenced Array<uint8>.
-    class NVCORE_CLASS BufferOutputStream : public Stream
-    {
-        NV_FORBID_COPY(BufferOutputStream);
-    public:
-
-        /// Bind the stream to @p buffer; the array is referenced, not copied.
-        BufferOutputStream(Array<uint8> & buffer) : m_buffer(buffer) { }
-
-        /// Append @p len bytes from @p data to the buffer; always returns @p len.
-        virtual uint serialize( void * data, uint len )
-        {
-            nvDebugCheck(data != NULL);
-            const uint8 * bytes = (uint8 *)data;
-            m_buffer.append(bytes, len);
-            return len;
-        }
-
-        /// Not implemented: the stream is append-only.
-        virtual void seek( uint /*pos*/ ) { }
-
-        /// Position and size are both the current length of the buffer.
-        virtual uint tell() const { return m_buffer.size(); }
-        virtual uint size() const { return m_buffer.size(); }
-
-        /// No error state is tracked.
-        virtual bool isError() const { return false; }
-        virtual void clearError() { }
-
-        /// Fixed answers: always at the end, not seekable, saving only.
-        virtual bool isAtEnd() const { return true; }
-        virtual bool isSeekable() const { return false; }
-        virtual bool isLoading() const { return false; }
-        virtual bool isSaving() const { return true; }
-
-    private:
-        Array<uint8> & m_buffer;   // destination array
-    };
-
-
-    /// Wrapper that forwards every call to another Stream and executes `throw;` when the
-    /// wrapped stream reports an error after serialize() or seek().
-    /// NOTE(review): a bare `throw;` rethrows the exception currently being handled; when
-    /// none is active the C++ runtime calls std::terminate -- confirm this is the intended
-    /// failure mode.
-    class NVCORE_CLASS ProtectedStream : public Stream
-    {
-        NV_FORBID_COPY(ProtectedStream);
-    public:
-
-        /// Wrap a stream by reference; the wrapped stream is never deleted.
-        ProtectedStream( Stream & s ) : m_s(&s), m_autodelete(false) { }
-
-        /// Wrap a stream by pointer; it is deleted on destruction unless @p autodelete is false.
-        ProtectedStream( Stream * s, bool autodelete = true ) : m_s(s), m_autodelete(autodelete)
-        {
-            nvDebugCheck(m_s != NULL);
-        }
-
-        /// Delete the wrapped stream when autodelete was requested.
-        virtual ~ProtectedStream()
-        {
-            if( m_autodelete ) delete m_s;
-        }
-
-        /** @name Stream implementation. */
-        //@{
-        /// Forward to the wrapped stream, then check its error state.
-        /// @return the byte count reported by the wrapped stream.
-        virtual uint serialize( void * data, uint len )
-        {
-            nvDebugCheck(data != NULL);
-
-            const uint count = m_s->serialize( data, len );
-            if( m_s->isError() ) throw;
-
-            return count;
-        }
-
-        /// Forward to the wrapped stream, then check its error state.
-        virtual void seek( uint pos )
-        {
-            m_s->seek( pos );
-            if( m_s->isError() ) throw;
-        }
-
-        // The remaining queries are forwarded unchanged.
-        virtual uint tell() const { return m_s->tell(); }
-
-        virtual uint size() const { return m_s->size(); }
-
-        virtual bool isError() const { return m_s->isError(); }
-
-        virtual void clearError() { m_s->clearError(); }
-
-        virtual bool isAtEnd() const { return m_s->isAtEnd(); }
-
-        virtual bool isSeekable() const { return m_s->isSeekable(); }
-
-        virtual bool isLoading() const { return m_s->isLoading(); }
-
-        virtual bool isSaving() const { return m_s->isSaving(); }
-        //@}
-
-
-    private:
-
-        Stream * const m_s;         // wrapped stream
-        bool const m_autodelete;    // delete m_s in the destructor when true
-
-    };
-
-} // nv namespace
-
-
-//#endif // NV_CORE_STDSTREAM_H

+ 0 - 163
3rdparty/nvtt/nvcore/stream.h

@@ -1,163 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#ifndef NV_CORE_STREAM_H
-#define NV_CORE_STREAM_H
-
-#include "nvcore.h"
-#include "debug.h"
-
-namespace nv
-{
-
-    /// Base stream class.
-    class NVCORE_CLASS Stream { // Abstract interface: subclasses supply serialize(), seek() and the state queries declared below.
-    public:
-
-        enum ByteOrder { // Enumerators are defined from bool values: false is little-endian, true is big-endian.
-            LittleEndian = false,
-            BigEndian = true,
-        };
-
-        /// Get the byte order of the system, as selected at compile time by NV_LITTLE_ENDIAN.
-        static ByteOrder getSystemByteOrder() { 
-#if NV_LITTLE_ENDIAN
-            return LittleEndian;
-#else
-            return BigEndian;
-#endif
-        }
-
-
-        /// Ctor. A new stream starts out with little-endian byte order.
-        Stream() : m_byteOrder(LittleEndian) { }
-
-        /// Virtual destructor.
-        virtual ~Stream() {}
-
-        /// Set byte order used by byteOrderSerialize() for multi-byte values.
-        void setByteOrder(ByteOrder bo) { m_byteOrder = bo; }
-
-        /// Get byte order.
-        ByteOrder byteOrder() const { return m_byteOrder; }
-
-
-        /// Serialize the given data.
-        virtual uint serialize( void * data, uint len ) = 0;
-
-        /// Move to the given position in the archive.
-        virtual void seek( uint pos ) = 0;
-
-        /// Return the current position in the archive.
-        virtual uint tell() const = 0;
-
-        /// Return the current size of the archive.
-        virtual uint size() const = 0;
-
-        /// Determine if there has been any error.
-        virtual bool isError() const = 0;
-
-        /// Clear errors.
-        virtual void clearError() = 0;
-
-        /// Return true if the stream is at the end.
-        virtual bool isAtEnd() const = 0;
-
-        /// Return true if the stream is seekable.
-        virtual bool isSeekable() const = 0;
-
-        /// Return true if this is an input stream.
-        virtual bool isLoading() const = 0;
-
-        /// Return true if this is an output stream.
-        virtual bool isSaving() const = 0;
-
-
-        void advance(uint offset) { seek(tell() + offset); } // Seeks to the current position plus offset.
-
-
-        // friends: every operator<< hands the value to serialize(), directly for 1-byte types and through byteOrderSerialize() otherwise.
-        friend Stream & operator<<( Stream & s, bool & c ) {
-#if NV_OS_DARWIN && !NV_CC_CPP11
-            nvStaticCheck(sizeof(bool) == 4); // This branch expects a 4-byte bool, so the value is carried through a single uint8.
-            uint8 b = c ? 1 : 0;
-            s.serialize( &b, 1 );
-            c = (b == 1);
-#else
-            nvStaticCheck(sizeof(bool) == 1);
-            s.serialize( &c, 1 );
-#endif
-            return s;
-        }
-        friend Stream & operator<<( Stream & s, char & c ) {
-            nvStaticCheck(sizeof(char) == 1);
-            s.serialize( &c, 1 );
-            return s;
-        }
-        friend Stream & operator<<( Stream & s, uint8 & c ) {
-            nvStaticCheck(sizeof(uint8) == 1);
-            s.serialize( &c, 1 );
-            return s;
-        }
-        friend Stream & operator<<( Stream & s, int8 & c ) {
-            nvStaticCheck(sizeof(int8) == 1);
-            s.serialize( &c, 1 );
-            return s;
-        }
-        friend Stream & operator<<( Stream & s, uint16 & c ) {
-            nvStaticCheck(sizeof(uint16) == 2);
-            return s.byteOrderSerialize( &c, 2 );
-        }
-        friend Stream & operator<<( Stream & s, int16 & c ) {
-            nvStaticCheck(sizeof(int16) == 2);
-            return s.byteOrderSerialize( &c, 2 );
-        }
-        friend Stream & operator<<( Stream & s, uint32 & c ) {
-            nvStaticCheck(sizeof(uint32) == 4);
-            return s.byteOrderSerialize( &c, 4 );
-        }
-        friend Stream & operator<<( Stream & s, int32 & c ) {
-            nvStaticCheck(sizeof(int32) == 4);
-            return s.byteOrderSerialize( &c, 4 );
-        }
-        friend Stream & operator<<( Stream & s, uint64 & c ) {
-            nvStaticCheck(sizeof(uint64) == 8);
-            return s.byteOrderSerialize( &c, 8 );
-        }
-        friend Stream & operator<<( Stream & s, int64 & c ) {
-            nvStaticCheck(sizeof(int64) == 8);
-            return s.byteOrderSerialize( &c, 8 );
-        }
-        friend Stream & operator<<( Stream & s, float & c ) {
-            nvStaticCheck(sizeof(float) == 4);
-            return s.byteOrderSerialize( &c, 4 );
-        }
-        friend Stream & operator<<( Stream & s, double & c ) {
-            nvStaticCheck(sizeof(double) == 8);
-            return s.byteOrderSerialize( &c, 8 );
-        }
-
-    protected:
-
-        /// Serialize in the stream byte order. The return value of serialize() is not inspected here.
-        Stream & byteOrderSerialize( void * v, uint len ) {
-            if( m_byteOrder == getSystemByteOrder() ) {
-                serialize( v, len ); // Orders match: pass the bytes through as laid out in memory.
-            }
-            else {
-                for( uint i = len; i > 0; i-- ) { // Orders differ: serialize one byte at a time, last byte first.
-                    serialize( (uint8 *)v + i - 1, 1 );
-                }
-            }
-            return *this;
-        }
-
-
-    private:
-
-        ByteOrder m_byteOrder; // Byte order applied by byteOrderSerialize().
-
-    };
-
-} // nv namespace
-
-#endif // NV_CORE_STREAM_H

+ 0 - 429
3rdparty/nvtt/nvcore/strlib.h

@@ -1,429 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#ifndef NV_CORE_STRING_H
-#define NV_CORE_STRING_H
-
-#include "debug.h"
-#include "hash.h" // hash
-
-//#include <string.h> // strlen, etc.
-
-#if NV_OS_WIN32
-#define NV_PATH_SEPARATOR '\\'
-#else
-#define NV_PATH_SEPARATOR '/'
-#endif
-
-namespace nv
-{
-
-    NVCORE_API uint strHash(const char * str, uint h) NV_PURE;
-
-    /// String hash based on Bernstein's hash.
-    inline uint strHash(const char * data, uint h = 5381) // h is the starting hash value.
-    {
-        uint i = 0;
-        while(data[i] != 0) { // Stop at the terminating NUL; it is not hashed.
-            h = (33 * h) ^ uint(data[i]); // Multiply the running hash by 33, then XOR in the current character.
-            i++;
-        }
-        return h; // For an empty string this is the starting value unchanged.
-    }
-
-    template <> struct Hash<const char *> { // Hash functor for C strings: hashes the characters via strHash, not the pointer value.
-        uint operator()(const char * str) const { return strHash(str); }
-    };
-
-    NVCORE_API uint strLen(const char * str) NV_PURE;                       // Asserts on NULL strings.
-
-    NVCORE_API int strDiff(const char * s1, const char * s2) NV_PURE;       // Asserts on NULL strings.
-    NVCORE_API int strCaseDiff(const char * s1, const char * s2) NV_PURE;   // Asserts on NULL strings.
-    NVCORE_API bool strEqual(const char * s1, const char * s2) NV_PURE;     // Accepts NULL strings.
-    NVCORE_API bool strCaseEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings.
-
-    template <> struct Equal<const char *> { // Equality functor for C strings: delegates to strEqual rather than comparing pointers.
-        bool operator()(const char * a, const char * b) const { return strEqual(a, b); }
-    };
-
-    NVCORE_API bool strBeginsWith(const char * dst, const char * prefix) NV_PURE;
-    NVCORE_API bool strEndsWith(const char * dst, const char * suffix) NV_PURE;
-
-
-    NVCORE_API void strCpy(char * dst, uint size, const char * src);
-    NVCORE_API void strCpy(char * dst, uint size, const char * src, uint len);
-    NVCORE_API void strCat(char * dst, uint size, const char * src);
-
-    NVCORE_API const char * strSkipWhiteSpace(const char * str);
-    NVCORE_API char * strSkipWhiteSpace(char * str);
-
-    NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE;
-
-    NVCORE_API bool isNumber(const char * str) NV_PURE;
-
-    /* @@ Implement these two functions and modify StringBuilder to use them?
-    NVCORE_API void strFormat(const char * dst, const char * fmt, ...);
-    NVCORE_API void strFormatList(const char * dst, const char * fmt, va_list arg);
-
-    template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) __attribute__((format (printf, 2, 3)));
-    template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) {
-        va_list args;
-        va_start(args, fmt);
-        strFormatList(buffer, count, fmt, args);
-        va_end(args);
-    }
-    template <size_t count> void strFormatListSafe(char (&buffer)[count], const char *fmt, va_list arg) {
-        va_list tmp;
-        va_copy(tmp, args);
-        strFormatList(buffer, count, fmt, tmp);
-        va_end(tmp);
-    }*/
-
-    template <int count> void strCpySafe(char (&buffer)[count], const char *src) { // Deduces the array size and passes it to strCpy as the size argument.
-        strCpy(buffer, count, src);
-    }
-
-    template <int count> void strCatSafe(char (&buffer)[count], const char * src) { // Deduces the array size and passes it to strCat as the size argument.
-        strCat(buffer, count, src);
-    }
-
-
-
-    /// String builder.
-    class NVCORE_CLASS StringBuilder // Mutable string buffer; most mutators return *this by reference.
-    {
-    public:
-
-        StringBuilder();
-        explicit StringBuilder( uint size_hint );
-        StringBuilder(const char * str);
-        StringBuilder(const char * str, uint len);
-        StringBuilder(const StringBuilder & other);
-
-        ~StringBuilder();
-
-        StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
-        StringBuilder & formatList( const char * format, va_list arg );
-
-        StringBuilder & append(const char * str);
-		StringBuilder & append(const char * str, uint len);
-        StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3)));
-        StringBuilder & appendFormatList(const char * format, va_list arg);
-
-        StringBuilder & appendSpace(uint n);
-
-        StringBuilder & number( int i, int base = 10 );
-        StringBuilder & number( uint i, int base = 10 );
-
-        StringBuilder & reserve(uint size_hint);
-        StringBuilder & copy(const char * str);
-        StringBuilder & copy(const char * str, uint len);
-        StringBuilder & copy(const StringBuilder & str);
-
-        StringBuilder & toLower();
-        StringBuilder & toUpper();
-
-        bool endsWith(const char * str) const;
-        bool beginsWith(const char * str) const;
-
-        char * reverseFind(char c);
-
-        void reset();
-        bool isNull() const { return m_size == 0; } // Null means the container size is zero.
-
-        // const char * accessors
-        //operator const char * () const { return m_str; }
-        //operator char * () { return m_str; }
-        const char * str() const { return m_str; }
-        char * str() { return m_str; }
-
-        char * release();
-
-        /// Implement value semantics.
-        StringBuilder & operator=( const StringBuilder & s ) {
-            return copy(s);
-        }
-
-        /// Implement value semantics.
-        StringBuilder & operator=( const char * s ) {
-            return copy(s);
-        }
-
-        /// Equal operator. Compares literally with strEqual; strMatch would treat this string as a pattern.
-        bool operator==( const StringBuilder & s ) const {
-            return strEqual(s.m_str, m_str);
-        }
-
-        /// Return the exact length (0 for a null builder).
-        uint length() const { return isNull() ? 0 : strLen(m_str); }
-
-        /// Return the size of the string container.
-        uint capacity() const { return m_size; }
-
-        /// Return the hash of the string (0 for a null builder).
-        uint hash() const { return isNull() ? 0 : strHash(m_str); }
-
-        // Swap strings.
-        friend void swap(StringBuilder & a, StringBuilder & b);
-
-    protected:
-
-        /// Size of the string container.
-        uint m_size;
-
-        /// String.
-        char * m_str;
-
-    };
-
-
-    /// Path string. @@ This should be called PathBuilder.
-    class NVCORE_CLASS Path : public StringBuilder // Adds path-oriented helpers on top of StringBuilder.
-    {
-    public:
-        Path() : StringBuilder() {}
-        explicit Path(int size_hint) : StringBuilder(size_hint) {} // size_hint is an int here but is passed to a constructor taking uint.
-        Path(const char * str) : StringBuilder(str) {}
-        Path(const Path & path) : StringBuilder(path) {}
-
-        const char * fileName() const;
-        const char * extension() const;
-
-        void translatePath(char pathSeparator = NV_PATH_SEPARATOR); // Separator defaults to the platform value chosen by NV_PATH_SEPARATOR.
-
-        void appendSeparator(char pathSeparator = NV_PATH_SEPARATOR);
-
-        void stripFileName();
-        void stripExtension();
-
-        // statics: the same helpers operating on raw C strings.
-        NVCORE_API static char separator();
-        NVCORE_API static const char * fileName(const char *);
-        NVCORE_API static const char * extension(const char *);
-
-        NVCORE_API static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR);
-    };
-
-
-    /// String class.
-    class NVCORE_CLASS String // Shared string handle; copies share one buffer and a 16-bit reference count.
-    {
-    public:
-
-        /// Constructs a null string. @sa isNull()
-        String()
-        {
-            data = NULL;
-        }
-
-        /// Constructs a shared copy of str.
-        String(const String & str)
-        {
-            data = str.data;
-            if (data != NULL) addRef(); // A null string has no buffer to reference.
-        }
-
-        /// Constructs a shared string from a standard string.
-        String(const char * str)
-        {
-            setString(str);
-        }
-
-        /// Constructs a shared string from a standard string.
-        String(const char * str, int length)
-        {
-            setString(str, length);
-        }
-
-        /// Constructs a shared string from a StringBuilder.
-        String(const StringBuilder & str)
-        {
-            setString(str);
-        }
-
-        /// Dtor.
-        ~String()
-        {
-            release();
-        }
-
-        String clone() const;
-
-        /// Release the current string and allocate a new one.
-        const String & operator=( const char * str )
-        {
-            release();
-            setString( str );
-            return *this;
-        }
-
-        /// Release the current string and allocate a new one.
-        const String & operator=( const StringBuilder & str )
-        {
-            release();
-            setString( str );
-            return *this;
-        }
-
-        /// Implement value semantics.
-        String & operator=( const String & str )
-        {
-            if (str.data != data) // Already sharing the same buffer (or both null): nothing to do.
-            {
-                release();
-                data = str.data;
-                addRef();
-            }
-            return *this;
-        }
-
-        /// Equal operator. Literal comparison; strEqual is documented as accepting NULL strings.
-        bool operator==( const String & str ) const
-        {
-            return strEqual(str.data, data);
-        }
-
-        /// Equal operator. Literal comparison; strEqual is documented as accepting NULL strings.
-        bool operator==( const char * str ) const
-        {
-            return strEqual(str, data);
-        }
-
-        /// Not equal operator. Exact negation of operator==.
-        bool operator!=( const String & str ) const
-        {
-            return !strEqual(str.data, data);
-        }
-
-        /// Not equal operator. Exact negation of operator==.
-        bool operator!=( const char * str ) const
-        {
-            return !strEqual(str, data);
-        }
-
-        /// Returns true if this string is the null string.
-        bool isNull() const { return data == NULL; }
-
-        /// Return the exact length. Must not be called on a null string.
-        uint length() const { nvDebugCheck(data != NULL); return strLen(data); }
-
-        /// Return the hash of the string. Must not be called on a null string.
-        uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
-
-        /// const char * cast operator.
-        operator const char * () const { return data; }
-
-        /// Get string pointer.
-        const char * str() const { return data; }
-
-
-    private:
-
-        // Add reference count.
-        void addRef();
-
-        // Decrease reference count.
-        void release();
-
-        uint16 getRefCount() const
-        {
-            nvDebugCheck(data != NULL);
-            return *reinterpret_cast<const uint16 *>(data - 2); // The count is read from the two bytes just before the characters.
-        }
-
-        void setRefCount(uint16 count) {
-            nvDebugCheck(data != NULL);
-            nvCheck(count < 0xFFFF);
-            *reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
-        }
-
-        void setData(const char * str) {
-            data = str + 2; // Skip the two-byte reference count at the start of the buffer.
-        }
-
-        void allocString(const char * str)
-        {
-            allocString(str, strLen(str));
-        }
-
-        void allocString(const char * str, uint length);
-
-        void setString(const char * str);
-        void setString(const char * str, uint length);
-        void setString(const StringBuilder & str);
-
-        // Swap strings.
-        friend void swap(String & a, String & b);
-
-    private:
-
-        const char * data; // Points at the characters, or NULL for the null string.
-
-    };
-
-    template <> struct Hash<String> { // Hash functor for String; String::hash() debug-checks that the string is not null.
-        uint operator()(const String & str) const { return str.hash(); }
-    };
-
-
-    // Like AutoPtr, but for const char strings.
-    class AutoString // Owns a character array and releases it with delete [].
-    {
-        NV_FORBID_COPY(AutoString);
-        NV_FORBID_HEAPALLOC();
-    public:
-
-        // Ctor. Takes ownership of p; it will be released with delete [].
-        AutoString(const char * p = NULL) : m_ptr(p) { }
-
-#if NV_CC_CPP11
-        // Move ctor. Leaves the source holding NULL.
-        AutoString(AutoString && ap) : m_ptr(ap.m_ptr) { ap.m_ptr = NULL; }
-#endif
-        
-        // Dtor. Deletes owned pointer.
-        ~AutoString() {
-            delete [] m_ptr;
-            m_ptr = NULL;
-        }
-
-        // Delete owned pointer and assign new one.
-        void operator=(const char * p) {
-            if (p != m_ptr) // Assigning the pointer already owned is a no-op.
-            {
-                delete [] m_ptr;
-                m_ptr = p;
-            }
-        }
-
-        // Get pointer.
-        const char * ptr() const { return m_ptr; }
-        operator const char *() const { return m_ptr; }
-
-        // Relinquish ownership of the underlying pointer and returns that pointer.
-        const char * release() {
-            const char * tmp = m_ptr;
-            m_ptr = NULL;
-            return tmp;
-        }
-
-        // comparison operators. These compare pointer values, not string contents.
-        friend bool operator == (const AutoString & ap, const char * const p) {
-            return (ap.ptr() == p);
-        }
-        friend bool operator != (const AutoString & ap, const char * const p) {
-            return (ap.ptr() != p);
-        }
-        friend bool operator == (const char * const p, const AutoString & ap) {
-            return (ap.ptr() == p);
-        }
-        friend bool operator != (const char * const p, const AutoString & ap) {
-            return (ap.ptr() != p);
-        }
-
-    private:
-        const char * m_ptr;
-    };
-
-} // nv namespace
-
-#endif // NV_CORE_STRING_H

+ 0 - 281
3rdparty/nvtt/nvcore/utils.h

@@ -1,281 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <[email protected]>
-
-#ifndef NV_CORE_UTILS_H
-#define NV_CORE_UTILS_H
-
-#include "debug.h" // nvdebugcheck
-
-#include <new> // for placement new
-
-
-// Just in case. Grrr.
-#undef min
-#undef max
-
-#define NV_INT8_MIN    (-128)
-#define NV_INT8_MAX    127
-#define NV_UINT8_MAX    255
-#define NV_INT16_MIN    (-32767-1)
-#define NV_INT16_MAX    32767
-#define NV_UINT16_MAX   0xffff
-#define NV_INT32_MIN    (-2147483647-1)
-#define NV_INT32_MAX    2147483647
-#define NV_UINT32_MAX   0xffffffff
-#define NV_INT64_MAX    POSH_I64(9223372036854775807)
-#define NV_INT64_MIN    (-POSH_I64(9223372036854775807)-1)
-#define NV_UINT64_MAX   POSH_U64(0xffffffffffffffff)
-
-#define NV_HALF_MAX     65504.0F
-#define NV_FLOAT_MAX    3.402823466e+38F
-
-#define NV_INTEGER_TO_FLOAT_MAX  16777216     // 2^24: largest integer such that it and all smaller integers can be stored in a 32bit float.
-
-
-namespace nv
-{
-    // Less error prone than casting. From CB:
-    // http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html
-
-    // These intentionally look like casts.
-
-    // uint32 casts:
-    template <typename T> inline uint32 U32(T x) { return x; } // Unchecked fallback for types without a specialization below.
-    template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
-    template <> inline uint32 U32<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; }
-    //template <> inline uint32 U32<uint32>(uint32 x) { return x; }
-    template <> inline uint32 U32<int32>(int32 x) { nvDebugCheck(x >= 0); return (uint32)x; } // Signed sources only need the non-negative check.
-    //template <> inline uint32 U32<uint16>(uint16 x) { return x; }
-    template <> inline uint32 U32<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint32)x; }
-    //template <> inline uint32 U32<uint8>(uint8 x) { return x; }
-    template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; }
-
-    // int32 casts:
-    template <typename T> inline int32 I32(T x) { return x; } // Unchecked fallback for types without a specialization below.
-    template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
-    template <> inline int32 I32<int64>(int64 x) { nvDebugCheck(x >= NV_INT32_MIN && x <= NV_INT32_MAX); return (int32)x; } // Upper bound is the signed maximum; larger values do not fit in int32.
-    template <> inline int32 I32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
-    //template <> inline int32 I32<int32>(int32 x) { return x; }
-    //template <> inline int32 I32<uint16>(uint16 x) { return x; }
-    //template <> inline int32 I32<int16>(int16 x) { return x; }
-    //template <> inline int32 I32<uint8>(uint8 x) { return x; }
-    //template <> inline int32 I32<int8>(int8 x) { return x; }
-
-    // uint16 casts:
-    template <typename T> inline uint16 U16(T x) { return x; } // Unchecked fallback for types without a specialization below.
-    template <> inline uint16 U16<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
-    template <> inline uint16 U16<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
-    template <> inline uint16 U16<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
-    template <> inline uint16 U16<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
-    //template <> inline uint16 U16<uint16>(uint16 x) { return x; }
-    template <> inline uint16 U16<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint16)x; } // Narrower signed sources only need the non-negative check.
-    //template <> inline uint16 U16<uint8>(uint8 x) { return x; }
-    template <> inline uint16 U16<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint16)x; }
-
-    // int16 casts:
-    template <typename T> inline int16 I16(T x) { return x; } // Unchecked fallback for types without a specialization below.
-    template <> inline int16 I16<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
-    template <> inline int16 I16<int64>(int64 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_INT16_MAX); return (int16)x; } // Upper bound is the signed maximum; larger values do not fit in int16.
-    template <> inline int16 I16<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
-    template <> inline int16 I16<int32>(int32 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_INT16_MAX); return (int16)x; } // Upper bound is the signed maximum; larger values do not fit in int16.
-    template <> inline int16 I16<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
-    //template <> inline int16 I16<int16>(int16 x) { return x; }
-    //template <> inline int16 I16<uint8>(uint8 x) { return x; }
-    //template <> inline int16 I16<int8>(int8 x) { return x; }
-
-    // uint8 casts:
-    template <typename T> inline uint8 U8(T x) { return x; } // Unchecked fallback for types without a specialization below.
-    template <> inline uint8 U8<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
-    template <> inline uint8 U8<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
-    template <> inline uint8 U8<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
-    template <> inline uint8 U8<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
-    template <> inline uint8 U8<uint16>(uint16 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
-    template <> inline uint8 U8<int16>(int16 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
-    //template <> inline uint8 U8<uint8>(uint8 x) { return x; }
-    template <> inline uint8 U8<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint8)x; } // Same-width signed source only needs the non-negative check.
-    //template <> inline uint8 U8<float>(int8 x) { nvDebugCheck(x >= 0.0f && x <= 255.0f); return (uint8)x; }
-
-    // int8 casts:
-    template <typename T> inline int8 I8(T x) { return x; } // Unchecked fallback for types without a specialization below.
-    template <> inline int8 I8<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
-    template <> inline int8 I8<int64>(int64 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; } // Upper bound is the signed maximum; larger values do not fit in int8.
-    template <> inline int8 I8<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
-    template <> inline int8 I8<int32>(int32 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; } // Upper bound is the signed maximum; larger values do not fit in int8.
-    template <> inline int8 I8<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
-    template <> inline int8 I8<int16>(int16 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; } // Upper bound is the signed maximum; larger values do not fit in int8.
-    template <> inline int8 I8<uint8>(uint8 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
-    //template <> inline int8 I8<int8>(int8 x) { return x; }
-
-    // float casts:
-    template <typename T> inline float F32(T x) { return x; } // Unchecked fallback for types without a specialization below.
-    template <> inline float F32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
-    template <> inline float F32<int64>(int64 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
-    template <> inline float F32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
-    template <> inline float F32<int32>(int32 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
-    // The compiler should not complain about these conversions (8- and 16-bit sources), so they are left to the fallback:
-    //template <> inline float F32<uint16>(uint16 x) { nvDebugCheck(return (float)x; }
-    //template <> inline float F32<int16>(int16 x) { nvDebugCheck(return (float)x; }
-    //template <> inline float F32<uint8>(uint8 x) { nvDebugCheck(return (float)x; }
-    //template <> inline float F32<int8>(int8 x) { nvDebugCheck(return (float)x; }
-
-
-    /// Swap two values.
-    template <typename T> 
-    inline void swap(T & a, T & b) // Exchanges a and b through one copy construction and two assignments.
-    {
-        T temp(a);
-        a = b; 
-        b = temp;
-    }
-
-    /// Return the maximum of the two arguments. For floating point values, it returns the second value if the first is NaN.
-    template <typename T> 
-    //inline const T & max(const T & a, const T & b)
-    inline T max(const T & a, const T & b) // Returns by value; the by-reference signature above is kept commented out.
-    {
-        return (b < a) ? a : b; // Yields b whenever (b < a) is false, including when the values compare equal.
-    }
-
-	/// Return the maximum of the four arguments.
-	template <typename T> 
-	//inline const T & max4(const T & a, const T & b, const T & c)
-	inline T max4(const T & a, const T & b, const T & c, const T & d) // Built from three pairwise max() calls.
-	{
-		return max(max(a, b), max(c, d));
-	}
-
-    /// Return the maximum of the three arguments.
-    template <typename T> 
-    //inline const T & max3(const T & a, const T & b, const T & c)
-    inline T max3(const T & a, const T & b, const T & c) // Built from two pairwise max() calls.
-    {
-        return max(a, max(b, c));
-    }
-
-    /// Return the minimum of two values.
-    template <typename T> 
-    //inline const T & min(const T & a, const T & b)
-    inline T min(const T & a, const T & b) // Returns by value; the by-reference signature above is kept commented out.
-    {
-        return (a < b) ? a : b; // Yields b whenever (a < b) is false, including when the values compare equal.
-    }
-
-    /// Return the minimum of the three arguments.
-    template <typename T> 
-    //inline const T & min3(const T & a, const T & b, const T & c)
-    inline T min3(const T & a, const T & b, const T & c) // Smallest of the three values, built from two pairwise min() calls.
-    {
-        return min(a, min(b, c));
-    }
-
-    /// Clamp between two values.
-    template <typename T> 
-    //inline const T & clamp(const T & x, const T & a, const T & b)
-    inline T clamp(const T & x, const T & a, const T & b) // a is the lower bound, b the upper bound.
-    {
-        return min(max(x, a), b); // Raise x to at least a, then cap the result at b.
-    }
-
-    /** Return the next power of two. 
-    * @see http://graphics.stanford.edu/~seander/bithacks.html
-    * @warning Behaviour for 0 is undefined.
-    * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x
-    * @note nextPowerOfTwo(x) = 2 << log2(x-1)
-    */
-    inline uint nextPowerOfTwo( uint x )
-    {
-        nvDebugCheck( x != 0 );
-#if 1	// On modern CPUs this is supposed to be as fast as using the bsr instruction.
-        x--; // Decrement first so that an exact power of two maps to itself.
-        x |= x >> 1; // Copy the highest set bit into every lower bit position...
-        x |= x >> 2;
-        x |= x >> 4;
-        x |= x >> 8;
-        x |= x >> 16; // (shifts stop at 16, which presumably assumes a 32-bit uint -- confirm)
-        return x+1;	// ...then add one to reach the power of two.
-#else
-        uint p = 1;
-        while( x > p ) { // Disabled alternative: double p until it reaches x.
-            p += p;
-        }
-        return p;
-#endif
-    }
-
-    /// Return true if @a n is a power of two.
-    inline bool isPowerOfTwo( uint n )
-    {
-        return (n & (n-1)) == 0; // Clearing the lowest set bit leaves zero for a single-bit value; note that n == 0 also yields true.
-    }
-
-
-    // @@ Move this to utils?
-    /// Delete all the elements of a container.
-    template <typename T>
-    void deleteAll(T & container) // T must provide PseudoIndex, start(), isDone(), advance() and operator[].
-    {
-        for (typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i))
-        {
-            delete container[i]; // Only the pointed-to objects are deleted; nothing is removed from the container here.
-        }
-    }
-
-
-
-    // @@ Specialize these methods for numeric, pointer, and pod types.
-
-    template <typename T>
-    void construct_range(T * restrict ptr, uint new_size, uint old_size) { // Default-initializes the elements at indices [old_size, new_size).
-        for (uint i = old_size; i < new_size; i++) {
-            new(ptr+i) T; // placement new
-        }
-    }
-
-    template <typename T>
-    void construct_range(T * restrict ptr, uint new_size, uint old_size, const T & elem) { // Copy-constructs elem into indices [old_size, new_size).
-        for (uint i = old_size; i < new_size; i++) {
-            new(ptr+i) T(elem); // placement new
-        }
-    }
-
-    template <typename T>
-    void construct_range(T * restrict ptr, uint new_size, uint old_size, const T * src) { // Copy-constructs ptr[i] from src[i] for i in [old_size, new_size).
-        for (uint i = old_size; i < new_size; i++) {
-            new(ptr+i) T(src[i]); // placement new; src is read at the same index, not from 0
-        }
-    }
-
-    template <typename T>
-    void destroy_range(T * restrict ptr, uint new_size, uint old_size) { // Destroys the elements at indices [new_size, old_size); no memory is freed here.
-        for (uint i = new_size; i < old_size; i++) {
-            (ptr+i)->~T(); // Explicit call to the destructor
-        }
-    }
-
-    template <typename T>
-    void fill(T * restrict dst, uint count, const T & value) { // Assigns value to dst[0 .. count-1].
-        for (uint i = 0; i < count; i++) {
-            dst[i] = value;
-        }
-    }
-
-    template <typename T>
-    void copy_range(T * restrict dst, const T * restrict src, uint count) { // Assigns src[i] to dst[i] for i in [0, count), in ascending order.
-        for (uint i = 0; i < count; i++) {
-            dst[i] = src[i];
-        }
-    }
-
-    template <typename T>
-    bool find(const T & element, const T * restrict ptr, uint begin, uint end, uint * index) { // Linear search over ptr[begin .. end-1] using operator==.
-        for (uint i = begin; i < end; i++) {
-            if (ptr[i] == element) {
-                if (index != NULL) *index = i; // index is optional; it receives the position of the first match.
-                return true;
-            }
-        }
-        return false; // *index is left untouched when nothing matches.
-    }
-
-} // nv namespace
-
-#endif // NV_CORE_UTILS_H

Some files were not shown because too many files changed in this diff