Jelajahi Sumber

squish update (primarily to add additional formats for later usage)

Azaezel 8 tahun lalu
induk
melakukan
fba9580004

+ 0 - 52
Engine/lib/squish/ChangeLog

@@ -1,52 +0,0 @@
-1.10
-* Iterative cluster fit is now considered to be a new compression mode
-* The core cluster fit is now 4x faster using contributions by Ignacio
-Castano from NVIDIA
-* The single colour lookup table has been halved by exploiting symmetry
-
-1.9
-* Added contributed SSE1 truncate implementation
-* Changed use of SQUISH_USE_SSE to be 1 for SSE and 2 for SSE2 instructions
-* Cluster fit is now iterative to further reduce image error
-
-1.8
-* Switched from using floor to trunc for much better SSE performance (again)
-* Xcode build now expects libpng in /usr/local for extra/squishpng
-
-1.7
-* Fixed floating-point equality issue in clusterfit sort (x86 affected only)
-* Implemented proper SSE(2) floor function for 50% speedup on SSE builds 
-* The range fit implementation now uses the correct colour metric
-
-1.6
-* Fixed bug in CompressImage where masked pixels were not skipped over
-* DXT3 and DXT5 alpha compression now properly use the mask to ignore pixels
-* Fixed major DXT1 bug that can generate unexpected transparent pixels
-
-1.5
-* Added CompressMasked function to handle incomplete DXT blocks more cleanly
-* Added kWeightColourByAlpha flag for better quality images when alpha blending
-
-1.4
-* Fixed stack overflow in rangefit
-
-1.3
-* Worked around SSE floor implementation bug, proper fix needed!
-* This release has visual studio and makefile builds that work
-
-1.2
-* Added provably optimal single colour compressor
-* Added extra/squishgen.cpp that generates single colour lookup tables
-
-1.1
-* Fixed a DXT1 colour output bug
-* Changed argument order for Decompress function to match Compress
-* Added GetStorageRequirements function
-* Added CompressImage function
-* Added DecompressImage function
-* Moved squishtool.cpp to extra/squishpng.cpp
-* Added extra/squishtest.cpp
-
-1.0
-* Initial release
-

+ 20 - 0
Engine/lib/squish/LICENSE

@@ -0,0 +1,20 @@
+	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+	Permission is hereby granted, free of charge, to any person obtaining
+	a copy of this software and associated documentation files (the 
+	"Software"), to	deal in the Software without restriction, including
+	without limitation the rights to use, copy, modify, merge, publish,
+	distribute, sublicense, and/or sell copies of the Software, and to 
+	permit persons to whom the Software is furnished to do so, subject to 
+	the following conditions:
+
+	The above copyright notice and this permission notice shall be included
+	in all copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
+	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
+	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+ 2 - 1
Engine/lib/squish/alpha.cpp

@@ -24,8 +24,9 @@
    -------------------------------------------------------------------------- */
    
 #include "alpha.h"
+
+#include <climits>
 #include <algorithm>
-#include <limits.h> 
 
 namespace squish {
 

+ 1 - 1
Engine/lib/squish/alpha.h

@@ -26,7 +26,7 @@
 #ifndef SQUISH_ALPHA_H
 #define SQUISH_ALPHA_H
 
-#include <squish.h>
+#include "squish.h"
 
 namespace squish {
 

+ 7 - 8
Engine/lib/squish/clusterfit.cpp

@@ -31,22 +31,21 @@
 
 namespace squish {
 
-ClusterFit::ClusterFit( ColourSet const* colours, int flags ) 
+ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric ) 
   : ColourFit( colours, flags )
 {
 	// set the iteration count
 	m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
 
-	// initialise the best error
-	m_besterror = VEC4_CONST( FLT_MAX );
-
-	// initialise the metric
-	bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
-	if( perceptual )
-		m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f );
+	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
+	if( metric )
+		m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
 	else
 		m_metric = VEC4_CONST( 1.0f );	
 
+	// initialise the best error
+	m_besterror = VEC4_CONST( FLT_MAX );
+
 	// cache some values
 	int const count = m_colours->GetCount();
 	Vec3 const* values = m_colours->GetPoints();

+ 2 - 2
Engine/lib/squish/clusterfit.h

@@ -27,7 +27,7 @@
 #ifndef SQUISH_CLUSTERFIT_H
 #define SQUISH_CLUSTERFIT_H
 
-#include <squish.h>
+#include "squish.h"
 #include "maths.h"
 #include "simd.h"
 #include "colourfit.h"
@@ -37,7 +37,7 @@ namespace squish {
 class ClusterFit : public ColourFit
 {
 public:
-	ClusterFit( ColourSet const* colours, int flags );
+	ClusterFit( ColourSet const* colours, int flags, float* metric );
 	
 private:
 	bool ConstructOrdering( Vec3 const& axis, int iteration );

+ 1 - 1
Engine/lib/squish/colourblock.h

@@ -26,7 +26,7 @@
 #ifndef SQUISH_COLOURBLOCK_H
 #define SQUISH_COLOURBLOCK_H
 
-#include <squish.h>
+#include "squish.h"
 #include "maths.h"
 
 namespace squish {

+ 4 - 0
Engine/lib/squish/colourfit.cpp

@@ -34,6 +34,10 @@ ColourFit::ColourFit( ColourSet const* colours, int flags )
 {
 }
 
+ColourFit::~ColourFit()
+{
+}
+
 void ColourFit::Compress( void* block )
 {
 	bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );

+ 4 - 1
Engine/lib/squish/colourfit.h

@@ -26,9 +26,11 @@
 #ifndef SQUISH_COLOURFIT_H
 #define SQUISH_COLOURFIT_H
 
-#include <squish.h>
+#include "squish.h"
 #include "maths.h"
 
+#include <climits>
+
 namespace squish {
 
 class ColourSet;
@@ -37,6 +39,7 @@ class ColourFit
 {
 public:
 	ColourFit( ColourSet const* colours, int flags );
+	virtual ~ColourFit();
 
 	void Compress( void* block );
 

+ 1 - 1
Engine/lib/squish/colourset.h

@@ -26,7 +26,7 @@
 #ifndef SQUISH_COLOURSET_H
 #define SQUISH_COLOURSET_H
 
-#include <squish.h>
+#include "squish.h"
 #include "maths.h"
 
 namespace squish {

+ 1 - 7
Engine/lib/squish/config.h

@@ -36,7 +36,7 @@
 #define SQUISH_USE_SSE 0
 #endif
 
-// Internally et SQUISH_USE_SIMD when either Altivec or SSE is available.
+// Internally set SQUISH_USE_SIMD when either Altivec or SSE is available.
 #if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
 #error "Cannot enable both Altivec and SSE!"
 #endif
@@ -46,10 +46,4 @@
 #define SQUISH_USE_SIMD 0
 #endif
 
-// TORQUE MODIFICATIONS
-#ifdef TORQUE_DEBUG
-#  undef SQUISH_USE_SSE
-#  define SQUISH_USE_SSE 0
-#endif
-
 #endif // ndef SQUISH_CONFIG_H

+ 33 - 1
Engine/lib/squish/maths.cpp

@@ -30,6 +30,7 @@
 */
 
 #include "maths.h"
+#include "simd.h"
 #include <cfloat>
 
 namespace squish {
@@ -44,7 +45,8 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
 		total += weights[i];
 		centroid += weights[i]*points[i];
 	}
-	centroid /= total;
+	if( total > FLT_EPSILON )
+		centroid /= total;
 
 	// accumulate the covariance matrix
 	Sym3x3 covariance( 0.0f );
@@ -65,6 +67,8 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
 	return covariance;
 }
 
+#if 0
+
 static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
 {
 	// compute M
@@ -224,4 +228,32 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
 	}
 }
 
+#else
+
+#define POWER_ITERATION_COUNT 	8
+
+Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
+{
+	Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
+	Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
+	Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
+	Vec4 v = VEC4_CONST( 1.0f );
+	for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
+	{
+		// matrix multiply
+		Vec4 w = row0*v.SplatX();
+		w = MultiplyAdd(row1, v.SplatY(), w);
+		w = MultiplyAdd(row2, v.SplatZ(), w);
+
+		// get max component from xyz in all channels
+		Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
+
+		// divide through and advance
+		v = w*Reciprocal(a);
+	}
+	return v.GetVec3();
+}
+
+#endif
+
 } // namespace squish

+ 5 - 6
Engine/lib/squish/rangefit.cpp

@@ -30,15 +30,14 @@
 
 namespace squish {
 
-RangeFit::RangeFit( ColourSet const* colours, int flags ) 
+RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric ) 
   : ColourFit( colours, flags )
 {
-	// initialise the metric
-	bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
-	if( perceptual )
-		m_metric = Vec3( 0.2126f, 0.7152f, 0.0722f );
+	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
+	if( metric )
+		m_metric = Vec3( metric[0], metric[1], metric[2] );
 	else
-		m_metric = Vec3( 1.0f );
+		m_metric = Vec3( 1.0f );	
 
 	// initialise the best error
 	m_besterror = FLT_MAX;

+ 2 - 2
Engine/lib/squish/rangefit.h

@@ -26,7 +26,7 @@
 #ifndef SQUISH_RANGEFIT_H
 #define SQUISH_RANGEFIT_H
 
-#include <squish.h>
+#include "squish.h"
 #include "colourfit.h"
 #include "maths.h"
 
@@ -37,7 +37,7 @@ class ColourSet;
 class RangeFit : public ColourFit
 {
 public:
-	RangeFit( ColourSet const* colours, int flags );
+	RangeFit( ColourSet const* colours, int flags, float* metric );
 	
 private:
 	virtual void Compress3( void* block );

+ 0 - 8
Engine/lib/squish/simd.h

@@ -27,14 +27,6 @@
 #define SQUISH_SIMD_H
 
 #include "maths.h"
-
-#if SQUISH_USE_ALTIVEC
-#include "simd_ve.h"
-#elif SQUISH_USE_SSE
-#include "simd_sse.h"
-#else
 #include "simd_float.h"
-#endif
-
 
 #endif // ndef SQUISH_SIMD_H

+ 0 - 180
Engine/lib/squish/simd_sse.h

@@ -1,180 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_SIMD_SSE_H
-#define SQUISH_SIMD_SSE_H
-
-#include <xmmintrin.h>
-#if ( SQUISH_USE_SSE > 1 )
-#include <emmintrin.h>
-#endif
-
-#define SQUISH_SSE_SPLAT( a )										\
-	( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
-
-#define SQUISH_SSE_SHUF( x, y, z, w )								\
-	( ( x ) | ( ( y ) << 2 ) | ( ( z ) << 4 ) | ( ( w ) << 6 ) )
-
-namespace squish {
-
-#define VEC4_CONST( X ) Vec4( X )
-
-class Vec4
-{
-public:
-	typedef Vec4 const& Arg;
-
-	Vec4() {}
-		
-	explicit Vec4( __m128 v ) : m_v( v ) {}
-	
-	Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
-	
-	Vec4& operator=( Vec4 const& arg )
-	{
-		m_v = arg.m_v;
-		return *this;
-	}
-	
-	explicit Vec4( float s ) : m_v( _mm_set1_ps( s ) ) {}
-	
-	Vec4( float x, float y, float z, float w ) : m_v( _mm_setr_ps( x, y, z, w ) ) {}
-	
-	Vec3 GetVec3() const
-	{
-#ifdef __GNUC__
-		__attribute__ ((__aligned__ (16))) float c[4];
-#else
-		__declspec(align(16)) float c[4];
-#endif
-		_mm_store_ps( c, m_v );
-		return Vec3( c[0], c[1], c[2] );
-	}
-	
-	Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); }
-	Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); }
-	Vec4 SplatZ() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 2 ) ) ); }
-	Vec4 SplatW() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 3 ) ) ); }
-
-	Vec4& operator+=( Arg v )
-	{
-		m_v = _mm_add_ps( m_v, v.m_v );
-		return *this;
-	}
-	
-	Vec4& operator-=( Arg v )
-	{
-		m_v = _mm_sub_ps( m_v, v.m_v );
-		return *this;
-	}
-	
-	Vec4& operator*=( Arg v )
-	{
-		m_v = _mm_mul_ps( m_v, v.m_v );
-		return *this;
-	}
-	
-	friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( _mm_add_ps( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( _mm_sub_ps( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( _mm_mul_ps( left.m_v, right.m_v ) );
-	}
-	
-	//! Returns a*b + c
-	friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return Vec4( _mm_add_ps( _mm_mul_ps( a.m_v, b.m_v ), c.m_v ) );
-	}
-	
-	//! Returns -( a*b - c )
-	friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return Vec4( _mm_sub_ps( c.m_v, _mm_mul_ps( a.m_v, b.m_v ) ) );
-	}
-	
-	friend Vec4 Reciprocal( Vec4::Arg v )
-	{
-		// get the reciprocal estimate
-		__m128 estimate = _mm_rcp_ps( v.m_v );
-
-		// one round of Newton-Rhaphson refinement
-		__m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.m_v ) );
-		return Vec4( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) );
-	}
-	
-	friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( _mm_min_ps( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( _mm_max_ps( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 Truncate( Vec4::Arg v )
-	{
-#if ( SQUISH_USE_SSE == 1 )
-		// convert to ints
-		__m128 input = v.m_v;
-		__m64 lo = _mm_cvttps_pi32( input );
-		__m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) );
-
-		// convert to floats
-		__m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) );
-		__m128 truncated = _mm_cvtpi32_ps( part, lo );
-		
-		// clear out the MMX multimedia state to allow FP calls later
-		_mm_empty(); 
-		return Vec4( truncated );
-#else
-		// use SSE2 instructions
-		return Vec4( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.m_v ) ) );
-#endif
-	}
-	
-	friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) 
-	{
-		__m128 bits = _mm_cmplt_ps( left.m_v, right.m_v );
-		int value = _mm_movemask_ps( bits );
-		return value != 0;
-	}
-	
-private:
-	__m128 m_v;
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_SIMD_SSE_H

+ 0 - 166
Engine/lib/squish/simd_ve.h

@@ -1,166 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_SIMD_VE_H
-#define SQUISH_SIMD_VE_H
-
-#include <altivec.h>
-#undef bool
-
-namespace squish {
-
-#define VEC4_CONST( X ) Vec4( ( vector float )( X ) )
-
-class Vec4
-{
-public:
-	typedef Vec4 Arg;
-
-	Vec4() {}
-		
-	explicit Vec4( vector float v ) : m_v( v ) {}
-	
-	Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
-	
-	Vec4& operator=( Vec4 const& arg )
-	{
-		m_v = arg.m_v;
-		return *this;
-	}
-	
-	explicit Vec4( float s )
-	{
-		union { vector float v; float c[4]; } u;
-		u.c[0] = s;
-		u.c[1] = s;
-		u.c[2] = s;
-		u.c[3] = s;
-		m_v = u.v;
-	}
-	
-	Vec4( float x, float y, float z, float w )
-	{
-		union { vector float v; float c[4]; } u;
-		u.c[0] = x;
-		u.c[1] = y;
-		u.c[2] = z;
-		u.c[3] = w;
-		m_v = u.v;
-	}
-	
-	Vec3 GetVec3() const
-	{
-		union { vector float v; float c[4]; } u;
-		u.v = m_v;
-		return Vec3( u.c[0], u.c[1], u.c[2] );
-	}
-	
-	Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); }
-	Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); }
-	Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); }
-	Vec4 SplatW() const { return Vec4( vec_splat( m_v, 3 ) ); }
-
-	Vec4& operator+=( Arg v )
-	{
-		m_v = vec_add( m_v, v.m_v );
-		return *this;
-	}
-	
-	Vec4& operator-=( Arg v )
-	{
-		m_v = vec_sub( m_v, v.m_v );
-		return *this;
-	}
-	
-	Vec4& operator*=( Arg v )
-	{
-		m_v = vec_madd( m_v, v.m_v, ( vector float )( -0.0f ) );
-		return *this;
-	}
-	
-	friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( vec_add( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( vec_sub( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( vec_madd( left.m_v, right.m_v, ( vector float )( -0.0f ) ) );
-	}
-	
-	//! Returns a*b + c
-	friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return Vec4( vec_madd( a.m_v, b.m_v, c.m_v ) );
-	}
-	
-	//! Returns -( a*b - c )
-	friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return Vec4( vec_nmsub( a.m_v, b.m_v, c.m_v ) );
-	}
-	
-	friend Vec4 Reciprocal( Vec4::Arg v )
-	{
-		// get the reciprocal estimate
-		vector float estimate = vec_re( v.m_v );
-		
-		// one round of Newton-Rhaphson refinement
-		vector float diff = vec_nmsub( estimate, v.m_v, ( vector float )( 1.0f ) );
-		return Vec4( vec_madd( diff, estimate, estimate ) );
-	}
-	
-	friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( vec_min( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( vec_max( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 Truncate( Vec4::Arg v )
-	{
-		return Vec4( vec_trunc( v.m_v ) );
-	}
-	
-	friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) 
-	{
-		return vec_any_lt( left.m_v, right.m_v ) != 0;
-	}
-	
-private:
-	vector float m_v;
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_SIMD_VE_H

+ 0 - 1
Engine/lib/squish/singlecolourfit.cpp

@@ -26,7 +26,6 @@
 #include "singlecolourfit.h"
 #include "colourset.h"
 #include "colourblock.h"
-#include <limits.h> 
 
 namespace squish {
 

+ 1 - 1
Engine/lib/squish/singlecolourfit.h

@@ -26,7 +26,7 @@
 #ifndef SQUISH_SINGLECOLOURFIT_H
 #define SQUISH_SINGLECOLOURFIT_H
 
-#include <squish.h>
+#include "squish.h"
 #include "colourfit.h"
 
 namespace squish {

+ 24 - 0
Engine/lib/squish/singlecolourlookup.inl

@@ -1,3 +1,27 @@
+/* -----------------------------------------------------------------------------
+
+	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+	Permission is hereby granted, free of charge, to any person obtaining
+	a copy of this software and associated documentation files (the 
+	"Software"), to	deal in the Software without restriction, including
+	without limitation the rights to use, copy, modify, merge, publish,
+	distribute, sublicense, and/or sell copies of the Software, and to 
+	permit persons to whom the Software is furnished to do so, subject to 
+	the following conditions:
+
+	The above copyright notice and this permission notice shall be included
+	in all copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
+	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
+	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+	
+   -------------------------------------------------------------------------- */
 
 static SingleColourLookup const lookup_5_3[] = 
 {

+ 0 - 20
Engine/lib/squish/squish-Info.plist

@@ -1,20 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-	<key>CFBundleDevelopmentRegion</key>
-	<string>English</string>
-	<key>CFBundleExecutable</key>
-	<string>${EXECUTABLE_NAME}</string>
-	<key>CFBundleIdentifier</key>
-	<string>com.sjbrown.squish</string>
-	<key>CFBundleInfoDictionaryVersion</key>
-	<string>6.0</string>
-	<key>CFBundlePackageType</key>
-	<string>FMWK</string>
-	<key>CFBundleSignature</key>
-	<string>????</string>
-	<key>CFBundleVersion</key>
-	<string>1.0</string>
-</dict>
-</plist>

+ 47 - 26
Engine/lib/squish/squish.cpp

@@ -23,7 +23,7 @@
 	
    -------------------------------------------------------------------------- */
    
-#include <squish.h>
+#include "squish.h"
 #include "colourset.h"
 #include "maths.h"
 #include "rangefit.h"
@@ -37,37 +37,58 @@ namespace squish {
 static int FixFlags( int flags )
 {
 	// grab the flag bits
-	int method = flags & ( kDxt1 | kDxt3 | kDxt5 );
+	int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
 	int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
-	int metric = flags & ( kColourMetricPerceptual | kColourMetricUniform );
 	int extra = flags & kWeightColourByAlpha;
 	
 	// set defaults
-	if( method != kDxt3 && method != kDxt5 )
+	if ( method != kDxt3
+	&&   method != kDxt5
+	&&   method != kBc4
+	&&   method != kBc5 )
+	{
 		method = kDxt1;
-	if( fit != kColourRangeFit )
+	}
+	if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
 		fit = kColourClusterFit;
-	if( metric != kColourMetricUniform )
-		metric = kColourMetricPerceptual;
 		
 	// done
-	return method | fit | metric | extra;
-}
-
-void Compress( u8 const* rgba, void* block, int flags )
-{
-	// compress with full mask
-	CompressMasked( rgba, 0xffff, block, flags );
+	return method | fit | extra;
 }
 
-void CompressMasked( u8 const* rgba, int mask, void* block, int flags )
+void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
 {
 	// fix any bad flags
 	flags = FixFlags( flags );
 
+	if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
+	{
+		u8 alpha[16*4];
+		for( int i = 0; i < 16; ++i )
+		{
+			alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
+		}
+
+		u8* rBlock = reinterpret_cast< u8* >( block );
+		CompressAlphaDxt5( alpha, mask, rBlock );
+
+		if ( ( flags & ( kBc5 ) ) != 0 )
+		{
+			for( int i = 0; i < 16; ++i )
+			{
+				alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
+			}
+
+			u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
+			CompressAlphaDxt5( alpha, mask, gBlock );
+		}
+
+		return;
+	}
+
 	// get the block locations
 	void* colourBlock = block;
-	void* alphaBock = block;
+	void* alphaBlock = block;
 	if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
 		colourBlock = reinterpret_cast< u8* >( block ) + 8;
 
@@ -84,21 +105,21 @@ void CompressMasked( u8 const* rgba, int mask, void* block, int flags )
 	else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
 	{
 		// do a range fit
-		RangeFit fit( &colours, flags );
+		RangeFit fit( &colours, flags, metric );
 		fit.Compress( colourBlock );
 	}
 	else
 	{
 		// default to a cluster fit (could be iterative or not)
-		ClusterFit fit( &colours, flags );
+		ClusterFit fit( &colours, flags, metric );
 		fit.Compress( colourBlock );
 	}
 	
 	// compress alpha separately if necessary
 	if( ( flags & kDxt3 ) != 0 )
-		CompressAlphaDxt3( rgba, mask, alphaBock );
+		CompressAlphaDxt3( rgba, mask, alphaBlock );
 	else if( ( flags & kDxt5 ) != 0 )
-		CompressAlphaDxt5( rgba, mask, alphaBock );
+		CompressAlphaDxt5( rgba, mask, alphaBlock );
 }
 
 void Decompress( u8* rgba, void const* block, int flags )
@@ -129,18 +150,18 @@ int GetStorageRequirements( int width, int height, int flags )
 	
 	// compute the storage requirements
 	int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
-	int blocksize = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
-	return blockcount*blocksize;	
+	int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
+	return blockcount*blocksize;
 }
 
-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags )
+void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
 {
 	// fix any bad flags
 	flags = FixFlags( flags );
 
 	// initialise the block output
 	u8* targetBlock = reinterpret_cast< u8* >( blocks );
-	int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
+	int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
 
 	// loop over blocks
 	for( int y = 0; y < height; y += 4 )
@@ -179,7 +200,7 @@ void CompressImage( u8 const* rgba, int width, int height, void* blocks, int fla
 			}
 			
 			// compress it into the output
-			CompressMasked( sourceRgba, mask, targetBlock, flags );
+			CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
 			
 			// advance
 			targetBlock += bytesPerBlock;
@@ -194,7 +215,7 @@ void DecompressImage( u8* rgba, int width, int height, void const* blocks, int f
 
 	// initialise the block input
 	u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
-	int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
+	int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
 
 	// loop over blocks
 	for( int y = 0; y < height; y += 4 )

+ 92 - 70
Engine/lib/squish/squish.h

@@ -39,31 +39,31 @@ typedef unsigned char u8;
 enum
 {
 	//! Use DXT1 compression.
-	kDxt1 = ( 1 << 0 ), 
-	
+	kDxt1 = ( 1 << 0 ),
+
 	//! Use DXT3 compression.
-	kDxt3 = ( 1 << 1 ), 
-	
+	kDxt3 = ( 1 << 1 ),
+
 	//! Use DXT5 compression.
-	kDxt5 = ( 1 << 2 ), 
-	
-	//! Use a very slow but very high quality colour compressor.
-	kColourIterativeClusterFit = ( 1 << 8 ),	
-	
+	kDxt5 = ( 1 << 2 ),
+
+	//! Use BC4 compression.
+	kBc4 = ( 1 << 3 ),
+
+	//! Use BC5 compression.
+	kBc5 = ( 1 << 4 ),
+
 	//! Use a slow but high quality colour compressor (the default).
-	kColourClusterFit = ( 1 << 3 ),	
-	
+	kColourClusterFit = ( 1 << 5 ),
+
 	//! Use a fast but low quality colour compressor.
-	kColourRangeFit	= ( 1 << 4 ),
-	
-	//! Use a perceptual metric for colour error (the default).
-	kColourMetricPerceptual = ( 1 << 5 ),
+	kColourRangeFit	= ( 1 << 6 ),
 
-	//! Use a uniform metric for colour error.
-	kColourMetricUniform = ( 1 << 6 ),
-	
 	//! Weight the colour by alpha during cluster fit (disabled by default).
-	kWeightColourByAlpha = ( 1 << 7 )
+	kWeightColourByAlpha = ( 1 << 7 ),
+
+	//! Use a very slow but very high quality colour compressor.
+	kColourIterativeClusterFit = ( 1 << 8 ),
 };
 
 // -----------------------------------------------------------------------------
@@ -71,74 +71,90 @@ enum
 /*! @brief Compresses a 4x4 block of pixels.
 
 	@param rgba		The rgba values of the 16 source pixels.
+	@param mask		The valid pixel mask.
 	@param block	Storage for the compressed DXT block.
 	@param flags	Compression flags.
+	@param metric	An optional perceptual metric.
 	
 	The source pixels should be presented as a contiguous array of 16 rgba
 	values, with each component as 1 byte each. In memory this should be:
 	
 		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
+		
+	The mask parameter enables only certain pixels within the block. The lowest
+	bit enables the first pixel and so on up to the 16th bit. Bits beyond the
+	16th bit are ignored. Pixels that are not enabled are allowed to take
+	arbitrary colours in the output block. An example of how this can be used
+	is in the CompressImage function to disable pixels outside the bounds of
+	the image when the width or height is not divisible by 4.
 	
 	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
 	however, DXT1 will be used by default if none is specified. When using DXT1 
 	compression, 8 bytes of storage are required for the compressed DXT block. 
 	DXT3 and DXT5 compression require 16 bytes of storage per block.
 	
-	The flags parameter can also specify a preferred colour compressor and 
-	colour error metric to use when fitting the RGB components of the data. 
-	Possible colour compressors are: kColourClusterFit (the default), 
-	kColourRangeFit or kColourIterativeClusterFit. Possible colour error metrics 
-	are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no 
-	flags are specified in any particular category then the default will be 
-	used. Unknown flags are ignored.
-	
-	When using kColourClusterFit, an additional flag can be specified to
-	weight the colour of each pixel by its alpha value. For images that are
-	rendered using alpha blending, this can significantly increase the 
-	perceived quality.
+	The flags parameter can also specify a preferred colour compressor to use 
+	when fitting the RGB components of the data. Possible colour compressors 
+	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
+	quality) or kColourIterativeClusterFit (slowest, best quality).
+		
+	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
+	flag can be specified to weight the importance of each pixel by its alpha 
+	value. For images that are rendered using alpha blending, this can 
+	significantly increase the perceived quality.
+	
+	The metric parameter can be used to weight the relative importance of each
+	colour channel, or pass NULL to use the default uniform weight of 
+	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
+	allowed either uniform or "perceptual" weights with the fixed values
+	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
+	contiguous array of 3 floats.
 */
-void Compress( u8 const* rgba, void* block, int flags );
+void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric = 0 );
 
 // -----------------------------------------------------------------------------
 
 /*! @brief Compresses a 4x4 block of pixels.
 
 	@param rgba		The rgba values of the 16 source pixels.
-	@param mask		The valid pixel mask.
 	@param block	Storage for the compressed DXT block.
 	@param flags	Compression flags.
+	@param metric	An optional perceptual metric.
 	
 	The source pixels should be presented as a contiguous array of 16 rgba
 	values, with each component as 1 byte each. In memory this should be:
 	
 		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
-		
-	The mask parameter enables only certain pixels within the block. The lowest
-	bit enables the first pixel and so on up to the 16th bit. Bits beyond the
-	16th bit are ignored. Pixels that are not enabled are allowed to take
-	arbitrary colours in the output block. An example of how this can be used
-	is in the CompressImage function to disable pixels outside the bounds of
-	the image when the width or height is not divisible by 4.
 	
 	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
 	however, DXT1 will be used by default if none is specified. When using DXT1 
 	compression, 8 bytes of storage are required for the compressed DXT block. 
 	DXT3 and DXT5 compression require 16 bytes of storage per block.
 	
-	The flags parameter can also specify a preferred colour compressor and 
-	colour error metric to use when fitting the RGB components of the data. 
-	Possible colour compressors are: kColourClusterFit (the default), 
-	kColourRangeFit or kColourIterativeClusterFit. Possible colour error metrics 
-	are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no 
-	flags are specified in any particular category then the default will be 
-	used. Unknown flags are ignored.
-	
-	When using kColourClusterFit, an additional flag can be specified to
-	weight the colour of each pixel by its alpha value. For images that are
-	rendered using alpha blending, this can significantly increase the 
-	perceived quality.
+	The flags parameter can also specify a preferred colour compressor to use 
+	when fitting the RGB components of the data. Possible colour compressors 
+	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
+	quality) or kColourIterativeClusterFit (slowest, best quality).
+		
+	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
+	flag can be specified to weight the importance of each pixel by its alpha 
+	value. For images that are rendered using alpha blending, this can 
+	significantly increase the perceived quality.
+	
+	The metric parameter can be used to weight the relative importance of each
+	colour channel, or pass NULL to use the default uniform weight of 
+	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
+	allowed either uniform or "perceptual" weights with the fixed values
+	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
+	contiguous array of 3 floats.
+	
+	This method is an inline that calls CompressMasked with a mask of 0xffff, 
+	provided for compatibility with older versions of squish.
 */
-void CompressMasked( u8 const* rgba, int mask, void* block, int flags );
+inline void Compress( u8 const* rgba, void* block, int flags, float* metric = 0 )
+{
+	CompressMasked( rgba, 0xffff, block, flags, metric );
+}
 
 // -----------------------------------------------------------------------------
 
@@ -186,6 +202,7 @@ int GetStorageRequirements( int width, int height, int flags );
 	@param height	The height of the source image.
 	@param blocks	Storage for the compressed output.
 	@param flags	Compression flags.
+	@param metric	An optional perceptual metric.
 	
 	The source pixels should be presented as a contiguous array of width*height
 	rgba values, with each component as 1 byte each. In memory this should be:
@@ -197,24 +214,29 @@ int GetStorageRequirements( int width, int height, int flags );
 	compression, 8 bytes of storage are required for each compressed DXT block. 
 	DXT3 and DXT5 compression require 16 bytes of storage per block.
 	
-	The flags parameter can also specify a preferred colour compressor and 
-	colour error metric to use when fitting the RGB components of the data. 
-	Possible colour compressors are: kColourClusterFit (the default), 
-	kColourRangeFit or kColourIterativeClusterFit. Possible colour error metrics 
-	are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no 
-	flags are specified in any particular category then the default will be 
-	used. Unknown flags are ignored.
-	
-	When using kColourClusterFit, an additional flag can be specified to
-	weight the colour of each pixel by its alpha value. For images that are
-	rendered using alpha blending, this can significantly increase the 
-	perceived quality.
-	
-	Internally this function calls squish::Compress for each block. To see how
-	much memory is required in the compressed image, use
-	squish::GetStorageRequirements.
+	The flags parameter can also specify a preferred colour compressor to use 
+	when fitting the RGB components of the data. Possible colour compressors 
+	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
+	quality) or kColourIterativeClusterFit (slowest, best quality).
+		
+	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
+	flag can be specified to weight the importance of each pixel by its alpha 
+	value. For images that are rendered using alpha blending, this can 
+	significantly increase the perceived quality.
+	
+	The metric parameter can be used to weight the relative importance of each
+	colour channel, or pass NULL to use the default uniform weight of 
+	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
+	allowed either uniform or "perceptual" weights with the fixed values
+	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
+	contiguous array of 3 floats.
+	
+	Internally this function calls squish::CompressMasked for each block, which 
+	allows for pixels outside the image to take arbitrary values. The function 
+	squish::GetStorageRequirements can be called to compute the amount of memory
+	to allocate for the compressed output.
 */
-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags );
+void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric = 0 );
 
 // -----------------------------------------------------------------------------
 

+ 0 - 508
Engine/lib/squish/texture_compression_s3tc.txt

@@ -1,508 +0,0 @@
-Name
-
-    EXT_texture_compression_s3tc
-
-Name Strings
-
-    GL_EXT_texture_compression_s3tc
-
-Contact
-
-    Pat Brown, NVIDIA Corporation (pbrown 'at' nvidia.com)
-
-Status
-
-    FINAL
-
-Version
-
-    1.1, 16 November 2001 (containing only clarifications relative to
-                           version 1.0, dated 7 July 2000)
-
-Number
-
-    198
-
-Dependencies
-
-    OpenGL 1.1 is required.
-
-    GL_ARB_texture_compression is required.
-
-    This extension is written against the OpenGL 1.2.1 Specification.
-
-Overview
-
-    This extension provides additional texture compression functionality
-    specific to S3's S3TC format (called DXTC in Microsoft's DirectX API),
-    subject to all the requirements and limitations described by the extension
-    GL_ARB_texture_compression.
-
-    This extension supports DXT1, DXT3, and DXT5 texture compression formats.
-    For the DXT1 image format, this specification supports an RGB-only mode
-    and a special RGBA mode with single-bit "transparent" alpha.
-
-IP Status
-
-    Contact S3 Incorporated (http://www.s3.com) regarding any intellectual
-    property issues associated with implementing this extension.
-
-    WARNING:  Vendors able to support S3TC texture compression in Direct3D
-    drivers do not necessarily have the right to use the same functionality in
-    OpenGL.
-
-Issues
-
-    (1) Should DXT2 and DXT4 (premultiplied alpha) formats be supported?
-
-        RESOLVED:  No -- insufficient interest.  Supporting DXT2 and DXT4
-        would require some rework to the TexEnv definition (maybe add a new
-        base internal format RGBA_PREMULTIPLIED_ALPHA) for these formats.
-        Note that the EXT_texture_env_combine extension (which extends normal
-        TexEnv modes) can be used to support textures with premultiplied alpha.
-
-    (2) Should generic "RGB_S3TC_EXT" and "RGBA_S3TC_EXT" enums be supported
-        or should we use only the DXT<n> enums?  
-
-        RESOLVED:  No.  A generic RGBA_S3TC_EXT is problematic because DXT3
-        and DXT5 are both nominally RGBA (and DXT1 with the 1-bit alpha is
-        also) yet one format must be chosen up front.
-
-    (3) Should TexSubImage support all block-aligned edits or just the minimal
-        functionality required by the ARB_texture_compression extension?
-
-        RESOLVED:  Allow all valid block-aligned edits.
-
-    (4) A pre-compressed image with a DXT1 format can be used as either an
-        RGB_S3TC_DXT1 or an RGBA_S3TC_DXT1 image.  If the image has
-        transparent texels, how are they treated in each format?
-
-        RESOLVED:  The renderer has to make sure that an RGB_S3TC_DXT1 format
-        is decoded as RGB (where alpha is effectively one for all texels),
-        while RGBA_S3TC_DXT1 is decoded as RGBA (where alpha is zero for all
-        texels with "transparent" encodings).  Otherwise, the formats are
-        identical.
-
-    (5) Is the encoding of the RGB components for DXT1 formats correct in this
-        spec?  MSDN documentation does not specify an RGB color for the
-        "transparent" encoding.  Is it really black?
-
-        RESOLVED:  Yes.  The specification for the DXT1 format initially
-        required black, but later changed that requirement to a
-        recommendation.  All vendors involved in the definition of this
-        specification support black.  In addition, specifying black has a
-        useful behavior.
-
-        When blending multiple texels (GL_LINEAR filtering), mixing opaque and
-        transparent samples is problematic.  Defining a black color on
-        transparent texels achieves a sensible result that works like a
-        texture with premultiplied alpha.  For example, if three opaque white
-        and one transparent sample is being averaged, the result would be a
-        75% intensity gray (with an alpha of 75%).  This is the same result on
-        the color channels as would be obtained using a white color, 75%
-        alpha, and a SRC_ALPHA blend factor.
-
-    (6) Is the encoding of the RGB components for DXT3 and DXT5 formats
-        correct in this spec?  MSDN documentation suggests that the RGB blocks
-        for DXT3 and DXT5 are decoded as described by the DXT1 format.
-
-        RESOLVED:  Yes -- this appears to be a bug in the MSDN documentation.
-        The specification for the DXT2-DXT5 formats requires decoding using the
-        opaque block encoding, regardless of the relative values of "color0"
-        and "color1".
-
-New Procedures and Functions
-
-    None.
-
-New Tokens
-
-    Accepted by the <internalformat> parameter of TexImage2D, CopyTexImage2D,
-    and CompressedTexImage2DARB and the <format> parameter of
-    CompressedTexSubImage2DARB:
-
-        COMPRESSED_RGB_S3TC_DXT1_EXT                   0x83F0
-        COMPRESSED_RGBA_S3TC_DXT1_EXT                  0x83F1
-        COMPRESSED_RGBA_S3TC_DXT3_EXT                  0x83F2
-        COMPRESSED_RGBA_S3TC_DXT5_EXT                  0x83F3
-
-Additions to Chapter 2 of the OpenGL 1.2.1 Specification (OpenGL Operation)
-
-    None.
-
-Additions to Chapter 3 of the OpenGL 1.2.1 Specification (Rasterization)
-
-    Add to Table 3.16.1:  Specific Compressed Internal Formats
-
-        Compressed Internal Format         Base Internal Format
-        ==========================         ====================
-        COMPRESSED_RGB_S3TC_DXT1_EXT       RGB
-        COMPRESSED_RGBA_S3TC_DXT1_EXT      RGBA
-        COMPRESSED_RGBA_S3TC_DXT3_EXT      RGBA
-        COMPRESSED_RGBA_S3TC_DXT5_EXT      RGBA
-
-    
-    Modify Section 3.8.2, Alternate Image Specification
-
-    (add to end of TexSubImage discussion, p.123 -- after edit from the
-    ARB_texture_compression spec)
-
-    If the internal format of the texture image being modified is
-    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
-    texture is stored using one of the several S3TC compressed texture image
-    formats.  Such images are easily edited along 4x4 texel boundaries, so the
-    limitations on TexSubImage2D or CopyTexSubImage2D parameters are relaxed.
-    TexSubImage2D and CopyTexSubImage2D will result in an INVALID_OPERATION
-    error only if one of the following conditions occurs:
-
-        * <width> is not a multiple of four or equal to TEXTURE_WIDTH, 
-          unless <xoffset> and <yoffset> are both zero.
-        * <height> is not a multiple of four or equal to TEXTURE_HEIGHT,
-          unless <xoffset> and <yoffset> are both zero.
-        * <xoffset> or <yoffset> is not a multiple of four.
-
-    The contents of any 4x4 block of texels of an S3TC compressed texture
-    image that does not intersect the area being modified are preserved during
-    valid TexSubImage2D and CopyTexSubImage2D calls.
-
-
-    Add to Section 3.8.2, Alternate Image Specification (adding to the end of
-    the CompressedTexImage section introduced by the ARB_texture_compression
-    spec)
-
-    If <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
-    COMPRESSED_RGBA_S3TC_DXT5_EXT, the compressed texture is stored using one
-    of several S3TC compressed texture image formats.  The S3TC texture
-    compression algorithm supports only 2D images without borders.
-    CompressedTexImage1DARB and CompressedTexImage3DARB produce an
-    INVALID_ENUM error if <internalformat> is an S3TC format.
-    CompressedTexImage2DARB will produce an INVALID_OPERATION error if
-    <border> is non-zero.
-
-
-    Add to Section 3.8.2, Alternate Image Specification (adding to the end of
-    the CompressedTexSubImage section introduced by the
-    ARB_texture_compression spec)
-
-    If the internal format of the texture image being modified is
-    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
-    texture is stored using one of the several S3TC compressed texture image
-    formats.  Since the S3TC texture compression algorithm supports only 2D
-    images, CompressedTexSubImage1DARB and CompressedTexSubImage3DARB produce
-    an INVALID_ENUM error if <format> is an S3TC format.  Since S3TC images
-    are easily edited along 4x4 texel boundaries, the limitations on
-    CompressedTexSubImage2D are relaxed.  CompressedTexSubImage2D will result
-    in an INVALID_OPERATION error only if one of the following conditions
-    occurs:
-
-        * <width> is not a multiple of four or equal to TEXTURE_WIDTH.
-        * <height> is not a multiple of four or equal to TEXTURE_HEIGHT.
-        * <xoffset> or <yoffset> is not a multiple of four.
-
-    The contents of any 4x4 block of texels of an S3TC compressed texture
-    image that does not intersect the area being modified are preserved during
-    valid TexSubImage2D and CopyTexSubImage2D calls.
-
-Additions to Chapter 4 of the OpenGL 1.2.1 Specification (Per-Fragment
-Operations and the Frame Buffer)
-
-    None.
-
-Additions to Chapter 5 of the OpenGL 1.2.1 Specification (Special Functions)
-
-    None.
-
-Additions to Chapter 6 of the OpenGL 1.2.1 Specification (State and
-State Requests)
-
-    None.
-
-Additions to Appendix A of the OpenGL 1.2.1 Specification (Invariance)
-
-    None.
-
-Additions to the AGL/GLX/WGL Specifications
-
-    None.
-
-GLX Protocol
-
-    None.
-
-Errors
-
-    INVALID_ENUM is generated by CompressedTexImage1DARB or
-    CompressedTexImage3DARB if <internalformat> is
-    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT.
-
-    INVALID_OPERATION is generated by CompressedTexImage2DARB if
-    <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
-    COMPRESSED_RGBA_S3TC_DXT5_EXT and <border> is not equal to zero.
-
-    INVALID_ENUM is generated by CompressedTexSubImage1DARB or
-    CompressedTexSubImage3DARB if <format> is COMPRESSED_RGB_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
-    COMPRESSED_RGBA_S3TC_DXT5_EXT.
-
-    INVALID_OPERATION is generated by TexSubImage2D, CopyTexSubImage2D, or
-    CompressedTexSubImage2D if TEXTURE_INTERNAL_FORMAT is
-    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT and any of
-    the following apply: <width> is not a multiple of four or equal to
-    TEXTURE_WIDTH; <height> is not a multiple of four or equal to
-    TEXTURE_HEIGHT; <xoffset> or <yoffset> is not a multiple of four.
-
-
-    The following restrictions from the ARB_texture_compression specification
-    do not apply to S3TC texture formats, since subimage modification is
-    straightforward as long as the subimage is properly aligned.
-
-    DELETE: INVALID_OPERATION is generated by TexSubImage1D, TexSubImage2D,
-    DELETE: TexSubImage3D, CopyTexSubImage1D, CopyTexSubImage2D, or
-    DELETE: CopyTexSubImage3D if the internal format of the texture image is
-    DELETE: compressed and <xoffset>, <yoffset>, or <zoffset> does not equal
-    DELETE: -b, where b is value of TEXTURE_BORDER.
-
-    DELETE: INVALID_VALUE is generated by CompressedTexSubImage1DARB,
-    DELETE: CompressedTexSubImage2DARB, or CompressedTexSubImage3DARB if the
-    DELETE: entire texture image is not being edited:  if <xoffset>,
-    DELETE: <yoffset>, or <zoffset> is greater than -b, <xoffset> + <width> is
-    DELETE: less than w+b, <yoffset> + <height> is less than h+b, or <zoffset>
-    DELETE: + <depth> is less than d+b, where b is the value of
-    DELETE: TEXTURE_BORDER, w is the value of TEXTURE_WIDTH, h is the value of
-    DELETE: TEXTURE_HEIGHT, and d is the value of TEXTURE_DEPTH.
-
-    See also errors in the GL_ARB_texture_compression specification.
-
-New State
-
-    In the "Textures" state table, increment the TEXTURE_INTERNAL_FORMAT
-    subscript for Z by 4 in the "Type" row.
-
-New Implementation Dependent State
-
-    None
-
-Appendix
-
-    S3TC Compressed Texture Image Formats
-
-    Compressed texture images stored using the S3TC compressed image formats
-    are represented as a collection of 4x4 texel blocks, where each block
-    contains 64 or 128 bits of texel data.  The image is encoded as a normal
-    2D raster image in which each 4x4 block is treated as a single pixel.  If
-    an S3TC image has a width or height less than four, the data corresponding
-    to texels outside the image are irrelevant and undefined.
-
-    When an S3TC image with a width of <w>, height of <h>, and block size of
-    <blocksize> (8 or 16 bytes) is decoded, the corresponding image size (in
-    bytes) is:
-    
-        ceil(<w>/4) * ceil(<h>/4) * blocksize.
-
-    When decoding an S3TC image, the block containing the texel at offset
-    (<x>, <y>) begins at an offset (in bytes) relative to the base of the
-    image of:
-
-        blocksize * (ceil(<w>/4) * floor(<y>/4) + floor(<x>/4)).
-
-    The data corresponding to a specific texel (<x>, <y>) are extracted from a
-    4x4 texel block using a relative (x,y) value of
-    
-        (<x> modulo 4, <y> modulo 4).
-
-    There are four distinct S3TC image formats:
-
-    COMPRESSED_RGB_S3TC_DXT1_EXT:  Each 4x4 block of texels consists of 64
-    bits of RGB image data.  
-
-    Each RGB image data block is encoded as a sequence of 8 bytes, called (in
-    order of increasing address):
-
-            c0_lo, c0_hi, c1_lo, c1_hi, bits_0, bits_1, bits_2, bits_3
-
-        The 8 bytes of the block are decoded into three quantities:
-
-            color0 = c0_lo + c0_hi * 256
-            color1 = c1_lo + c1_hi * 256
-            bits   = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * bits_3))
-        
-        color0 and color1 are 16-bit unsigned integers that are unpacked to
-        RGB colors RGB0 and RGB1 as though they were 16-bit packed pixels with
-        a <format> of RGB and a type of UNSIGNED_SHORT_5_6_5.
-
-        bits is a 32-bit unsigned integer, from which a two-bit control code
-        is extracted for a texel at location (x,y) in the block using:
-
-            code(x,y) = bits[2*(4*y+x)+1..2*(4*y+x)+0]
-        
-        where bit 31 is the most significant and bit 0 is the least
-        significant bit.
-
-        The RGB color for a texel at location (x,y) in the block is given by:
-
-            RGB0,              if color0 > color1 and code(x,y) == 0
-            RGB1,              if color0 > color1 and code(x,y) == 1
-            (2*RGB0+RGB1)/3,   if color0 > color1 and code(x,y) == 2
-            (RGB0+2*RGB1)/3,   if color0 > color1 and code(x,y) == 3
-
-            RGB0,              if color0 <= color1 and code(x,y) == 0
-            RGB1,              if color0 <= color1 and code(x,y) == 1
-            (RGB0+RGB1)/2,     if color0 <= color1 and code(x,y) == 2
-            BLACK,             if color0 <= color1 and code(x,y) == 3
-
-        Arithmetic operations are done per component, and BLACK refers to an
-        RGB color where red, green, and blue are all zero.
-
-    Since this image has an RGB format, there is no alpha component and the
-    image is considered fully opaque.
-
-
-    COMPRESSED_RGBA_S3TC_DXT1_EXT:  Each 4x4 block of texels consists of 64
-    bits of RGB image data and minimal alpha information.  The RGB components
-    of a texel are extracted in the same way as COMPRESSED_RGB_S3TC_DXT1_EXT.
- 
-        The alpha component for a texel at location (x,y) in the block is
-        given by:
-
-            0.0,               if color0 <= color1 and code(x,y) == 3
-            1.0,               otherwise
-
-        IMPORTANT:  When encoding an RGBA image into a format using 1-bit
-        alpha, any texels with an alpha component less than 0.5 end up with an
-        alpha of 0.0 and any texels with an alpha component greater than or
-        equal to 0.5 end up with an alpha of 1.0.  When encoding an RGBA image
-        into the COMPRESSED_RGBA_S3TC_DXT1_EXT format, the resulting red,
-        green, and blue components of any texels with a final alpha of 0.0
-        will automatically be zero (black).  If this behavior is not desired
-        by an application, it should not use COMPRESSED_RGBA_S3TC_DXT1_EXT.
-        This format will never be used when a generic compressed internal
-        format (Table 3.16.2) is specified, although the nearly identical
-        format COMPRESSED_RGB_S3TC_DXT1_EXT (above) may be.
-
-
-    COMPRESSED_RGBA_S3TC_DXT3_EXT:  Each 4x4 block of texels consists of 64
-    bits of uncompressed alpha image data followed by 64 bits of RGB image
-    data.  
-
-    Each RGB image data block is encoded according to the
-    COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
-    bits always use the non-transparent encodings.  In other words, they are
-    treated as though color0 > color1, regardless of the actual values of
-    color0 and color1.
-
-    Each alpha image data block is encoded as a sequence of 8 bytes, called
-    (in order of increasing address):
-
-            a0, a1, a2, a3, a4, a5, a6, a7
-
-        The 8 bytes of the block are decoded into one 64-bit integer:
-
-            alpha = a0 + 256 * (a1 + 256 * (a2 + 256 * (a3 + 256 * (a4 +
-                         256 * (a5 + 256 * (a6 + 256 * a7))))))
-
-        alpha is a 64-bit unsigned integer, from which a four-bit alpha value
-        is extracted for a texel at location (x,y) in the block using:
-
-            alpha(x,y) = alpha[4*(4*y+x)+3..4*(4*y+x)+0]
-
-        where bit 63 is the most significant and bit 0 is the least
-        significant bit.
-
-        The alpha component for a texel at location (x,y) in the block is
-        given by alpha(x,y) / 15.
-
- 
-    COMPRESSED_RGBA_S3TC_DXT5_EXT:  Each 4x4 block of texels consists of 64
-    bits of compressed alpha image data followed by 64 bits of RGB image data.
-
-    Each RGB image data block is encoded according to the
-    COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
-    bits always use the non-transparent encodings.  In other words, they are
-    treated as though color0 > color1, regardless of the actual values of
-    color0 and color1.
-
-    Each alpha image data block is encoded as a sequence of 8 bytes, called
-    (in order of increasing address):
-
-        alpha0, alpha1, bits_0, bits_1, bits_2, bits_3, bits_4, bits_5
-
-        The alpha0 and alpha1 are 8-bit unsigned bytes converted to alpha
-        components by multiplying by 1/255.
-
-        The 6 "bits" bytes of the block are decoded into one 48-bit integer:
-
-          bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 + 
-                          256 * (bits_4 + 256 * bits_5))))
-
-        bits is a 48-bit unsigned integer, from which a three-bit control code
-        is extracted for a texel at location (x,y) in the block using:
-
-            code(x,y) = bits[3*(4*y+x)+2..3*(4*y+x)+0]
-
-        where bit 47 is the most significant and bit 0 is the least
-        significant bit.
-
-        The alpha component for a texel at location (x,y) in the block is
-        given by:
-
-              alpha0,                   code(x,y) == 0
-              alpha1,                   code(x,y) == 1
-
-              (6*alpha0 + 1*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 2
-              (5*alpha0 + 2*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 3
-              (4*alpha0 + 3*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 4
-              (3*alpha0 + 4*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 5
-              (2*alpha0 + 5*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 6
-              (1*alpha0 + 6*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 7
-
-              (4*alpha0 + 1*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 2
-              (3*alpha0 + 2*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 3
-              (2*alpha0 + 3*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 4
-              (1*alpha0 + 4*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 5
-              0.0,                      alpha0 <= alpha1 and code(x,y) == 6
-              1.0,                      alpha0 <= alpha1 and code(x,y) == 7
-
-
-Revision History
-
-    1.1,  11/16/01 pbrown:    Updated contact info, clarified where texels
-                              fall within a single block.
-
-    1.0,  07/07/00 prbrown1:  Published final version agreed to by working
-                              group members.
-
-    0.9,  06/24/00 prbrown1:  Documented that block-aligned TexSubImage calls
-                              do not modify existing texels outside the
-                              modified blocks.  Added caveat to allow for a
-                              (0,0)-anchored TexSubImage operation of
-                              arbitrary size.
-
-    0.7,  04/11/00 prbrown1:  Added issues on DXT1, DXT3, and DXT5 encodings
-                              where the MSDN documentation doesn't match what
-                              is really done.  Added enum values from the
-                              extension registry.
-
-    0.4,  03/28/00 prbrown1:  Updated to reflect final version of the
-                              ARB_texture_compression extension.  Allowed
-                              block-aligned TexSubImage calls.
-
-    0.3,  03/07/00 prbrown1:  Resolved issues pertaining to the format of RGB
-                              blocks in the DXT3 and DXT5 formats (they don't
-                              ever use the "transparent" encoding).  Fixed
-                              decoding of DXT1 blocks.  Pointed out issue of
-                              "transparent" texels in DXT1 encodings having
-                              different behaviors for RGB and RGBA internal
-                              formats.
-
-    0.2,  02/23/00 prbrown1:  Minor revisions; added several issues.
-
-    0.11, 02/17/00 prbrown1:  Slight modification to error semantics
-                              (INVALID_ENUM instead of INVALID_OPERATION).
-
-    0.1,  02/15/00 prbrown1:  Initial revision.