8 years ago · 670f07ce81
--- a/include/bimg/bimg.h
+++ b/include/bimg/bimg.h
@@ -194,6 +194,7 @@ namespace bimg
 
				 		TextureFormat::Enum m_format;
			
 
				 		uint32_t m_width;
			
 
				 		uint32_t m_height;
			
 
				+		uint32_t m_depth;
			
 
				 		uint32_t m_blockSize;
			
 
				 		uint32_t m_size;
			
 
				 		uint8_t  m_bpp;
			
@@ -285,6 +286,7 @@ namespace bimg
 
				 		  void* _dst
			
 
				 		, uint32_t _width
			
 
				 		, uint32_t _height
			
 
				+		, uint32_t _depth
			
 
				 		, uint32_t _srcPitch
			
 
				 		, const void* _src
			
 
				 		);
			
@@ -294,6 +296,7 @@ namespace bimg
 
				 		  void* _dst
			
 
				 		, uint32_t _width
			
 
				 		, uint32_t _height
			
 
				+		, uint32_t _depth
			
 
				 		, uint32_t _srcPitch
			
 
				 		, const void* _src
			
 
				 		);
			
@@ -303,6 +306,7 @@ namespace bimg
 
				 		  void* _dst
			
 
				 		, uint32_t _width
			
 
				 		, uint32_t _height
			
 
				+		, uint32_t _depth
			
 
				 		, uint32_t _srcPitch
			
 
				 		, const void* _src
			
 
				 		);
			
@@ -312,6 +316,7 @@ namespace bimg
 
				 		  void* _dst
			
 
				 		, uint32_t _width
			
 
				 		, uint32_t _height
			
 
				+		, uint32_t _depth
			
 
				 		, uint32_t _srcPitch
			
 
				 		, const void* _src
			
 
				 		);
			
@@ -385,6 +390,7 @@ namespace bimg
 
				 		, UnpackFn _unpack
			
 
				 		, uint32_t _width
			
 
				 		, uint32_t _height
			
 
				+		, uint32_t _depth
			
 
				 		, uint32_t _srcPitch
			
 
				 		);
			
 
				 
			
@@ -396,6 +402,7 @@ namespace bimg
 
				 		, TextureFormat::Enum _srcFormat
			
 
				 		, uint32_t _width
			
 
				 		, uint32_t _height
			
 
				+		, uint32_t _depth
			
 
				 		);
			
 
				 
			
 
				 	///
			
@@ -540,6 +547,7 @@ namespace bimg
 
				 		, const void* _src
			
 
				 		, uint32_t _width
			
 
				 		, uint32_t _height
			
 
				+		, uint32_t _depth
			
 
				 		, uint32_t _dstPitch
			
 
				 		, TextureFormat::Enum _format
			
 
				 		);
			
--- a/include/bimg/encode.h
+++ b/include/bimg/encode.h
@@ -28,6 +28,7 @@ namespace bimg
 
				 		, const void* _src
			
 
				 		, uint32_t _width
			
 
				 		, uint32_t _height
			
 
				+		, uint32_t _depth
			
 
				 		, TextureFormat::Enum _format
			
 
				 		, Quality::Enum _quality
			
 
				 		, bx::Error* _err = NULL
			
@@ -40,6 +41,7 @@ namespace bimg
 
				 		, const void* _src
			
 
				 		, uint32_t _width
			
 
				 		, uint32_t _height
			
 
				+		, uint32_t _depth
			
 
				 		, TextureFormat::Enum _format
			
 
				 		, Quality::Enum _quality
			
 
				 		, bx::Error* _err = NULL
			
@@ -50,6 +52,7 @@ namespace bimg
 
				 		  void* _dst
			
 
				 		, uint32_t _width
			
 
				 		, uint32_t _height
			
 
				+		, uint32_t _depth
			
 
				 		, uint32_t _pitch
			
 
				 		, const void* _src
			
 
				 		);
			
--- a/src/image.cpp
+++ b/src/image.cpp
@@ -334,7 +334,7 @@ namespace bimg
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	void imageRgba8Downsample2x2Ref(void* _dst, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src)
			
 
				+	void imageRgba8Downsample2x2Ref(void* _dst, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch, const void* _src)
			
 
				 	{
			
 
				 		const uint32_t dstWidth  = _width/2;
			
 
				 		const uint32_t dstHeight = _height/2;
			
@@ -348,44 +348,47 @@ namespace bimg
 
				 		uint8_t* dst = (uint8_t*)_dst;
			
 
				 		const uint8_t* src = (const uint8_t*)_src;
			
 
				 
			
 
				-		for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstHeight; ++yy, src += ystep)
			
 
				+		for (uint32_t zz = 0; zz < _depth; ++zz)
			
 
				 		{
			
 
				-			const uint8_t* rgba = src;
			
 
				-			for (uint32_t xx = 0; xx < dstWidth; ++xx, rgba += 8, dst += 4)
			
 
				+			for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstHeight; ++yy, src += ystep)
			
 
				 			{
			
 
				-				float rr = bx::fpow(rgba[          0], 2.2f);
			
 
				-				float gg = bx::fpow(rgba[          1], 2.2f);
			
 
				-				float bb = bx::fpow(rgba[          2], 2.2f);
			
 
				-				float aa =          rgba[          3];
			
 
				-				rr      += bx::fpow(rgba[          4], 2.2f);
			
 
				-				gg      += bx::fpow(rgba[          5], 2.2f);
			
 
				-				bb      += bx::fpow(rgba[          6], 2.2f);
			
 
				-				aa      +=          rgba[          7];
			
 
				-				rr      += bx::fpow(rgba[_srcPitch+0], 2.2f);
			
 
				-				gg      += bx::fpow(rgba[_srcPitch+1], 2.2f);
			
 
				-				bb      += bx::fpow(rgba[_srcPitch+2], 2.2f);
			
 
				-				aa      +=          rgba[_srcPitch+3];
			
 
				-				rr      += bx::fpow(rgba[_srcPitch+4], 2.2f);
			
 
				-				gg      += bx::fpow(rgba[_srcPitch+5], 2.2f);
			
 
				-				bb      += bx::fpow(rgba[_srcPitch+6], 2.2f);
			
 
				-				aa      +=          rgba[_srcPitch+7];
			
 
				-
			
 
				-				rr *= 0.25f;
			
 
				-				gg *= 0.25f;
			
 
				-				bb *= 0.25f;
			
 
				-				aa *= 0.25f;
			
 
				-				rr = bx::fpow(rr, 1.0f/2.2f);
			
 
				-				gg = bx::fpow(gg, 1.0f/2.2f);
			
 
				-				bb = bx::fpow(bb, 1.0f/2.2f);
			
 
				-				dst[0] = (uint8_t)rr;
			
 
				-				dst[1] = (uint8_t)gg;
			
 
				-				dst[2] = (uint8_t)bb;
			
 
				-				dst[3] = (uint8_t)aa;
			
 
				+				const uint8_t* rgba = src;
			
 
				+				for (uint32_t xx = 0; xx < dstWidth; ++xx, rgba += 8, dst += 4)
			
 
				+				{
			
 
				+					float rr = bx::fpow(rgba[          0], 2.2f);
			
 
				+					float gg = bx::fpow(rgba[          1], 2.2f);
			
 
				+					float bb = bx::fpow(rgba[          2], 2.2f);
			
 
				+					float aa =          rgba[          3];
			
 
				+					rr      += bx::fpow(rgba[          4], 2.2f);
			
 
				+					gg      += bx::fpow(rgba[          5], 2.2f);
			
 
				+					bb      += bx::fpow(rgba[          6], 2.2f);
			
 
				+					aa      +=          rgba[          7];
			
 
				+					rr      += bx::fpow(rgba[_srcPitch+0], 2.2f);
			
 
				+					gg      += bx::fpow(rgba[_srcPitch+1], 2.2f);
			
 
				+					bb      += bx::fpow(rgba[_srcPitch+2], 2.2f);
			
 
				+					aa      +=          rgba[_srcPitch+3];
			
 
				+					rr      += bx::fpow(rgba[_srcPitch+4], 2.2f);
			
 
				+					gg      += bx::fpow(rgba[_srcPitch+5], 2.2f);
			
 
				+					bb      += bx::fpow(rgba[_srcPitch+6], 2.2f);
			
 
				+					aa      +=          rgba[_srcPitch+7];
			
 
				+
			
 
				+					rr *= 0.25f;
			
 
				+					gg *= 0.25f;
			
 
				+					bb *= 0.25f;
			
 
				+					aa *= 0.25f;
			
 
				+					rr = bx::fpow(rr, 1.0f/2.2f);
			
 
				+					gg = bx::fpow(gg, 1.0f/2.2f);
			
 
				+					bb = bx::fpow(bb, 1.0f/2.2f);
			
 
				+					dst[0] = (uint8_t)rr;
			
 
				+					dst[1] = (uint8_t)gg;
			
 
				+					dst[2] = (uint8_t)bb;
			
 
				+					dst[3] = (uint8_t)aa;
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	void imageRgba8Downsample2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src)
			
 
				+	void imageRgba8Downsample2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch, const void* _src)
			
 
				 	{
			
 
				 		const uint32_t dstWidth  = _width/2;
			
 
				 		const uint32_t dstHeight = _height/2;
			
@@ -410,105 +413,114 @@ namespace bimg
 
				 		const simd128_t linear = simd_ld(2.2f, 2.2f, 2.2f, 1.0f);
			
 
				 		const simd128_t quater = simd_splat(0.25f);
			
 
				 
			
 
				-		for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstHeight; ++yy, src += ystep)
			
 
				+		for (uint32_t zz = 0; zz < _depth; ++zz)
			
 
				 		{
			
 
				-			const uint8_t* rgba = src;
			
 
				-			for (uint32_t xx = 0; xx < dstWidth; ++xx, rgba += 8, dst += 4)
			
 
				+			for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstHeight; ++yy, src += ystep)
			
 
				 			{
			
 
				-				const simd128_t abgr0  = simd_splat(rgba);
			
 
				-				const simd128_t abgr1  = simd_splat(rgba+4);
			
 
				-				const simd128_t abgr2  = simd_splat(rgba+_srcPitch);
			
 
				-				const simd128_t abgr3  = simd_splat(rgba+_srcPitch+4);
			
 
				-
			
 
				-				const simd128_t abgr0m = simd_and(abgr0, umask);
			
 
				-				const simd128_t abgr1m = simd_and(abgr1, umask);
			
 
				-				const simd128_t abgr2m = simd_and(abgr2, umask);
			
 
				-				const simd128_t abgr3m = simd_and(abgr3, umask);
			
 
				-				const simd128_t abgr0x = simd_xor(abgr0m, wflip);
			
 
				-				const simd128_t abgr1x = simd_xor(abgr1m, wflip);
			
 
				-				const simd128_t abgr2x = simd_xor(abgr2m, wflip);
			
 
				-				const simd128_t abgr3x = simd_xor(abgr3m, wflip);
			
 
				-				const simd128_t abgr0f = simd_itof(abgr0x);
			
 
				-				const simd128_t abgr1f = simd_itof(abgr1x);
			
 
				-				const simd128_t abgr2f = simd_itof(abgr2x);
			
 
				-				const simd128_t abgr3f = simd_itof(abgr3x);
			
 
				-				const simd128_t abgr0c = simd_add(abgr0f, wadd);
			
 
				-				const simd128_t abgr1c = simd_add(abgr1f, wadd);
			
 
				-				const simd128_t abgr2c = simd_add(abgr2f, wadd);
			
 
				-				const simd128_t abgr3c = simd_add(abgr3f, wadd);
			
 
				-				const simd128_t abgr0n = simd_mul(abgr0c, unpack);
			
 
				-				const simd128_t abgr1n = simd_mul(abgr1c, unpack);
			
 
				-				const simd128_t abgr2n = simd_mul(abgr2c, unpack);
			
 
				-				const simd128_t abgr3n = simd_mul(abgr3c, unpack);
			
 
				-
			
 
				-				const simd128_t abgr0l = simd_pow(abgr0n, linear);
			
 
				-				const simd128_t abgr1l = simd_pow(abgr1n, linear);
			
 
				-				const simd128_t abgr2l = simd_pow(abgr2n, linear);
			
 
				-				const simd128_t abgr3l = simd_pow(abgr3n, linear);
			
 
				-
			
 
				-				const simd128_t sum0   = simd_add(abgr0l, abgr1l);
			
 
				-				const simd128_t sum1   = simd_add(abgr2l, abgr3l);
			
 
				-				const simd128_t sum2   = simd_add(sum0, sum1);
			
 
				-				const simd128_t avg0   = simd_mul(sum2, quater);
			
 
				-				const simd128_t avg1   = simd_pow(avg0, gamma);
			
 
				-
			
 
				-				const simd128_t avg2   = simd_mul(avg1, pack);
			
 
				-				const simd128_t ftoi0  = simd_ftoi(avg2);
			
 
				-				const simd128_t ftoi1  = simd_and(ftoi0, pmask);
			
 
				-				const simd128_t zwxy   = simd_swiz_zwxy(ftoi1);
			
 
				-				const simd128_t tmp0   = simd_or(ftoi1, zwxy);
			
 
				-				const simd128_t yyyy   = simd_swiz_yyyy(tmp0);
			
 
				-				const simd128_t tmp1   = simd_iadd(yyyy, yyyy);
			
 
				-				const simd128_t result = simd_or(tmp0, tmp1);
			
 
				-
			
 
				-				simd_stx(dst, result);
			
 
				+				const uint8_t* rgba = src;
			
 
				+				for (uint32_t xx = 0; xx < dstWidth; ++xx, rgba += 8, dst += 4)
			
 
				+				{
			
 
				+					const simd128_t abgr0  = simd_splat(rgba);
			
 
				+					const simd128_t abgr1  = simd_splat(rgba+4);
			
 
				+					const simd128_t abgr2  = simd_splat(rgba+_srcPitch);
			
 
				+					const simd128_t abgr3  = simd_splat(rgba+_srcPitch+4);
			
 
				+
			
 
				+					const simd128_t abgr0m = simd_and(abgr0, umask);
			
 
				+					const simd128_t abgr1m = simd_and(abgr1, umask);
			
 
				+					const simd128_t abgr2m = simd_and(abgr2, umask);
			
 
				+					const simd128_t abgr3m = simd_and(abgr3, umask);
			
 
				+					const simd128_t abgr0x = simd_xor(abgr0m, wflip);
			
 
				+					const simd128_t abgr1x = simd_xor(abgr1m, wflip);
			
 
				+					const simd128_t abgr2x = simd_xor(abgr2m, wflip);
			
 
				+					const simd128_t abgr3x = simd_xor(abgr3m, wflip);
			
 
				+					const simd128_t abgr0f = simd_itof(abgr0x);
			
 
				+					const simd128_t abgr1f = simd_itof(abgr1x);
			
 
				+					const simd128_t abgr2f = simd_itof(abgr2x);
			
 
				+					const simd128_t abgr3f = simd_itof(abgr3x);
			
 
				+					const simd128_t abgr0c = simd_add(abgr0f, wadd);
			
 
				+					const simd128_t abgr1c = simd_add(abgr1f, wadd);
			
 
				+					const simd128_t abgr2c = simd_add(abgr2f, wadd);
			
 
				+					const simd128_t abgr3c = simd_add(abgr3f, wadd);
			
 
				+					const simd128_t abgr0n = simd_mul(abgr0c, unpack);
			
 
				+					const simd128_t abgr1n = simd_mul(abgr1c, unpack);
			
 
				+					const simd128_t abgr2n = simd_mul(abgr2c, unpack);
			
 
				+					const simd128_t abgr3n = simd_mul(abgr3c, unpack);
			
 
				+
			
 
				+					const simd128_t abgr0l = simd_pow(abgr0n, linear);
			
 
				+					const simd128_t abgr1l = simd_pow(abgr1n, linear);
			
 
				+					const simd128_t abgr2l = simd_pow(abgr2n, linear);
			
 
				+					const simd128_t abgr3l = simd_pow(abgr3n, linear);
			
 
				+
			
 
				+					const simd128_t sum0   = simd_add(abgr0l, abgr1l);
			
 
				+					const simd128_t sum1   = simd_add(abgr2l, abgr3l);
			
 
				+					const simd128_t sum2   = simd_add(sum0, sum1);
			
 
				+					const simd128_t avg0   = simd_mul(sum2, quater);
			
 
				+					const simd128_t avg1   = simd_pow(avg0, gamma);
			
 
				+
			
 
				+					const simd128_t avg2   = simd_mul(avg1, pack);
			
 
				+					const simd128_t ftoi0  = simd_ftoi(avg2);
			
 
				+					const simd128_t ftoi1  = simd_and(ftoi0, pmask);
			
 
				+					const simd128_t zwxy   = simd_swiz_zwxy(ftoi1);
			
 
				+					const simd128_t tmp0   = simd_or(ftoi1, zwxy);
			
 
				+					const simd128_t yyyy   = simd_swiz_yyyy(tmp0);
			
 
				+					const simd128_t tmp1   = simd_iadd(yyyy, yyyy);
			
 
				+					const simd128_t result = simd_or(tmp0, tmp1);
			
 
				+
			
 
				+					simd_stx(dst, result);
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	void imageRgba32fToLinear(void* _dst, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src)
			
 
				+	void imageRgba32fToLinear(void* _dst, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch, const void* _src)
			
 
				 	{
			
 
				 		      uint8_t* dst = (      uint8_t*)_dst;
			
 
				 		const uint8_t* src = (const uint8_t*)_src;
			
 
				 
			
 
				-		for (uint32_t yy = 0; yy < _height; ++yy, src += _srcPitch, dst += _width*16)
			
 
				+		for (uint32_t zz = 0; zz < _depth; ++zz)
			
 
				 		{
			
 
				-			for (uint32_t xx = 0; xx < _width; ++xx)
			
 
				+			for (uint32_t yy = 0; yy < _height; ++yy, src += _srcPitch, dst += _width*16)
			
 
				 			{
			
 
				-				const uint32_t offset = xx * 16;
			
 
				-				      float* fd = (      float*)(dst + offset);
			
 
				-				const float* fs = (const float*)(src + offset);
			
 
				-
			
 
				-				fd[0] = bx::fpow(fs[0], 1.0f/2.2f);
			
 
				-				fd[1] = bx::fpow(fs[1], 1.0f/2.2f);
			
 
				-				fd[2] = bx::fpow(fs[2], 1.0f/2.2f);
			
 
				-				fd[3] =          fs[3];
			
 
				+				for (uint32_t xx = 0; xx < _width; ++xx)
			
 
				+				{
			
 
				+					const uint32_t offset = xx * 16;
			
 
				+						  float* fd = (      float*)(dst + offset);
			
 
				+					const float* fs = (const float*)(src + offset);
			
 
				+
			
 
				+					fd[0] = bx::fpow(fs[0], 1.0f/2.2f);
			
 
				+					fd[1] = bx::fpow(fs[1], 1.0f/2.2f);
			
 
				+					fd[2] = bx::fpow(fs[2], 1.0f/2.2f);
			
 
				+					fd[3] =          fs[3];
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	void imageRgba32fToGamma(void* _dst, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src)
			
 
				+	void imageRgba32fToGamma(void* _dst, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch, const void* _src)
			
 
				 	{
			
 
				 		      uint8_t* dst = (      uint8_t*)_dst;
			
 
				 		const uint8_t* src = (const uint8_t*)_src;
			
 
				 
			
 
				-		for (uint32_t yy = 0; yy < _height; ++yy, src += _srcPitch, dst += _width*16)
			
 
				+		for (uint32_t zz = 0; zz < _depth; ++zz)
			
 
				 		{
			
 
				-			for (uint32_t xx = 0; xx < _width; ++xx)
			
 
				+			for (uint32_t yy = 0; yy < _height; ++yy, src += _srcPitch, dst += _width*16)
			
 
				 			{
			
 
				-				const uint32_t offset = xx * 16;
			
 
				-				      float* fd = (      float*)(dst + offset);
			
 
				-				const float* fs = (const float*)(src + offset);
			
 
				-
			
 
				-				fd[0] = bx::fpow(fs[0], 2.2f);
			
 
				-				fd[1] = bx::fpow(fs[1], 2.2f);
			
 
				-				fd[2] = bx::fpow(fs[2], 2.2f);
			
 
				-				fd[3] =          fs[3];
			
 
				+				for (uint32_t xx = 0; xx < _width; ++xx)
			
 
				+				{
			
 
				+					const uint32_t offset = xx * 16;
			
 
				+						  float* fd = (      float*)(dst + offset);
			
 
				+					const float* fs = (const float*)(src + offset);
			
 
				+
			
 
				+					fd[0] = bx::fpow(fs[0], 2.2f);
			
 
				+					fd[1] = bx::fpow(fs[1], 2.2f);
			
 
				+					fd[2] = bx::fpow(fs[2], 2.2f);
			
 
				+					fd[3] =          fs[3];
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	void imageRgba32fLinearDownsample2x2Ref(void* _dst, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src)
			
 
				+	void imageRgba32fLinearDownsample2x2Ref(void* _dst, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch, const void* _src)
			
 
				 	{
			
 
				 		const uint32_t dstWidth  = _width/2;
			
 
				 		const uint32_t dstHeight = _height/2;
			
@@ -522,47 +534,50 @@ namespace bimg
 
				 		const uint8_t* src = (const uint8_t*)_src;
			
 
				 		uint8_t* dst = (uint8_t*)_dst;
			
 
				 
			
 
				-		for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstHeight; ++yy, src += ystep)
			
 
				+		for (uint32_t zz = 0; zz < _depth; ++zz)
			
 
				 		{
			
 
				-			const float* rgba0 = (const float*)&src[0];
			
 
				-			const float* rgba1 = (const float*)&src[_srcPitch];
			
 
				-			for (uint32_t xx = 0; xx < dstWidth; ++xx, rgba0 += 8, rgba1 += 8, dst += 16)
			
 
				+			for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstHeight; ++yy, src += ystep)
			
 
				 			{
			
 
				-				float xyz[4];
			
 
				-
			
 
				-				xyz[0]  = rgba0[0];
			
 
				-				xyz[1]  = rgba0[1];
			
 
				-				xyz[2]  = rgba0[2];
			
 
				-				xyz[3]  = rgba0[3];
			
 
				-
			
 
				-				xyz[0] += rgba0[4];
			
 
				-				xyz[1] += rgba0[5];
			
 
				-				xyz[2] += rgba0[6];
			
 
				-				xyz[3] += rgba0[7];
			
 
				-
			
 
				-				xyz[0] += rgba1[0];
			
 
				-				xyz[1] += rgba1[1];
			
 
				-				xyz[2] += rgba1[2];
			
 
				-				xyz[3] += rgba1[3];
			
 
				-
			
 
				-				xyz[0] += rgba1[4];
			
 
				-				xyz[1] += rgba1[5];
			
 
				-				xyz[2] += rgba1[6];
			
 
				-				xyz[3] += rgba1[7];
			
 
				-
			
 
				-				xyz[0] *= 0.25f;
			
 
				-				xyz[1] *= 0.25f;
			
 
				-				xyz[2] *= 0.25f;
			
 
				-				xyz[3] *= 0.25f;
			
 
				-
			
 
				-				bx::packRgba32F(dst, xyz);
			
 
				+				const float* rgba0 = (const float*)&src[0];
			
 
				+				const float* rgba1 = (const float*)&src[_srcPitch];
			
 
				+				for (uint32_t xx = 0; xx < dstWidth; ++xx, rgba0 += 8, rgba1 += 8, dst += 16)
			
 
				+				{
			
 
				+					float xyz[4];
			
 
				+
			
 
				+					xyz[0]  = rgba0[0];
			
 
				+					xyz[1]  = rgba0[1];
			
 
				+					xyz[2]  = rgba0[2];
			
 
				+					xyz[3]  = rgba0[3];
			
 
				+
			
 
				+					xyz[0] += rgba0[4];
			
 
				+					xyz[1] += rgba0[5];
			
 
				+					xyz[2] += rgba0[6];
			
 
				+					xyz[3] += rgba0[7];
			
 
				+
			
 
				+					xyz[0] += rgba1[0];
			
 
				+					xyz[1] += rgba1[1];
			
 
				+					xyz[2] += rgba1[2];
			
 
				+					xyz[3] += rgba1[3];
			
 
				+
			
 
				+					xyz[0] += rgba1[4];
			
 
				+					xyz[1] += rgba1[5];
			
 
				+					xyz[2] += rgba1[6];
			
 
				+					xyz[3] += rgba1[7];
			
 
				+
			
 
				+					xyz[0] *= 0.25f;
			
 
				+					xyz[1] *= 0.25f;
			
 
				+					xyz[2] *= 0.25f;
			
 
				+					xyz[3] *= 0.25f;
			
 
				+
			
 
				+					bx::packRgba32F(dst, xyz);
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	void imageRgba32fLinearDownsample2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src)
			
 
				+	void imageRgba32fLinearDownsample2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch, const void* _src)
			
 
				 	{
			
 
				-		imageRgba32fLinearDownsample2x2Ref(_dst, _width, _height, _srcPitch, _src);
			
 
				+		imageRgba32fLinearDownsample2x2Ref(_dst, _width, _height, _depth, _srcPitch, _src);
			
 
				 	}
			
 
				 
			
 
				 	void imageRgba32fDownsample2x2NormalMapRef(void* _dst, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src)
			
@@ -813,25 +828,28 @@ namespace bimg
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	void imageConvert(void* _dst, uint32_t _dstBpp, PackFn _pack, const void* _src, uint32_t _srcBpp, UnpackFn _unpack, uint32_t _width, uint32_t _height, uint32_t _srcPitch)
			
 
				+	void imageConvert(void* _dst, uint32_t _dstBpp, PackFn _pack, const void* _src, uint32_t _srcBpp, UnpackFn _unpack, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch)
			
 
				 	{
			
 
				 		const uint8_t* src = (uint8_t*)_src;
			
 
				 		uint8_t* dst = (uint8_t*)_dst;
			
 
				 
			
 
				 		const uint32_t dstPitch = _width * _dstBpp / 8;
			
 
				 
			
 
				-		for (uint32_t yy = 0; yy < _height; ++yy, src += _srcPitch, dst += dstPitch)
			
 
				+		for (uint32_t zz = 0; zz < _depth; ++zz)
			
 
				 		{
			
 
				-			for (uint32_t xx = 0; xx < _width; ++xx)
			
 
				+			for (uint32_t yy = 0; yy < _height; ++yy, src += _srcPitch, dst += dstPitch)
			
 
				 			{
			
 
				-				float rgba[4];
			
 
				-				_unpack(rgba, &src[xx*_srcBpp/8]);
			
 
				-				_pack(&dst[xx*_dstBpp/8], rgba);
			
 
				+				for (uint32_t xx = 0; xx < _width; ++xx)
			
 
				+				{
			
 
				+					float rgba[4];
			
 
				+					_unpack(rgba, &src[xx*_srcBpp/8]);
			
 
				+					_pack(&dst[xx*_dstBpp/8], rgba);
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	bool imageConvert(void* _dst, TextureFormat::Enum _dstFormat, const void* _src, TextureFormat::Enum _srcFormat, uint32_t _width, uint32_t _height, uint32_t _srcPitch)
			
 
				+	bool imageConvert(void* _dst, TextureFormat::Enum _dstFormat, const void* _src, TextureFormat::Enum _srcFormat, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch)
			
 
				 	{
			
 
				 		UnpackFn unpack = s_packUnpack[_srcFormat].unpack;
			
 
				 		PackFn   pack   = s_packUnpack[_dstFormat].pack;
			
@@ -843,22 +861,22 @@ namespace bimg
 
				 
			
 
				 		const uint32_t srcBpp = s_imageBlockInfo[_srcFormat].bitsPerPixel;
			
 
				 		const uint32_t dstBpp = s_imageBlockInfo[_dstFormat].bitsPerPixel;
			
 
				-		imageConvert(_dst, dstBpp, pack, _src, srcBpp, unpack, _width, _height, _srcPitch);
			
 
				+		imageConvert(_dst, dstBpp, pack, _src, srcBpp, unpack, _width, _height, _depth, _srcPitch);
			
 
				 
			
 
				 		return true;
			
 
				 	}
			
 
				 
			
 
				-	bool imageConvert(void* _dst, TextureFormat::Enum _dstFormat, const void* _src, TextureFormat::Enum _srcFormat, uint32_t _width, uint32_t _height)
			
 
				+	bool imageConvert(void* _dst, TextureFormat::Enum _dstFormat, const void* _src, TextureFormat::Enum _srcFormat, uint32_t _width, uint32_t _height, uint32_t _depth)
			
 
				 	{
			
 
				 		const uint32_t srcBpp = s_imageBlockInfo[_srcFormat].bitsPerPixel;
			
 
				 
			
 
				 		if (_dstFormat == _srcFormat)
			
 
				 		{
			
 
				-			bx::memCopy(_dst, _src, _width*_height*srcBpp/8);
			
 
				+			bx::memCopy(_dst, _src, _width*_height*_depth*srcBpp/8);
			
 
				 			return true;
			
 
				 		}
			
 
				 
			
 
				-		return imageConvert(_dst, _dstFormat, _src, _srcFormat, _width, _height, _width*srcBpp/8);
			
 
				+		return imageConvert(_dst, _dstFormat, _src, _srcFormat, _width, _height, _depth, _width*srcBpp/8);
			
 
				 	}
			
 
				 
			
 
				 	ImageContainer* imageConvert(bx::AllocatorI* _allocator, TextureFormat::Enum _dstFormat, const ImageContainer& _input)
			
@@ -892,6 +910,7 @@ namespace bimg
 
				 							, mip.m_format
			
 
				 							, mip.m_width
			
 
				 							, mip.m_height
			
 
				+							, mip.m_depth
			
 
				 							);
			
 
				 					BX_CHECK(ok, "Conversion from %s to %s failed!"
			
 
				 							, getName(_input.m_format)
			
@@ -2955,7 +2974,7 @@ namespace bimg
 
				 			{
			
 
				 				const uint32_t srcBpp   = s_imageBlockInfo[_srcFormat].bitsPerPixel;
			
 
				 				const uint32_t srcPitch = _width * srcBpp / 8;
			
 
				-				if (!imageConvert(_dst, TextureFormat::BGRA8, _src, _srcFormat, _width, _height, srcPitch) )
			
 
				+				if (!imageConvert(_dst, TextureFormat::BGRA8, _src, _srcFormat, _width, _height, 1, srcPitch) )
			
 
				 				{
			
 
				 					// Failed to convert, just make ugly red-yellow checkerboard texture.
			
 
				 					imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xffff0000), UINT32_C(0xffffff00) );
			
@@ -3048,66 +3067,73 @@ namespace bimg
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	void imageDecodeToRgba32f(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _dstPitch, TextureFormat::Enum _format)
			
 
				+	void imageDecodeToRgba32f(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _dstPitch, TextureFormat::Enum _format)
			
 
				 	{
			
 
				 		const uint8_t* src = (const uint8_t*)_src;
			
 
				 		uint8_t* dst = (uint8_t*)_dst;
			
 
				 
			
 
				-		switch (_format)
			
 
				+		const uint32_t srcBpp   = s_imageBlockInfo[_format].bitsPerPixel;
			
 
				+		const uint32_t srcPitch = _width*srcBpp/8;
			
 
				+
			
 
				+		for (uint32_t zz = 0; zz < _depth; ++zz, src += _height*srcPitch, dst += _height*_dstPitch)
			
 
				 		{
			
 
				-		case TextureFormat::BC5:
			
 
				+			switch (_format)
			
 
				 			{
			
 
				-				uint32_t width  = _width/4;
			
 
				-				uint32_t height = _height/4;
			
 
				-
			
 
				-				for (uint32_t yy = 0; yy < height; ++yy)
			
 
				+			case TextureFormat::BC5:
			
 
				 				{
			
 
				-					for (uint32_t xx = 0; xx < width; ++xx)
			
 
				-					{
			
 
				-						uint8_t temp[16*4];
			
 
				+					uint32_t width  = _width/4;
			
 
				+					uint32_t height = _height/4;
			
 
				 
			
 
				-						decodeBlockDxt45A(temp+2, src);
			
 
				-						src += 8;
			
 
				-						decodeBlockDxt45A(temp+1, src);
			
 
				-						src += 8;
			
 
				+					const uint8_t* srcData = src;
			
 
				 
			
 
				-						for (uint32_t ii = 0; ii < 16; ++ii)
			
 
				+					for (uint32_t yy = 0; yy < height; ++yy)
			
 
				+					{
			
 
				+						for (uint32_t xx = 0; xx < width; ++xx)
			
 
				 						{
			
 
				-							float nx = temp[ii*4+2]*2.0f/255.0f - 1.0f;
			
 
				-							float ny = temp[ii*4+1]*2.0f/255.0f - 1.0f;
			
 
				-							float nz = bx::fsqrt(1.0f - nx*nx - ny*ny);
			
 
				-
			
 
				-							const uint32_t offset = (yy*4 + ii/4)*_width*16 + (xx*4 + ii%4)*16;
			
 
				-							float* block = (float*)&dst[offset];
			
 
				-							block[0] = nx;
			
 
				-							block[1] = ny;
			
 
				-							block[2] = nz;
			
 
				-							block[3] = 0.0f;
			
 
				+							uint8_t temp[16*4];
			
 
				+
			
 
				+							decodeBlockDxt45A(temp+2, srcData);
			
 
				+							srcData += 8;
			
 
				+							decodeBlockDxt45A(temp+1, srcData);
			
 
				+							srcData += 8;
			
 
				+
			
 
				+							for (uint32_t ii = 0; ii < 16; ++ii)
			
 
				+							{
			
 
				+								float nx = temp[ii*4+2]*2.0f/255.0f - 1.0f;
			
 
				+								float ny = temp[ii*4+1]*2.0f/255.0f - 1.0f;
			
 
				+								float nz = bx::fsqrt(1.0f - nx*nx - ny*ny);
			
 
				+
			
 
				+								const uint32_t offset = (yy*4 + ii/4)*_width*16 + (xx*4 + ii%4)*16;
			
 
				+								float* block = (float*)&dst[offset];
			
 
				+								block[0] = nx;
			
 
				+								block[1] = ny;
			
 
				+								block[2] = nz;
			
 
				+								block[3] = 0.0f;
			
 
				+							}
			
 
				 						}
			
 
				 					}
			
 
				 				}
			
 
				-			}
			
 
				-			break;
			
 
				+				break;
			
 
				 
			
 
				-		case TextureFormat::RGBA32F:
			
 
				-			bx::memCopy(_dst, _src, _dstPitch*_height);
			
 
				-			break;
			
 
				+			case TextureFormat::RGBA32F:
			
 
				+				bx::memCopy(dst, src, _dstPitch*_height);
			
 
				+				break;
			
 
				 
			
 
				-		default:
			
 
				-			if (isCompressed(_format) )
			
 
				-			{
			
 
				-				uint32_t size = imageGetSize(NULL, uint16_t(_width), uint16_t(_height), 0, false, false, 1, TextureFormat::RGBA8);
			
 
				-				void* temp = BX_ALLOC(_allocator, size);
			
 
				-				imageDecodeToRgba8(temp, _src, _width, _height, _width*4, _format);
			
 
				-				imageRgba8ToRgba32f(_dst, _width, _height, _width*4, temp);
			
 
				-				BX_FREE(_allocator, temp);
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				const uint32_t srcBpp = s_imageBlockInfo[_format].bitsPerPixel;
			
 
				-				imageConvert(_dst, TextureFormat::RGBA32F, _src, _format, _width, _height, _width*srcBpp/8);
			
 
				+			default:
			
 
				+				if (isCompressed(_format) )
			
 
				+				{
			
 
				+					uint32_t size = imageGetSize(NULL, uint16_t(_width), uint16_t(_height), 0, false, false, 1, TextureFormat::RGBA8);
			
 
				+					void* temp = BX_ALLOC(_allocator, size);
			
 
				+					imageDecodeToRgba8(temp, src, _width, _height, _width*4, _format);
			
 
				+					imageRgba8ToRgba32f(dst, _width, _height, _width*4, temp);
			
 
				+					BX_FREE(_allocator, temp);
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					imageConvert(dst, TextureFormat::RGBA32F, src, _format, _width, _height, 1, srcPitch);
			
 
				+				}
			
 
				+				break;
			
 
				 			}
			
 
				-			break;
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -3168,6 +3194,7 @@ namespace bimg
 
				 					{
			
 
				 						_mip.m_width     = width;
			
 
				 						_mip.m_height    = height;
			
 
				+						_mip.m_depth     = depth;
			
 
				 						_mip.m_blockSize = blockSize;
			
 
				 						_mip.m_size      = mipSize;
			
 
				 						_mip.m_data      = &data[offset];
			
@@ -3209,6 +3236,7 @@ namespace bimg
 
				 					{
			
 
				 						_mip.m_width     = width;
			
 
				 						_mip.m_height    = height;
			
 
				+						_mip.m_depth     = depth;
			
 
				 						_mip.m_blockSize = blockSize;
			
 
				 						_mip.m_size      = size;
			
 
				 						_mip.m_data      = &data[offset];
			
--- a/src/image_encode.cpp
+++ b/src/image_encode.cpp
@@ -35,157 +35,171 @@ namespace bimg
 
				 	};
			
 
				 	BX_STATIC_ASSERT(Quality::Count == BX_COUNTOF(s_squishQuality) );
			
 
				 
			
 
				-	void imageEncodeFromRgba8(void* _dst, const void* _src, uint32_t _width, uint32_t _height, TextureFormat::Enum _format, Quality::Enum _quality, bx::Error* _err)
			
 
				+	void imageEncodeFromRgba8(void* _dstX, const void* _srcX, uint32_t _width, uint32_t _height, uint32_t _depth, TextureFormat::Enum _format, Quality::Enum _quality, bx::Error* _err)
			
 
				 	{
			
 
				-		BX_ERROR_SCOPE(_err);
			
 
				+		const uint8_t* src = (const uint8_t*)_srcX;
			
 
				+		uint8_t* dst = (uint8_t*)_dstX;
			
 
				 
			
 
				-		switch (_format)
			
 
				+		for (uint32_t zz = 0; zz < _depth && _err->isOk(); ++zz)
			
 
				 		{
			
 
				-		case TextureFormat::BC1:
			
 
				-		case TextureFormat::BC2:
			
 
				-		case TextureFormat::BC3:
			
 
				-		case TextureFormat::BC4:
			
 
				-		case TextureFormat::BC5:
			
 
				-			squish::CompressImage( (const uint8_t*)_src, _width, _height, _dst
			
 
				-				, s_squishQuality[_quality]
			
 
				-				| (_format == TextureFormat::BC2 ? squish::kDxt3
			
 
				-				:  _format == TextureFormat::BC3 ? squish::kDxt5
			
 
				-				:  _format == TextureFormat::BC4 ? squish::kBc4
			
 
				-				:  _format == TextureFormat::BC5 ? squish::kBc5
			
 
				-				:                                  squish::kDxt1)
			
 
				-				);
			
 
				-			break;
			
 
				+			switch (_format)
			
 
				+			{
			
 
				+			case TextureFormat::BC1:
			
 
				+			case TextureFormat::BC2:
			
 
				+			case TextureFormat::BC3:
			
 
				+			case TextureFormat::BC4:
			
 
				+			case TextureFormat::BC5:
			
 
				+				squish::CompressImage(src, _width, _height, dst
			
 
				+					, s_squishQuality[_quality]
			
 
				+					| (_format == TextureFormat::BC2 ? squish::kDxt3
			
 
				+					:  _format == TextureFormat::BC3 ? squish::kDxt5
			
 
				+					:  _format == TextureFormat::BC4 ? squish::kBc4
			
 
				+					:  _format == TextureFormat::BC5 ? squish::kBc5
			
 
				+					:                                  squish::kDxt1)
			
 
				+					);
			
 
				+				break;
			
 
				 
			
 
				-		case TextureFormat::BC6H:
			
 
				-			nvtt::compressBC6H( (const uint8_t*)_src, _width, _height, 4, _dst);
			
 
				-			break;
			
 
				+			case TextureFormat::BC6H:
			
 
				+				nvtt::compressBC6H(src, _width, _height, 4, dst);
			
 
				+				break;
			
 
				 
			
 
				-		case TextureFormat::BC7:
			
 
				-			nvtt::compressBC7( (const uint8_t*)_src, _width, _height, 4, _dst);
			
 
				-			break;
			
 
				+			case TextureFormat::BC7:
			
 
				+				nvtt::compressBC7(src, _width, _height, 4, dst);
			
 
				+				break;
			
 
				 
			
 
				-		case TextureFormat::ETC1:
			
 
				-			etc1_encode_image( (const uint8_t*)_src, _width, _height, 4, _width*4, (uint8_t*)_dst);
			
 
				-			break;
			
 
				+			case TextureFormat::ETC1:
			
 
				+				etc1_encode_image(src, _width, _height, 4, _width*4, dst);
			
 
				+				break;
			
 
				 
			
 
				-		case TextureFormat::ETC2:
			
 
				-			{
			
 
				-				const uint32_t blockWidth  = (_width +3)/4;
			
 
				-				const uint32_t blockHeight = (_height+3)/4;
			
 
				-				const uint32_t pitch = _width*4;
			
 
				-				const uint8_t* src = (const uint8_t*)_src;
			
 
				-				uint64_t* dst = (uint64_t*)_dst;
			
 
				-				for (uint32_t yy = 0; yy < blockHeight; ++yy)
			
 
				+			case TextureFormat::ETC2:
			
 
				 				{
			
 
				-					for (uint32_t xx = 0; xx < blockWidth; ++xx)
			
 
				+					const uint32_t blockWidth  = (_width +3)/4;
			
 
				+					const uint32_t blockHeight = (_height+3)/4;
			
 
				+					const uint32_t pitch = _width*4;
			
 
				+					uint64_t* dstBlock = (uint64_t*)dst;
			
 
				+					for (uint32_t yy = 0; yy < blockHeight; ++yy)
			
 
				 					{
			
 
				-						uint8_t block[4*4*4];
			
 
				-						const uint8_t* ptr = &src[(yy*pitch+xx*4)*4];
			
 
				+						for (uint32_t xx = 0; xx < blockWidth; ++xx)
			
 
				+						{
			
 
				+							uint8_t block[4*4*4];
			
 
				+							const uint8_t* ptr = &src[(yy*pitch+xx*4)*4];
			
 
				 
			
 
				-						for (uint32_t ii = 0; ii < 16; ++ii)
			
 
				-						{ // BGRx
			
 
				-							bx::memCopy(&block[ii*4], &ptr[(ii%4)*pitch + (ii&~3)], 4);
			
 
				-							bx::xchg(block[ii*4+0], block[ii*4+2]);
			
 
				-						}
			
 
				+							for (uint32_t ii = 0; ii < 16; ++ii)
			
 
				+							{ // BGRx
			
 
				+								bx::memCopy(&block[ii*4], &ptr[(ii%4)*pitch + (ii&~3)], 4);
			
 
				+								bx::xchg(block[ii*4+0], block[ii*4+2]);
			
 
				+							}
			
 
				 
			
 
				-						*dst++ = ProcessRGB_ETC2(block);
			
 
				+							*dstBlock++ = ProcessRGB_ETC2(block);
			
 
				+						}
			
 
				 					}
			
 
				 				}
			
 
				-			}
			
 
				-			break;
			
 
				+				break;
			
 
				 
			
 
				-		case TextureFormat::PTC14:
			
 
				-			{
			
 
				-				using namespace Javelin;
			
 
				-				RgbaBitmap bmp;
			
 
				-				bmp.width  = _width;
			
 
				-				bmp.height = _height;
			
 
				-				bmp.data   = (uint8_t*)const_cast<void*>(_src);
			
 
				-				PvrTcEncoder::EncodeRgb4Bpp(_dst, bmp);
			
 
				-				bmp.data = NULL;
			
 
				-			}
			
 
				-			break;
			
 
				+			case TextureFormat::PTC14:
			
 
				+				{
			
 
				+					using namespace Javelin;
			
 
				+					RgbaBitmap bmp;
			
 
				+					bmp.width  = _width;
			
 
				+					bmp.height = _height;
			
 
				+					bmp.data   = const_cast<uint8_t*>(src);
			
 
				+					PvrTcEncoder::EncodeRgb4Bpp(dst, bmp);
			
 
				+					bmp.data = NULL;
			
 
				+				}
			
 
				+				break;
			
 
				 
			
 
				-		case TextureFormat::PTC14A:
			
 
				-			{
			
 
				-				using namespace Javelin;
			
 
				-				RgbaBitmap bmp;
			
 
				-				bmp.width  = _width;
			
 
				-				bmp.height = _height;
			
 
				-				bmp.data   = (uint8_t*)const_cast<void*>(_src);
			
 
				-				PvrTcEncoder::EncodeRgba4Bpp(_dst, bmp);
			
 
				-				bmp.data = NULL;
			
 
				-			}
			
 
				-			break;
			
 
				+			case TextureFormat::PTC14A:
			
 
				+				{
			
 
				+					using namespace Javelin;
			
 
				+					RgbaBitmap bmp;
			
 
				+					bmp.width  = _width;
			
 
				+					bmp.height = _height;
			
 
				+					bmp.data   = const_cast<uint8_t*>(src);
			
 
				+					PvrTcEncoder::EncodeRgba4Bpp(dst, bmp);
			
 
				+					bmp.data = NULL;
			
 
				+				}
			
 
				+				break;
			
 
				 
			
 
				-		case TextureFormat::BGRA8:
			
 
				-			imageSwizzleBgra8(_dst, _width, _height, _width*4, _src);
			
 
				-			break;
			
 
				+			case TextureFormat::BGRA8:
			
 
				+				imageSwizzleBgra8(dst, _width, _height, _width*4, src);
			
 
				+				break;
			
 
				 
			
 
				-		case TextureFormat::RGBA8:
			
 
				-			bx::memCopy(_dst, _src, _width*_height*4);
			
 
				-			break;
			
 
				+			case TextureFormat::RGBA8:
			
 
				+				bx::memCopy(dst, src, _width*_height*4);
			
 
				+				break;
			
 
				 
			
 
				-		default:
			
 
				-			if (!imageConvert(_dst, _format, _src, TextureFormat::RGBA8, _width, _height) )
			
 
				-			{
			
 
				-				BX_ERROR_SET(_err, BIMG_ERROR, "Unable to convert between input/output formats!");
			
 
				+			default:
			
 
				+				if (!imageConvert(dst, _format, src, TextureFormat::RGBA8, _width, _height, 1) )
			
 
				+				{
			
 
				+					BX_ERROR_SET(_err, BIMG_ERROR, "Unable to convert between input/output formats!");
			
 
				+				}
			
 
				+				break;
			
 
				 			}
			
 
				-			break;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	void imageEncodeFromRgba32f(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, TextureFormat::Enum _dstFormat, Quality::Enum _quality, bx::Error* _err)
			
 
				+	void imageEncodeFromRgba32f(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _depth, TextureFormat::Enum _dstFormat, Quality::Enum _quality, bx::Error* _err)
			
 
				 	{
			
 
				 		BX_ERROR_SCOPE(_err);
			
 
				 
			
 
				 		const uint8_t* src = (const uint8_t*)_src;
			
 
				 
			
 
				-		if (!imageConvert(_dst, _dstFormat, _src, TextureFormat::RGBA32F, _width, _height) )
			
 
				+		if (!imageConvert(_dst, _dstFormat, _src, TextureFormat::RGBA32F, _width, _height, _depth) )
			
 
				 		{
			
 
				-			uint8_t* temp = (uint8_t*)BX_ALLOC(_allocator, _width*_height*4);
			
 
				-			if (imageConvert(temp, TextureFormat::RGBA8, _src, TextureFormat::RGBA32F, _width, _height) )
			
 
				+			uint8_t* temp = (uint8_t*)BX_ALLOC(_allocator, _width*_height*_depth*4);
			
 
				+			if (imageConvert(temp, TextureFormat::RGBA8, _src, TextureFormat::RGBA32F, _width, _height, _depth) )
			
 
				 			{
			
 
				-				for (uint32_t yy = 0; yy < _height; ++yy)
			
 
				+				for (uint32_t zz = 0; zz < _depth; ++zz)
			
 
				 				{
			
 
				-					for (uint32_t xx = 0; xx < _width; ++xx)
			
 
				+					const uint32_t zoffset = zz*_width*_height;
			
 
				+
			
 
				+					for (uint32_t yy = 0; yy < _height; ++yy)
			
 
				 					{
			
 
				-						const uint32_t offset = yy*_width + xx;
			
 
				-						const float* input = (const float*)&src[offset * 16];
			
 
				-						uint8_t* output    = &temp[offset * 4];
			
 
				-						output[0] = uint8_t(bx::fsaturate(input[0])*255.0f + 0.5f);
			
 
				-						output[1] = uint8_t(bx::fsaturate(input[1])*255.0f + 0.5f);
			
 
				-						output[2] = uint8_t(bx::fsaturate(input[2])*255.0f + 0.5f);
			
 
				-						output[3] = uint8_t(bx::fsaturate(input[3])*255.0f + 0.5f);
			
 
				+						const uint32_t yoffset = zoffset + yy*_width;
			
 
				+
			
 
				+						for (uint32_t xx = 0; xx < _width; ++xx)
			
 
				+						{
			
 
				+							const uint32_t offset = yoffset + xx;
			
 
				+							const float* input = (const float*)&src[offset * 16];
			
 
				+							uint8_t* output    = &temp[offset * 4];
			
 
				+							output[0] = uint8_t(bx::fsaturate(input[0])*255.0f + 0.5f);
			
 
				+							output[1] = uint8_t(bx::fsaturate(input[1])*255.0f + 0.5f);
			
 
				+							output[2] = uint8_t(bx::fsaturate(input[2])*255.0f + 0.5f);
			
 
				+							output[3] = uint8_t(bx::fsaturate(input[3])*255.0f + 0.5f);
			
 
				+						}
			
 
				 					}
			
 
				 				}
			
 
				 
			
 
				-				imageEncodeFromRgba8(_dst, temp, _width, _height, _dstFormat, _quality, _err);
			
 
				+				imageEncodeFromRgba8(_dst, temp, _width, _height, _depth, _dstFormat, _quality, _err);
			
 
				 			}
			
 
				 			else
			
 
				 			{
			
 
				 				BX_ERROR_SET(_err, BIMG_ERROR, "Unable to convert between input/output formats!");
			
 
				 			}
			
 
				+
			
 
				 			BX_FREE(_allocator, temp);
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	void imageRgba32f11to01(void* _dst, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src)
			
 
				+	void imageRgba32f11to01(void* _dst, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _pitch, const void* _src)
			
 
				 	{
			
 
				 		const uint8_t* src = (const uint8_t*)_src;
			
 
				 		uint8_t* dst = (uint8_t*)_dst;
			
 
				 
			
 
				-		for (uint32_t yy = 0; yy < _height; ++yy)
			
 
				+		for (uint32_t zz = 0; zz < _depth; ++zz)
			
 
				 		{
			
 
				-			for (uint32_t xx = 0; xx < _width; ++xx)
			
 
				+			for (uint32_t yy = 0; yy < _height; ++yy)
			
 
				 			{
			
 
				-				const uint32_t offset = yy*_pitch + xx * 16;
			
 
				-				const float* input = (const float*)&src[offset];
			
 
				-				float* output = (float*)&dst[offset];
			
 
				-				output[0] = input[0]*0.5f + 0.5f;
			
 
				-				output[1] = input[1]*0.5f + 0.5f;
			
 
				-				output[2] = input[2]*0.5f + 0.5f;
			
 
				-				output[3] = input[3]*0.5f + 0.5f;
			
 
				+				for (uint32_t xx = 0; xx < _width; ++xx)
			
 
				+				{
			
 
				+					const uint32_t offset = yy*_pitch + xx * 16;
			
 
				+					const float* input = (const float*)&src[offset];
			
 
				+					float* output = (float*)&dst[offset];
			
 
				+					output[0] = input[0]*0.5f + 0.5f;
			
 
				+					output[1] = input[1]*0.5f + 0.5f;
			
 
				+					output[2] = input[2]*0.5f + 0.5f;
			
 
				+					output[3] = input[3]*0.5f + 0.5f;
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
--- a/tools/texturec/texturec.cpp
+++ b/tools/texturec/texturec.cpp
@@ -26,7 +26,7 @@
 
				 #include <string>
			
 
				 
			
 
				 #define BIMG_TEXTUREC_VERSION_MAJOR 1
			
 
				-#define BIMG_TEXTUREC_VERSION_MINOR 6
			
 
				+#define BIMG_TEXTUREC_VERSION_MINOR 9
			
 
				 
			
 
				 struct Options
			
 
				 {
			
@@ -234,6 +234,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 						, mip.m_data
			
 
				 						, dstMip.m_width
			
 
				 						, dstMip.m_height
			
 
				+						, dstMip.m_depth
			
 
				 						, dstMip.m_width*16
			
 
				 						, mip.m_format
			
 
				 						);
			
@@ -264,6 +265,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 					bimg::imageRgba32f11to01(rgbaDst
			
 
				 						, dstMip.m_width
			
 
				 						, dstMip.m_height
			
 
				+						, dstMip.m_depth
			
 
				 						, dstMip.m_width*16
			
 
				 						, rgba
			
 
				 						);
			
@@ -273,6 +275,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 						, rgbaDst
			
 
				 						, dstMip.m_width
			
 
				 						, dstMip.m_height
			
 
				+						, dstMip.m_depth
			
 
				 						, outputFormat
			
 
				 						, _options.quality
			
 
				 						, _err
			
@@ -290,6 +293,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 						bimg::imageRgba32f11to01(rgbaDst
			
 
				 							, dstMip.m_width
			
 
				 							, dstMip.m_height
			
 
				+							, dstMip.m_depth
			
 
				 							, dstMip.m_width*16
			
 
				 							, rgba
			
 
				 							);
			
@@ -302,6 +306,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 							, rgbaDst
			
 
				 							, dstMip.m_width
			
 
				 							, dstMip.m_height
			
 
				+							, dstMip.m_depth
			
 
				 							, outputFormat
			
 
				 							, _options.quality
			
 
				 							, _err
			
@@ -317,7 +322,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 						  NULL
			
 
				 						, uint16_t(dstMip.m_width)
			
 
				 						, uint16_t(dstMip.m_height)
			
 
				-						, 0
			
 
				+						, uint16_t(dstMip.m_depth)
			
 
				 						, false
			
 
				 						, false
			
 
				 						, 1
			
@@ -332,6 +337,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 						, mip.m_data
			
 
				 						, mip.m_width
			
 
				 						, mip.m_height
			
 
				+						, mip.m_depth
			
 
				 						, mip.m_width*16
			
 
				 						, mip.m_format
			
 
				 						);
			
@@ -341,6 +347,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 						, rgba32f
			
 
				 						, dstMip.m_width
			
 
				 						, dstMip.m_height
			
 
				+						, dstMip.m_depth
			
 
				 						, outputFormat
			
 
				 						, _options.quality
			
 
				 						, _err
			
@@ -352,6 +359,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 						bimg::imageRgba32fToLinear(rgba32f
			
 
				 							, mip.m_width
			
 
				 							, mip.m_height
			
 
				+							, mip.m_depth
			
 
				 							, mip.m_width*16
			
 
				 							, rgba32f
			
 
				 							);
			
@@ -361,6 +369,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 							bimg::imageRgba32fLinearDownsample2x2(rgba32f
			
 
				 								, dstMip.m_width
			
 
				 								, dstMip.m_height
			
 
				+								, dstMip.m_depth
			
 
				 								, dstMip.m_width*16
			
 
				 								, rgba32f
			
 
				 								);
			
@@ -371,6 +380,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 							bimg::imageRgba32fToGamma(rgbaDst
			
 
				 								, mip.m_width
			
 
				 								, mip.m_height
			
 
				+								, mip.m_depth
			
 
				 								, mip.m_width*16
			
 
				 								, rgba32f
			
 
				 								);
			
@@ -380,6 +390,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 								, rgbaDst
			
 
				 								, dstMip.m_width
			
 
				 								, dstMip.m_height
			
 
				+								, dstMip.m_depth
			
 
				 								, outputFormat
			
 
				 								, _options.quality
			
 
				 								, _err
			
@@ -395,7 +406,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 						  NULL
			
 
				 						, uint16_t(dstMip.m_width)
			
 
				 						, uint16_t(dstMip.m_height)
			
 
				-						, 0
			
 
				+						, uint16_t(dstMip.m_depth)
			
 
				 						, false
			
 
				 						, false
			
 
				 						, 1
			
@@ -437,6 +448,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 						, rgba
			
 
				 						, dstMip.m_width
			
 
				 						, dstMip.m_height
			
 
				+						, dstMip.m_depth
			
 
				 						, outputFormat
			
 
				 						, _options.quality
			
 
				 						, _err
			
@@ -447,6 +459,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 						bimg::imageRgba8Downsample2x2(rgba
			
 
				 							, dstMip.m_width
			
 
				 							, dstMip.m_height
			
 
				+							, dstMip.m_depth
			
 
				 							, dstMip.m_width*4
			
 
				 							, rgba
			
 
				 							);
			
@@ -470,6 +483,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 
				 							, rgba
			
 
				 							, dstMip.m_width
			
 
				 							, dstMip.m_height
			
 
				+							, dstMip.m_depth
			
 
				 							, outputFormat
			
 
				 							, _options.quality
			
 
				 							, _err