Răsfoiți Sursa

Merge pull request #3 from bkaradzic/master

Sync to tip
Andrew Willmott 7 ani în urmă
părinte
comite
746f1053d7

+ 3 - 3
3rdparty/nvtt/nvtt.cpp

@@ -50,9 +50,9 @@ namespace nvtt
 					for (uint32_t blockX = 0; blockX < ZOH::Tile::TILE_W; ++blockX)
 					{
 						nv::Vector4 color = srcRgba[blockY*srcRgbaStride + blockX];
-						zohTile.data[blockY][blockX].x = float(int16_t(bx::halfFromFloat(color.x) ) );
-						zohTile.data[blockY][blockX].y = float(int16_t(bx::halfFromFloat(color.y) ) );
-						zohTile.data[blockY][blockX].z = float(int16_t(bx::halfFromFloat(color.z) ) );
+						zohTile.data[blockY][blockX].x = float(int16_t(bx::halfFromFloat(bx::max(color.x, 0.0f) ) ) );
+						zohTile.data[blockY][blockX].y = float(int16_t(bx::halfFromFloat(bx::max(color.y, 0.0f) ) ) );
+						zohTile.data[blockY][blockX].z = float(int16_t(bx::halfFromFloat(bx::max(color.z, 0.0f) ) ) );
 					}
 				}
 

Fișier diff suprimat deoarece este prea mare
+ 287 - 152
3rdparty/tinyexr/tinyexr.h


+ 24 - 10
include/bimg/bimg.h

@@ -9,7 +9,7 @@
 #include <stdint.h> // uint32_t
 #include <stdlib.h> // NULL
 
-#define BIMG_API_VERSION UINT32_C(6)
+#define BIMG_API_VERSION UINT32_C(7)
 
 namespace bx
 {
@@ -73,7 +73,7 @@ namespace bimg
 			ASTC6x6,      //!< ASTC 6x6 3.56 BPP
 			ASTC8x5,      //!< ASTC 8x5 3.20 BPP
 			ASTC8x6,      //!< ASTC 8x6 2.67 BPP
-			ASTC10x5,	  //!< ASTC 10x5 2.56 BPP
+			ASTC10x5,     //!< ASTC 10x5 2.56 BPP
 
 			Unknown,      // Compressed formats above.
 
@@ -334,6 +334,16 @@ namespace bimg
 		, const void* _src
 		);
 
+	/// Halves an RGBA32F image by averaging each 2x2 texel group (2x2x2 when _depth/2 is non-zero); RGB is averaged through bx::toLinear/bx::toGamma, alpha directly.
+	void imageRgba32fDownsample2x2(
+		  void* _dst           //!< Destination; written as consecutive 16-byte texels (no destination pitch).
+		, uint32_t _width      //!< Source width in texels; nothing is written when _width/2 is 0.
+		, uint32_t _height     //!< Source height in texels; nothing is written when _height/2 is 0.
+		, uint32_t _depth      //!< Source depth in slices; values below 2 select the 2D path.
+		, uint32_t _srcPitch   //!< Byte distance between consecutive source rows.
+		, const void* _src     //!< Source texels, four floats (RGBA) each.
+		);
+
 	///
 	void imageRgba32fDownsample2x2NormalMap(
 		  void* _dst
@@ -493,6 +503,18 @@ namespace bimg
 		, bx::Error* _err
 		);
 
+	/// NOTE(review): definition not visible here; presumably writes the image to _writer in Radiance .hdr format and returns the number of bytes written -- confirm.
+	int32_t imageWriteHdr(
+		  bx::WriterI* _writer
+		, uint32_t _width
+		, uint32_t _height
+		, uint32_t _srcPitch
+		, const void* _src
+		, TextureFormat::Enum _format
+		, bool _yflip
+		, bx::Error* _err
+		);
+
 	///
 	int32_t imageWriteDds(
 		  bx::WriterI* _writer
@@ -628,14 +650,6 @@ namespace bimg
 		, ImageMip& _mip
 		);
 
-	///
-	ImageContainer* imageCubemapFromLatLongRgba32F(
-		  bx::AllocatorI* _allocator
-		, const ImageContainer& _input
-		, bool _useBilinearInterpolation
-		, bx::Error* _err
-		);
-
 } // namespace bimg
 
 #endif // BIMG_IMAGE_H_HEADER_GUARD

+ 43 - 0
include/bimg/encode.h

@@ -127,6 +127,49 @@ namespace bimg
 		, float _alphaRef
 		);
 
+	/// NOTE(review): definition not visible here; presumably builds a cubemap container from a lat-long RGBA32F image, allocating through _allocator and reporting failure through _err -- confirm.
+	ImageContainer* imageCubemapFromLatLongRgba32F(
+		  bx::AllocatorI* _allocator
+		, const ImageContainer& _input
+		, bool _useBilinearInterpolation
+		, bx::Error* _err
+		);
+
+	/// NOTE(review): definition not visible here; presumably builds a cubemap container from an RGBA32F image holding the six faces in a strip -- confirm the expected strip layout.
+	ImageContainer* imageCubemapFromStripRgba32F(
+		  bx::AllocatorI* _allocator
+		, const ImageContainer& _input
+		, bx::Error* _err
+		);
+
+	/// NOTE(review): definition not visible here; presumably returns a newly allocated container holding _image with a generated mip chain -- confirm ownership and the failure return value.
+	ImageContainer* imageGenerateMips(
+		  bx::AllocatorI* _allocator
+		, const ImageContainer& _image
+		);
+
+	struct LightingModel // Scopes the enum that selects the lighting model passed to imageCubemapRadianceFilter.
+	{
+		enum Enum
+		{
+			Phong,
+			PhongBrdf,
+			Blinn,
+			BlinnBrdf,
+			Ggx,
+
+			Count //!< NOTE(review): presumably the number of models rather than a selectable model -- confirm.
+		};
+	};
+
+	/// NOTE(review): definition not visible here; presumably returns a newly allocated cubemap filtered with _lightingModel, reporting failure through _err -- confirm.
+	ImageContainer* imageCubemapRadianceFilter(
+		  bx::AllocatorI* _allocator
+		, const ImageContainer& _image
+		, LightingModel::Enum _lightingModel
+		, bx::Error* _err
+		);
+
 } // namespace bimg
 
 #endif // BIMG_ENCODE_H_HEADER_GUARD

+ 1 - 1
makefile

@@ -308,7 +308,7 @@ texturev: .build/projects/$(BUILD_PROJECT_DIR) ## Build texturev tool.
 	$(SILENT) $(MAKE) -C .build/projects/$(BUILD_PROJECT_DIR) texturev config=$(BUILD_TOOLS_CONFIG)
 	$(SILENT) cp .build/$(BUILD_OUTPUT_DIR)/bin/texturev$(BUILD_TOOLS_SUFFIX)$(EXE) tools/bin/$(OS)/texturev$(EXE)
 
-tools: geometryc shaderc texturec texturev ## Build tools.
+tools: texturec ## Build tools.
 
 clean-tools: ## Clean tools projects.
 	-$(SILENT) rm -r .build/projects/$(BUILD_PROJECT_DIR)

+ 1 - 0
scripts/bimg_encode.lua

@@ -17,6 +17,7 @@ project "bimg_encode"
 	files {
 		path.join(BIMG_DIR, "include/**"),
 		path.join(BIMG_DIR, "src/image_encode.*"),
+		path.join(BIMG_DIR, "src/image_cubemap_filter.*"),
 		path.join(BIMG_DIR, "3rdparty/libsquish/**.cpp"),
 		path.join(BIMG_DIR, "3rdparty/libsquish/**.h"),
 		path.join(BIMG_DIR, "3rdparty/edtaa3/**.cpp"),

+ 1 - 1
src/bimg_p.h

@@ -43,7 +43,7 @@ namespace bimg
 	{
 		if (_hasMips)
 		{
-			const uint32_t max = bx::uint32_max(bx::uint32_max(_width, _height), _depth);
+			const uint32_t max = bx::max(_width, _height, _depth);
 			const uint32_t num = 1 + uint32_t(bx::log2(float(max) ) );
 
 			return uint8_t(num);

+ 333 - 336
src/image.cpp

@@ -10,7 +10,7 @@ namespace bimg
 {
 	static const ImageBlockInfo s_imageBlockInfo[] =
 	{
-		//  +-------------------------------------------- bits per pixel
+		//  +--------------------------------------------- bits per pixel
 		//  |   +----------------------------------------- block width
 		//  |   |  +-------------------------------------- block height
 		//  |   |  |   +---------------------------------- block size
@@ -38,15 +38,15 @@ namespace bimg
 		{   4,  4, 4,  8, 2, 2,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // PTC14A
 		{   2,  8, 4,  8, 2, 2,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // PTC22
 		{   4,  4, 4,  8, 2, 2,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // PTC24
-        {   4,  4, 4,  8, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ATC
-        {   8,  4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ATCE
-        {   8,  4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ATCI
-        {   8,  4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC4x4
-        {   6,  5, 5, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC5x5
-        {   4,  6, 6, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC6x6
-        {   4,  8, 5, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC8x5
-        {   3,  8, 6, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC8x6
-        {   3, 10, 5, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC10x5
+		{   4,  4, 4,  8, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ATC
+		{   8,  4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ATCE
+		{   8,  4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ATCI
+		{   8,  4, 4, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC4x4
+		{   6,  5, 5, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC5x5
+		{   4,  6, 6, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC6x6
+		{   4,  8, 5, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC8x5
+		{   3,  8, 6, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC8x6
+		{   3, 10, 5, 16, 1, 1,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // ASTC10x5
 		{   0,  0, 0,  0, 0, 0,  0, 0,  0,  0,  0,  0, uint8_t(bx::EncodingType::Count) }, // Unknown
 		{   1,  8, 1,  1, 1, 1,  0, 0,  1,  0,  0,  0, uint8_t(bx::EncodingType::Unorm) }, // R1
 		{   8,  1, 1,  1, 1, 1,  0, 0,  0,  0,  0,  8, uint8_t(bx::EncodingType::Unorm) }, // A8
@@ -128,15 +128,15 @@ namespace bimg
 		"PTC14A",     // PTC14A
 		"PTC22",      // PTC22
 		"PTC24",      // PTC24
-        "ATC",        // ATC
-        "ATCE",       // ATCE
-        "ATCI",       // ATCI
-        "ASTC4x4",    // ASTC4x4
-        "ASTC5x5",    // ASTC5x5
-        "ASTC6x6",    // ASTC6x6
-        "ASTC8x5",    // ASTC8x5
-        "ASTC8x6",    // ASTC8x6
-        "ASTC10x5",   // ASTC10x5
+		"ATC",        // ATC
+		"ATCE",       // ATCE
+		"ATCI",       // ATCI
+		"ASTC4x4",    // ASTC4x4
+		"ASTC5x5",    // ASTC5x5
+		"ASTC6x6",    // ASTC6x6
+		"ASTC8x5",    // ASTC8x5
+		"ASTC8x6",    // ASTC8x6
+		"ASTC10x5",   // ASTC10x5
 		"<unknown>",  // Unknown
 		"R1",         // R1
 		"A8",         // A8
@@ -276,9 +276,9 @@ namespace bimg
 		const uint16_t minBlockX   = blockInfo.minBlockX;
 		const uint16_t minBlockY   = blockInfo.minBlockY;
 
-		_width  = bx::uint16_max(blockWidth  * minBlockX, ( (_width  + blockWidth  - 1) / blockWidth )*blockWidth);
-		_height = bx::uint16_max(blockHeight * minBlockY, ( (_height + blockHeight - 1) / blockHeight)*blockHeight);
-		_depth  = bx::uint16_max(1, _depth);
+		_width  = bx::max<uint16_t>(blockWidth  * minBlockX, ( (_width  + blockWidth  - 1) / blockWidth )*blockWidth);
+		_height = bx::max<uint16_t>(blockHeight * minBlockY, ( (_height + blockHeight - 1) / blockHeight)*blockHeight);
+		_depth  = bx::max<uint16_t>(1, _depth);
 
 		uint8_t numMips = calcNumMips(true, _width, _height, _depth);
 
@@ -289,12 +289,15 @@ namespace bimg
 	{
 		const ImageBlockInfo& blockInfo = getBlockInfo(_format);
 		const uint8_t  bpp         = blockInfo.bitsPerPixel;
-        const uint16_t blockSize   = blockInfo.blockSize;
 		const uint16_t blockWidth  = blockInfo.blockWidth;
 		const uint16_t blockHeight = blockInfo.blockHeight;
 		const uint16_t minBlockX   = blockInfo.minBlockX;
 		const uint16_t minBlockY   = blockInfo.minBlockY;
+		const uint8_t  blockSize   = blockInfo.blockSize;
 
+		_width  = bx::max<uint16_t>(blockWidth  * minBlockX, ( (_width  + blockWidth  - 1) / blockWidth)*blockWidth);
+		_height = bx::max<uint16_t>(blockHeight * minBlockY, ( (_height + blockHeight - 1) / blockHeight)*blockHeight);
+		_depth  = bx::max<uint16_t>(1, _depth);
 		const uint8_t  numMips = calcNumMips(_hasMips, _width, _height, _depth);
 		const uint32_t sides   = _cubeMap ? 6 : 1;
 
@@ -303,22 +306,18 @@ namespace bimg
 		uint32_t depth  = _depth;
 		uint32_t size   = 0;
 
-        if (_format != TextureFormat::Unknown)
-        {
-            for (uint32_t lod = 0; lod < numMips; ++lod)
-            {
-                depth  = bx::uint32_max(1, depth);
-
-                uint16_t blocksX = bx::uint32_max(minBlockX, ((width  + blockWidth  - 1) / blockWidth ));
-                uint16_t blocksY = bx::uint32_max(minBlockY, ((height + blockHeight - 1) / blockHeight));
+		for (uint32_t lod = 0; lod < numMips; ++lod)
+		{
+			width  = bx::max<uint32_t>(blockWidth  * minBlockX, ( (width  + blockWidth  - 1) / blockWidth )*blockWidth);
+			height = bx::max<uint32_t>(blockHeight * minBlockY, ( (height + blockHeight - 1) / blockHeight)*blockHeight);
+			depth  = bx::max<uint32_t>(1, depth);
 
-                size += blocksX * blocksY * blockSize * depth * sides;
+			size += uint32_t(uint64_t(width/blockWidth * height/blockHeight * depth)*blockSize * sides);
 
-                width  >>= 1;
-                height >>= 1;
-                depth  >>= 1;
-            }
-        }
+			width  >>= 1;
+			height >>= 1;
+			depth  >>= 1;
+		}
 
 		size *= _numLayers;
 
@@ -338,7 +337,7 @@ namespace bimg
 		return size;
 	}
 
-    void imageSolid(void* _dst, uint32_t _width, uint32_t _height, uint32_t _solid)
+	void imageSolid(void* _dst, uint32_t _width, uint32_t _height, uint32_t _solid)
 	{
 		uint32_t* dst = (uint32_t*)_dst;
 		for (uint32_t ii = 0, num = _width*_height; ii < num; ++ii)
@@ -711,6 +710,136 @@ namespace bimg
 		imageRgba32fLinearDownsample2x2Ref(_dst, _width, _height, _depth, _srcPitch, _src);
 	}
 
+	void imageRgba32fDownsample2x2Ref(void* _dst, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch, const void* _src) // Reference box filter: halves an RGBA32F image; RGB is averaged via bx::toLinear/bx::toGamma, alpha is averaged as-is.
+	{
+		const uint32_t dstWidth  = _width/2;
+		const uint32_t dstHeight = _height/2;
+		const uint32_t dstDepth  = _depth/2;
+
+		if (0 == dstWidth
+		||  0 == dstHeight)
+		{
+			return; // Nothing to write when either dimension halves to zero.
+		}
+
+		const uint8_t* src = (const uint8_t*)_src;
+		uint8_t* dst = (uint8_t*)_dst; // Destination is written contiguously, 16 bytes per texel.
+
+		if (0 == dstDepth) // 2D path: each output texel averages a 2x2 source footprint.
+		{
+			for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstHeight; ++yy, src += ystep) // Two source rows are consumed per destination row.
+			{
+				const float* rgba0 = (const float*)&src[0]; // First source row of the pair.
+				const float* rgba1 = (const float*)&src[_srcPitch]; // Second source row of the pair.
+				for (uint32_t xx = 0; xx < dstWidth; ++xx, rgba0 += 8, rgba1 += 8, dst += 16) // 8 floats = two source texels per step.
+				{
+					float xyz[4];
+
+					xyz[0]  = bx::toLinear(rgba0[0]);
+					xyz[1]  = bx::toLinear(rgba0[1]);
+					xyz[2]  = bx::toLinear(rgba0[2]);
+					xyz[3]  =              rgba0[3];
+
+					xyz[0] += bx::toLinear(rgba0[4]);
+					xyz[1] += bx::toLinear(rgba0[5]);
+					xyz[2] += bx::toLinear(rgba0[6]);
+					xyz[3] +=              rgba0[7];
+
+					xyz[0] += bx::toLinear(rgba1[0]);
+					xyz[1] += bx::toLinear(rgba1[1]);
+					xyz[2] += bx::toLinear(rgba1[2]);
+					xyz[3] +=              rgba1[3];
+
+					xyz[0] += bx::toLinear(rgba1[4]);
+					xyz[1] += bx::toLinear(rgba1[5]);
+					xyz[2] += bx::toLinear(rgba1[6]);
+					xyz[3] +=              rgba1[7];
+
+					xyz[0] = bx::toGamma(xyz[0]/4.0f); // Four texels contribute in the 2D path.
+					xyz[1] = bx::toGamma(xyz[1]/4.0f);
+					xyz[2] = bx::toGamma(xyz[2]/4.0f);
+					xyz[3] =             xyz[3]/4.0f;
+
+					bx::packRgba32F(dst, xyz);
+				}
+			}
+		}
+		else // 3D path: each output texel averages a 2x2x2 source footprint.
+		{
+			const uint32_t slicePitch = _srcPitch*_height; // Bytes per source slice.
+
+			for (uint32_t zz = 0; zz < dstDepth; ++zz, src += slicePitch) // The row loop already advanced src by 2*_srcPitch*dstHeight bytes; this skips the second slice of the pair. NOTE(review): for odd _height the two advances sum to one row less than two slices -- confirm callers pass even heights.
+			{
+				for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstHeight; ++yy, src += ystep)
+				{
+					const float* rgba0 = (const float*)&src[0]; // Row pair in the current slice.
+					const float* rgba1 = (const float*)&src[_srcPitch];
+					const float* rgba2 = (const float*)&src[slicePitch]; // Same row pair in the next slice.
+					const float* rgba3 = (const float*)&src[slicePitch+_srcPitch];
+					for (uint32_t xx = 0
+						; xx < dstWidth
+						; ++xx, rgba0 += 8, rgba1 += 8, rgba2 += 8, rgba3 += 8, dst += 16
+						)
+					{
+						float xyz[4];
+
+						xyz[0]  = bx::toLinear(rgba0[0]);
+						xyz[1]  = bx::toLinear(rgba0[1]);
+						xyz[2]  = bx::toLinear(rgba0[2]);
+						xyz[3]  =              rgba0[3];
+
+						xyz[0] += bx::toLinear(rgba0[4]);
+						xyz[1] += bx::toLinear(rgba0[5]);
+						xyz[2] += bx::toLinear(rgba0[6]);
+						xyz[3] +=              rgba0[7];
+
+						xyz[0] += bx::toLinear(rgba1[0]);
+						xyz[1] += bx::toLinear(rgba1[1]);
+						xyz[2] += bx::toLinear(rgba1[2]);
+						xyz[3] +=              rgba1[3];
+
+						xyz[0] += bx::toLinear(rgba1[4]);
+						xyz[1] += bx::toLinear(rgba1[5]);
+						xyz[2] += bx::toLinear(rgba1[6]);
+						xyz[3] +=              rgba1[7];
+
+						xyz[0] += bx::toLinear(rgba2[0]);
+						xyz[1] += bx::toLinear(rgba2[1]);
+						xyz[2] += bx::toLinear(rgba2[2]);
+						xyz[3] +=              rgba2[3];
+
+						xyz[0] += bx::toLinear(rgba2[4]);
+						xyz[1] += bx::toLinear(rgba2[5]);
+						xyz[2] += bx::toLinear(rgba2[6]);
+						xyz[3] +=              rgba2[7];
+
+						xyz[0] += bx::toLinear(rgba3[0]);
+						xyz[1] += bx::toLinear(rgba3[1]);
+						xyz[2] += bx::toLinear(rgba3[2]);
+						xyz[3] +=              rgba3[3];
+
+						xyz[0] += bx::toLinear(rgba3[4]);
+						xyz[1] += bx::toLinear(rgba3[5]);
+						xyz[2] += bx::toLinear(rgba3[6]);
+						xyz[3] +=              rgba3[7];
+
+						xyz[0] = bx::toGamma(xyz[0]/8.0f); // Eight texels contribute in the 3D path.
+						xyz[1] = bx::toGamma(xyz[1]/8.0f);
+						xyz[2] = bx::toGamma(xyz[2]/8.0f);
+						xyz[3] =             xyz[3]/8.0f;
+
+						bx::packRgba32F(dst, xyz);
+					}
+				}
+			}
+		}
+	}
+
+	void imageRgba32fDownsample2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch, const void* _src) // Public entry point; forwards all arguments unchanged to the reference implementation.
+	{
+		imageRgba32fDownsample2x2Ref(_dst, _width, _height, _depth, _srcPitch, _src);
+	}
+
 	void imageRgba32fDownsample2x2NormalMapRef(void* _dst, uint32_t _width, uint32_t _height, uint32_t _srcPitch, uint32_t _dstPitch, const void* _src)
 	{
 		const uint32_t dstWidth  = _width/2;
@@ -872,15 +1001,15 @@ namespace bimg
 		{ NULL,               NULL                 }, // PTC14A
 		{ NULL,               NULL                 }, // PTC22
 		{ NULL,               NULL                 }, // PTC24
-        { NULL,               NULL                 }, // ATC
-        { NULL,               NULL                 }, // ATCE
-        { NULL,               NULL                 }, // ATCI
-        { NULL,               NULL                 }, // ASTC4x4
-        { NULL,               NULL                 }, // ASTC5x5
-        { NULL,               NULL                 }, // ASTC6x6
-        { NULL,               NULL                 }, // ASTC8x5
-        { NULL,               NULL                 }, // ASTC8x6
-        { NULL,               NULL                 }, // ASTC10x5
+		{ NULL,               NULL                 }, // ATC
+		{ NULL,               NULL                 }, // ATCE
+		{ NULL,               NULL                 }, // ATCI
+		{ NULL,               NULL                 }, // ASTC4x4
+		{ NULL,               NULL                 }, // ASTC5x5
+		{ NULL,               NULL                 }, // ASTC6x6
+		{ NULL,               NULL                 }, // ASTC8x5
+		{ NULL,               NULL                 }, // ASTC8x6
+		{ NULL,               NULL                 }, // ASTC10x5
 		{ NULL,               NULL                 }, // Unknown
 		{ NULL,               NULL                 }, // R1
 		{ bx::packR8,         bx::unpackR8         }, // A8
@@ -1039,7 +1168,7 @@ namespace bimg
 
 		if (_dstFormat == _srcFormat)
 		{
-			bx::memCopy(_dst, _src, _width*_height*_depth*srcBpp/8);
+			bx::memCopy(_dst, _src, _width*_height*_depth*(srcBpp/8) );
 			return true;
 		}
 
@@ -2267,61 +2396,61 @@ namespace bimg
 		}
 	}
 
-	// BC6H, BC7
+	// ATC
 	//
-    void decodeBlockATC(uint8_t _dst[16*4], const uint8_t _src[8])
-    {
-        uint8_t colors[4*4];    // You can see from comparison with decodeBlockDXT just how little sense the ATI patent-avoiding(?) modification makes
-
-        uint32_t c0 = _src[0] | (_src[1] << 8);
-        uint32_t c1 = _src[2] | (_src[3] << 8);
-
-        if ((c0 & 0x8000) == 0)
-        {
-            colors[0] = bitRangeConvert( (c0>> 0)&0x1f, 5, 8);
-            colors[1] = bitRangeConvert( (c0>> 5)&0x1f, 5, 8);
-            colors[2] = bitRangeConvert( (c0>>10)&0x1f, 5, 8);
-
-            colors[12] = bitRangeConvert( (c1>> 0)&0x1f, 5, 8);
-            colors[13] = bitRangeConvert( (c1>> 5)&0x3f, 6, 8);
-            colors[14] = bitRangeConvert( (c1>>11)&0x1f, 5, 8);
-
-            colors[ 4] = (2 * colors[0] + colors[12]) / 3;
-            colors[ 5] = (2 * colors[1] + colors[13]) / 3;
-            colors[ 6] = (2 * colors[2] + colors[14]) / 3;
-
-            colors[ 8] = (colors[0] + 2 * colors[12]) / 3;
-            colors[ 9] = (colors[1] + 2 * colors[13]) / 3;
-            colors[10] = (colors[2] + 2 * colors[14]) / 3;
-        }
-        else
-        {
-            colors[ 0] = 0;
-            colors[ 1] = 0;
-            colors[ 2] = 0;
-
-            colors[ 8] = bitRangeConvert( (c0>> 0)&0x1f, 5, 8);
-            colors[ 9] = bitRangeConvert( (c0>> 5)&0x1f, 5, 8);
-            colors[10] = bitRangeConvert( (c0>>10)&0x1f, 5, 8);
-
-            colors[12] = bitRangeConvert( (c1>> 0)&0x1f, 5, 8);
-            colors[13] = bitRangeConvert( (c1>> 5)&0x3f, 6, 8);
-            colors[14] = bitRangeConvert( (c1>>11)&0x1f, 5, 8);
-
-            colors[ 4] = colors[ 8] - colors[12] / 4;
-            colors[ 5] = colors[ 9] - colors[13] / 4;
-            colors[ 6] = colors[10] - colors[14] / 4;
-        }
-
-        for (uint32_t ii = 0, next = 8*4; ii < 16*4; ii += 4, next += 2)
-        {
-            int idx = ( (_src[next>>3] >> (next & 7) ) & 3) * 4;
-            _dst[ii+0] = colors[idx+0];
-            _dst[ii+1] = colors[idx+1];
-            _dst[ii+2] = colors[idx+2];
-            _dst[ii+3] = colors[idx+3];
-        }
-    }
+	void decodeBlockATC(uint8_t _dst[16*4], const uint8_t _src[8]) // Decodes one 8-byte ATC colour block into 16 texels of 4 bytes each; alpha is always written as 255.
+	{
+		uint8_t colors[4*4]; // Four palette entries, 4 bytes apart; only the three colour bytes of each entry are filled and read.
+
+		uint32_t c0 = _src[0] | (_src[1] << 8); // First endpoint, little-endian 16-bit; bit 15 selects the interpolation mode.
+		uint32_t c1 = _src[2] | (_src[3] << 8); // Second endpoint, little-endian 16-bit, unpacked as 5/6/5 bits.
+
+		if (0 == (c0 & 0x8000) )
+		{
+			colors[ 0] = bitRangeConvert( (c0>> 0)&0x1f, 5, 8);
+			colors[ 1] = bitRangeConvert( (c0>> 5)&0x1f, 5, 8);
+			colors[ 2] = bitRangeConvert( (c0>>10)&0x1f, 5, 8);
+
+			colors[12] = bitRangeConvert( (c1>> 0)&0x1f, 5, 8);
+			colors[13] = bitRangeConvert( (c1>> 5)&0x3f, 6, 8);
+			colors[14] = bitRangeConvert( (c1>>11)&0x1f, 5, 8);
+
+			colors[ 4] = (2 * colors[0] + colors[12]) / 3; // Entries 1 and 2 are the 1/3 and 2/3 blends of the endpoints.
+			colors[ 5] = (2 * colors[1] + colors[13]) / 3;
+			colors[ 6] = (2 * colors[2] + colors[14]) / 3;
+
+			colors[ 8] = (colors[0] + 2 * colors[12]) / 3;
+			colors[ 9] = (colors[1] + 2 * colors[13]) / 3;
+			colors[10] = (colors[2] + 2 * colors[14]) / 3;
+		}
+		else
+		{
+			colors[ 0] = 0; // In this mode entry 0 is black.
+			colors[ 1] = 0;
+			colors[ 2] = 0;
+
+			colors[ 8] = bitRangeConvert( (c0>> 0)&0x1f, 5, 8);
+			colors[ 9] = bitRangeConvert( (c0>> 5)&0x1f, 5, 8);
+			colors[10] = bitRangeConvert( (c0>>10)&0x1f, 5, 8);
+
+			colors[12] = bitRangeConvert( (c1>> 0)&0x1f, 5, 8);
+			colors[13] = bitRangeConvert( (c1>> 5)&0x3f, 6, 8);
+			colors[14] = bitRangeConvert( (c1>>11)&0x1f, 5, 8);
+
+			colors[ 4] = colors[ 8] - colors[12] / 4; // NOTE(review): wraps modulo 256 when colors[12]/4 exceeds colors[8] -- confirm whether the format expects clamping to 0.
+			colors[ 5] = colors[ 9] - colors[13] / 4;
+			colors[ 6] = colors[10] - colors[14] / 4;
+		}
+
+		for (uint32_t ii = 0, next = 8*4; ii < 16*4; ii += 4, next += 2) // Selectors start at bit 32 of the block, two bits per texel.
+		{
+			int32_t idx = ( (_src[next>>3] >> (next & 7) ) & 3) * 4;
+			_dst[ii+0] = colors[idx+0];
+			_dst[ii+1] = colors[idx+1];
+			_dst[ii+2] = colors[idx+2];
+			_dst[ii+3] = 255; // The palette never stores alpha, so write opaque instead of copying an uninitialized byte.
+		}
+	}
 
 	static const int32_t s_etc1Mod[8][4] =
 	{
@@ -2968,10 +3097,10 @@ namespace bimg
 		const uint16_t minBlockX   = blockInfo.minBlockX;
 		const uint16_t minBlockY   = blockInfo.minBlockY;
 
-		_width     = bx::uint16_max(blockWidth  * minBlockX, ( (_width  + blockWidth  - 1) / blockWidth)*blockWidth);
-		_height    = bx::uint16_max(blockHeight * minBlockY, ( (_height + blockHeight - 1) / blockHeight)*blockHeight);
-		_depth     = bx::uint16_max(1, _depth);
-		_numLayers = bx::uint16_max(1, _numLayers);
+		_width     = bx::max<uint16_t>(blockWidth  * minBlockX, ( (_width  + blockWidth  - 1) / blockWidth)*blockWidth);
+		_height    = bx::max<uint16_t>(blockHeight * minBlockY, ( (_height + blockHeight - 1) / blockHeight)*blockHeight);
+		_depth     = bx::max<uint16_t>(1, _depth);
+		_numLayers = bx::max<uint16_t>(1, _numLayers);
 
 		const uint8_t numMips = _hasMips ? imageGetNumMips(_format, _width, _height, _depth) : 1;
 		uint32_t size = imageGetSize(NULL, _width, _height, _depth, _cubeMap, _hasMips, _numLayers, _format);
@@ -3617,15 +3746,15 @@ namespace bimg
 		{ KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG,         KTX_COMPRESSED_SRGB_ALPHA_PVRTC_4BPPV1_EXT,     KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG,         KTX_ZERO,                         }, // PTC14A
 		{ KTX_COMPRESSED_RGBA_PVRTC_2BPPV2_IMG,         KTX_ZERO,                                       KTX_COMPRESSED_RGBA_PVRTC_2BPPV2_IMG,         KTX_ZERO,                         }, // PTC22
 		{ KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG,         KTX_ZERO,                                       KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG,         KTX_ZERO,                         }, // PTC24
-        { KTX_ATC_RGB_AMD,                              KTX_ZERO,                                       KTX_ATC_RGB_AMD,                              KTX_ZERO,                         }, // ATC
-        { KTX_ATC_RGBA_EXPLICIT_ALPHA_AMD,              KTX_ZERO,                                       KTX_ATC_RGBA_EXPLICIT_ALPHA_AMD,              KTX_ZERO,                         }, // ATCE
-        { KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD,          KTX_ZERO,                                       KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD,          KTX_ZERO,                         }, // ATCI
-        { KTX_COMPRESSED_RGBA_ASTC_4x4_KHR,             KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR,       KTX_COMPRESSED_RGBA_ASTC_4x4_KHR,             KTX_ZERO,                         }, // ASTC4x4
-        { KTX_COMPRESSED_RGBA_ASTC_5x5_KHR,             KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR,       KTX_COMPRESSED_RGBA_ASTC_5x5_KHR,             KTX_ZERO,                         }, // ASTC5x5
-        { KTX_COMPRESSED_RGBA_ASTC_6x6_KHR,             KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR,       KTX_COMPRESSED_RGBA_ASTC_6x6_KHR,             KTX_ZERO,                         }, // ASTC6x6
-        { KTX_COMPRESSED_RGBA_ASTC_8x5_KHR,             KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,       KTX_COMPRESSED_RGBA_ASTC_8x5_KHR,             KTX_ZERO,                         }, // ASTC8x5
-        { KTX_COMPRESSED_RGBA_ASTC_8x6_KHR,             KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR,       KTX_COMPRESSED_RGBA_ASTC_8x6_KHR,             KTX_ZERO,                         }, // ASTC8x6
-        { KTX_COMPRESSED_RGBA_ASTC_10x5_KHR,            KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR,      KTX_COMPRESSED_RGBA_ASTC_10x5_KHR,            KTX_ZERO,                         }, // ASTC10x5
+		{ KTX_ATC_RGB_AMD,                              KTX_ZERO,                                       KTX_ATC_RGB_AMD,                              KTX_ZERO,                         }, // ATC
+		{ KTX_ATC_RGBA_EXPLICIT_ALPHA_AMD,              KTX_ZERO,                                       KTX_ATC_RGBA_EXPLICIT_ALPHA_AMD,              KTX_ZERO,                         }, // ATCE
+		{ KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD,          KTX_ZERO,                                       KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD,          KTX_ZERO,                         }, // ATCI
+		{ KTX_COMPRESSED_RGBA_ASTC_4x4_KHR,             KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR,       KTX_COMPRESSED_RGBA_ASTC_4x4_KHR,             KTX_ZERO,                         }, // ASTC4x4
+		{ KTX_COMPRESSED_RGBA_ASTC_5x5_KHR,             KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR,       KTX_COMPRESSED_RGBA_ASTC_5x5_KHR,             KTX_ZERO,                         }, // ASTC5x5
+		{ KTX_COMPRESSED_RGBA_ASTC_6x6_KHR,             KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR,       KTX_COMPRESSED_RGBA_ASTC_6x6_KHR,             KTX_ZERO,                         }, // ASTC6x6
+		{ KTX_COMPRESSED_RGBA_ASTC_8x5_KHR,             KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,       KTX_COMPRESSED_RGBA_ASTC_8x5_KHR,             KTX_ZERO,                         }, // ASTC8x5
+		{ KTX_COMPRESSED_RGBA_ASTC_8x6_KHR,             KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR,       KTX_COMPRESSED_RGBA_ASTC_8x6_KHR,             KTX_ZERO,                         }, // ASTC8x6
+		{ KTX_COMPRESSED_RGBA_ASTC_10x5_KHR,            KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR,      KTX_COMPRESSED_RGBA_ASTC_10x5_KHR,            KTX_ZERO,                         }, // ASTC10x5
 		{ KTX_ZERO,                                     KTX_ZERO,                                       KTX_ZERO,                                     KTX_ZERO,                         }, // Unknown
 		{ KTX_ZERO,                                     KTX_ZERO,                                       KTX_ZERO,                                     KTX_ZERO,                         }, // R1
 		{ KTX_ALPHA,                                    KTX_ZERO,                                       KTX_ALPHA,                                    KTX_UNSIGNED_BYTE,                }, // A8
@@ -3784,8 +3913,8 @@ namespace bimg
 		_imageContainer.m_depth       = depth;
 		_imageContainer.m_format      = format;
 		_imageContainer.m_orientation = Orientation::R0;
-		_imageContainer.m_numLayers   = uint16_t(bx::uint32_max(numberOfArrayElements, 1) );
-		_imageContainer.m_numMips     = uint8_t(bx::uint32_max(numMips, 1) );
+		_imageContainer.m_numLayers   = uint16_t(bx::max<uint32_t>(numberOfArrayElements, 1) );
+		_imageContainer.m_numMips     = uint8_t(bx::max<uint32_t>(numMips, 1) );
 		_imageContainer.m_hasAlpha    = hasAlpha;
 		_imageContainer.m_cubeMap     = numFaces > 1;
 		_imageContainer.m_ktx         = true;
@@ -3950,7 +4079,7 @@ namespace bimg
 		_imageContainer.m_format      = format;
 		_imageContainer.m_orientation = Orientation::R0;
 		_imageContainer.m_numLayers   = 1;
-		_imageContainer.m_numMips     = uint8_t(bx::uint32_max(numMips, 1) );
+		_imageContainer.m_numMips     = uint8_t(bx::max<uint32_t>(numMips, 1) );
 		_imageContainer.m_hasAlpha    = hasAlpha;
 		_imageContainer.m_cubeMap     = numFaces > 1;
 		_imageContainer.m_ktx         = false;
@@ -4605,28 +4734,35 @@ namespace bimg
 
 			for (uint8_t lod = 0, num = _imageContainer.m_numMips; lod < num; ++lod)
 			{
-				uint32_t sourceSize = bx::toHostEndian(*(const uint32_t*)&data[offset], _imageContainer.m_ktxLE);
-				offset += sizeof(uint32_t);
-
-				depth  = bx::uint32_max(1, depth);
+				width  = bx::max<uint32_t>(blockWidth  * minBlockX, ( (width  + blockWidth  - 1) / blockWidth )*blockWidth);
+				height = bx::max<uint32_t>(blockHeight * minBlockY, ( (height + blockHeight - 1) / blockHeight)*blockHeight);
+				depth  = bx::max<uint32_t>(1, depth);
 
-				uint32_t blocksX = bx::uint32_max(minBlockX, ((width  + blockWidth  - 1) / blockWidth ));
-				uint32_t blocksY = bx::uint32_max(minBlockY, ((height + blockHeight - 1) / blockHeight));
+				const uint32_t mipSize = width/blockWidth * height/blockHeight * depth * blockSize;
+				if (mipSize != width*height*depth*bpp/8)
+				{
+					BX_TRACE("x");
+				}
 
-                uint32_t destSize = blocksX * blocksY * blockSize * depth;
+				const uint32_t size = mipSize*numSides;
+				uint32_t imageSize = bx::toHostEndian(*(const uint32_t*)&data[offset], _imageContainer.m_ktxLE);
+				BX_CHECK(size == imageSize, "KTX: Image size mismatch %d (expected %d).", size, imageSize);
+				BX_UNUSED(size, imageSize);
 
-				BX_CHECK(sourceSize == destSize, "KTX: Image size mismatch %d (expected %d).", sourceSize, destSize);
+				offset += sizeof(uint32_t);
 
 				for (uint16_t side = 0; side < numSides; ++side)
 				{
+					BX_CHECK(offset <= _size, "Reading past size of data buffer! (offset %d, size %d)", offset, _size);
+
 					if (side == _side
 					&&  lod  == _lod)
 					{
-						_mip.m_width     = blocksX * blockWidth;
-						_mip.m_height    = blocksY * blockHeight;
+						_mip.m_width     = width;
+						_mip.m_height    = height;
 						_mip.m_depth     = depth;
 						_mip.m_blockSize = blockSize;
-						_mip.m_size      = destSize;
+						_mip.m_size      = mipSize;
 						_mip.m_data      = &data[offset];
 						_mip.m_bpp       = bpp;
 						_mip.m_format    = format;
@@ -4634,9 +4770,8 @@ namespace bimg
 						return true;
 					}
 
-					offset += sourceSize;
+					offset += mipSize;
 
-					BX_CHECK(offset <= _size, "Reading past size of data buffer! (offset %d, size %d)", offset, _size);
 					BX_UNUSED(_size);
 				}
 
@@ -4655,21 +4790,22 @@ namespace bimg
 
 				for (uint8_t lod = 0, num = _imageContainer.m_numMips; lod < num; ++lod)
 				{
-					depth  = bx::uint32_max(1, depth);
+					BX_CHECK(offset <= _size, "Reading past size of data buffer! (offset %d, size %d)", offset, _size);
 
-					uint32_t blocksX = bx::uint32_max(minBlockX, ((width  + blockWidth  - 1) / blockWidth ));
-					uint32_t blocksY = bx::uint32_max(minBlockY, ((height + blockHeight - 1) / blockHeight));
+					width  = bx::max<uint32_t>(blockWidth  * minBlockX, ( (width  + blockWidth  - 1) / blockWidth )*blockWidth);
+					height = bx::max<uint32_t>(blockHeight * minBlockY, ( (height + blockHeight - 1) / blockHeight)*blockHeight);
+					depth  = bx::max<uint32_t>(1, depth);
 
-					uint32_t size = blocksX * blocksY * blockSize * depth;
+					uint32_t mipSize = width/blockWidth * height/blockHeight * depth * blockSize;
 
 					if (side == _side
 					&&  lod  == _lod)
 					{
-						_mip.m_width     = blocksX * blockWidth;
-						_mip.m_height    = blocksY * blockHeight;
+						_mip.m_width     = width;
+						_mip.m_height    = height;
 						_mip.m_depth     = depth;
 						_mip.m_blockSize = blockSize;
-						_mip.m_size      = size;
+						_mip.m_size      = mipSize;
 						_mip.m_data      = &data[offset];
 						_mip.m_bpp       = bpp;
 						_mip.m_format    = format;
@@ -4677,9 +4813,8 @@ namespace bimg
 						return true;
 					}
 
-					offset += size;
+					offset += mipSize;
 
-					BX_CHECK(offset <= _size, "Reading past size of data buffer! (offset %d, size %d)", offset, _size);
 					BX_UNUSED(_size);
 
 					width  >>= 1;
@@ -4891,13 +5026,15 @@ namespace bimg
 		total += bx::writeLE(_writer, uint32_t(18*4+1), _err);
 
 		const uint8_t cdata[] = { 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0 };
-		total += bx::write(_writer, 'R', _err);
-		total += bx::write(_writer, cdata, BX_COUNTOF(cdata), _err);
-		total += bx::write(_writer, 'G', _err);
+		// Order is always ABGR order because Photoshop and GIMP ignore these fields and
+		// assume it's in ABGR order.
+		total += bx::write(_writer, 'A', _err);
 		total += bx::write(_writer, cdata, BX_COUNTOF(cdata), _err);
 		total += bx::write(_writer, 'B', _err);
 		total += bx::write(_writer, cdata, BX_COUNTOF(cdata), _err);
-		total += bx::write(_writer, 'A', _err);
+		total += bx::write(_writer, 'G', _err);
+		total += bx::write(_writer, cdata, BX_COUNTOF(cdata), _err);
+		total += bx::write(_writer, 'R', _err);
 		total += bx::write(_writer, cdata, BX_COUNTOF(cdata), _err);
 		total += bx::write(_writer, '\0', _err);
 
@@ -4973,7 +5110,12 @@ namespace bimg
 
 			for (uint32_t xx = 0; xx < _width && _err->isOk(); ++xx)
 			{
-				total += bx::write(_writer, &data[xx*bpp/8+0*bytesPerChannel], bytesPerChannel, _err);
+				total += bx::write(_writer, &data[xx*bpp/8+3*bytesPerChannel], bytesPerChannel, _err);
+			}
+
+			for (uint32_t xx = 0; xx < _width && _err->isOk(); ++xx)
+			{
+				total += bx::write(_writer, &data[xx*bpp/8+2*bytesPerChannel], bytesPerChannel, _err);
 			}
 
 			for (uint32_t xx = 0; xx < _width && _err->isOk(); ++xx)
@@ -4983,12 +5125,48 @@ namespace bimg
 
 			for (uint32_t xx = 0; xx < _width && _err->isOk(); ++xx)
 			{
-				total += bx::write(_writer, &data[xx*bpp/8+2*bytesPerChannel], bytesPerChannel, _err);
+				total += bx::write(_writer, &data[xx*bpp/8+0*bytesPerChannel], bytesPerChannel, _err);
 			}
 
+			data += _srcPitch;
+		}
+
+		return total;
+	}
+
+	int32_t imageWriteHdr(bx::WriterI* _writer, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, TextureFormat::Enum _format, bool _yflip, bx::Error* _err)
+	{
+		BX_ERROR_SCOPE(_err);
+
+		int32_t total = 0;
+		total += bx::write(_writer, "#?RADIANCE\n" , _err);
+		total += bx::write(_writer, "FORMAT=32-bit_rle_rgbe\n" , _err);
+		total += bx::write(_writer, '\n' , _err);
+
+		total += bx::writePrintf(_writer, "%cY %d +X %d\n", _yflip ? '+' : '-', _height, _width);
+
+		UnpackFn unpack = getUnpack(_format);
+		const uint32_t bpp  = getBitsPerPixel(_format);
+
+		const uint8_t* data = (const uint8_t*)_src;
+		for (uint32_t yy = 0; yy < _height && _err->isOk(); ++yy)
+		{
 			for (uint32_t xx = 0; xx < _width && _err->isOk(); ++xx)
 			{
-				total += bx::write(_writer, &data[xx*bpp/8+3*bytesPerChannel], bytesPerChannel, _err);
+				float rgba[4];
+				unpack(rgba, &data[xx*bpp/8]);
+
+				const float maxVal = bx::max(rgba[0], rgba[1], rgba[2]);
+				const float exp    = bx::ceil(bx::log2(maxVal) );
+				const float toRgb8 = 255.0f * 1.0f/bx::ldexp(1.0f, int(exp) );
+
+				uint8_t rgbe[4];
+				rgbe[0] = uint8_t(rgba[0] * toRgb8);
+				rgbe[1] = uint8_t(rgba[1] * toRgb8);
+				rgbe[2] = uint8_t(rgba[2] * toRgb8);
+				rgbe[3] = uint8_t(exp+128.0f);
+
+				total += bx::write(_writer, rgbe, 4, _err);
 			}
 
 			data += _srcPitch;
@@ -5191,15 +5369,15 @@ namespace bimg
 		}
 
 		const ImageBlockInfo& blockInfo = s_imageBlockInfo[_format];
-		const uint8_t  bpp         = blockInfo.bitsPerPixel;
 		const uint32_t blockWidth  = blockInfo.blockWidth;
 		const uint32_t blockHeight = blockInfo.blockHeight;
 		const uint32_t minBlockX   = blockInfo.minBlockX;
 		const uint32_t minBlockY   = blockInfo.minBlockY;
+		const uint8_t  blockSize   = blockInfo.blockSize;
 
 		const uint8_t* src = (const uint8_t*)_src;
 
-		const uint32_t numLayers = bx::uint32_max(_numLayers, 1);
+		const uint32_t numLayers = bx::max<uint32_t>(_numLayers, 1);
 		const uint32_t numSides = _cubeMap ? 6 : 1;
 
 		uint32_t width  = _width;
@@ -5208,12 +5386,12 @@ namespace bimg
 
 		for (uint8_t lod = 0; lod < _numMips && _err->isOk(); ++lod)
 		{
-			width  = bx::uint32_max(blockWidth  * minBlockX, ( (width  + blockWidth  - 1) / blockWidth )*blockWidth);
-			height = bx::uint32_max(blockHeight * minBlockY, ( (height + blockHeight - 1) / blockHeight)*blockHeight);
-			depth  = bx::uint32_max(1, depth);
+			width  = bx::max<uint32_t>(blockWidth  * minBlockX, ( (width  + blockWidth  - 1) / blockWidth )*blockWidth);
+			height = bx::max<uint32_t>(blockHeight * minBlockY, ( (height + blockHeight - 1) / blockHeight)*blockHeight);
+			depth  = bx::max<uint32_t>(1, depth);
 
-			const uint32_t mipSize = width*height*depth*bpp/8;
-			const uint32_t size = mipSize*numLayers*numSides;
+			const uint32_t mipSize = width/blockWidth * height/blockHeight * depth * blockSize;
+			const uint32_t size    = mipSize * numLayers * numSides;
 			total += bx::write(_writer, size, _err);
 
 			for (uint32_t layer = 0; layer < numLayers && _err->isOk(); ++layer)
@@ -5255,7 +5433,7 @@ namespace bimg
 		}
 
 		const uint32_t numMips   = _imageContainer.m_numMips;
-		const uint32_t numLayers = bx::uint32_max(_imageContainer.m_numLayers, 1);
+		const uint32_t numLayers = bx::max<uint32_t>(_imageContainer.m_numLayers, 1);
 		const uint32_t numSides  = _imageContainer.m_cubeMap ? 6 : 1;
 
 		for (uint8_t lod = 0; lod < numMips && _err->isOk(); ++lod)
@@ -5281,185 +5459,4 @@ namespace bimg
 		return total;
 	}
 
-	//                  +----------+
-	//                  |-z       2|
-	//                  | ^  +y    |
-	//                  | |        |
-	//                  | +---->+x |
-	//       +----------+----------+----------+----------+
-	//       |+y       1|+y       4|+y       0|+y       5|
-	//       | ^  -x    | ^  +z    | ^  +x    | ^  -z    |
-	//       | |        | |        | |        | |        |
-	//       | +---->+z | +---->+x | +---->-z | +---->-x |
-	//       +----------+----------+----------+----------+
-	//                  |+z       3|
-	//                  | ^  -y    |
-	//                  | |        |
-	//                  | +---->+x |
-	//                  +----------+
-	//
-	struct CubeMapFace
-	{
-		float uv[3][3];
-	};
-
-	static const CubeMapFace s_cubeMapFace[] =
-	{
-		{{ // +x face
-			{  0.0f,  0.0f, -1.0f }, // u -> -z
-			{  0.0f, -1.0f,  0.0f }, // v -> -y
-			{  1.0f,  0.0f,  0.0f }, // +x face
-		}},
-		{{ // -x face
-			{  0.0f,  0.0f,  1.0f }, // u -> +z
-			{  0.0f, -1.0f,  0.0f }, // v -> -y
-			{ -1.0f,  0.0f,  0.0f }, // -x face
-		}},
-		{{ // +y face
-			{  1.0f,  0.0f,  0.0f }, // u -> +x
-			{  0.0f,  0.0f,  1.0f }, // v -> +z
-			{  0.0f,  1.0f,  0.0f }, // +y face
-		}},
-		{{ // -y face
-			{  1.0f,  0.0f,  0.0f }, // u -> +x
-			{  0.0f,  0.0f, -1.0f }, // v -> -z
-			{  0.0f, -1.0f,  0.0f }, // -y face
-		}},
-		{{ // +z face
-			{  1.0f,  0.0f,  0.0f }, // u -> +x
-			{  0.0f, -1.0f,  0.0f }, // v -> -y
-			{  0.0f,  0.0f,  1.0f }, // +z face
-		}},
-		{{ // -z face
-			{ -1.0f,  0.0f,  0.0f }, // u -> -x
-			{  0.0f, -1.0f,  0.0f }, // v -> -y
-			{  0.0f,  0.0f, -1.0f }, // -z face
-		}},
-	};
-
-	/// _u and _v should be center addressing and in [-1.0+invSize..1.0-invSize] range.
-	void texelUvToDir(float* _result, uint8_t _side, float _u, float _v)
-	{
-		const CubeMapFace& face = s_cubeMapFace[_side];
-
-		float tmp[3];
-		tmp[0] = face.uv[0][0] * _u + face.uv[1][0] * _v + face.uv[2][0];
-		tmp[1] = face.uv[0][1] * _u + face.uv[1][1] * _v + face.uv[2][1];
-		tmp[2] = face.uv[0][2] * _u + face.uv[1][2] * _v + face.uv[2][2];
-		bx::vec3Norm(_result, tmp);
-	}
-
-	ImageContainer* imageCubemapFromLatLongRgba32F(bx::AllocatorI* _allocator, const ImageContainer& _input, bool _useBilinearInterpolation, bx::Error* _err)
-	{
-		BX_ERROR_SCOPE(_err);
-
-		if (_input.m_depth     != 1
-		&&  _input.m_numLayers != 1
-		&&  _input.m_format    != TextureFormat::RGBA32F
-		&&  _input.m_width/2   != _input.m_height)
-		{
-			BX_ERROR_SET(_err, BIMG_ERROR, "Input image format is not equirectangular projection.");
-			return NULL;
-		}
-
-		const uint32_t srcWidthMinusOne  = _input.m_width-1;
-		const uint32_t srcHeightMinusOne = _input.m_height-1;
-		const uint32_t srcPitch = _input.m_width*16;
-		const uint32_t dstWidth = _input.m_height/2;
-		const uint32_t dstPitch = dstWidth*16;
-		const float invDstWidth = 1.0f / float(dstWidth);
-
-		ImageContainer* output = imageAlloc(_allocator
-			, _input.m_format
-			, uint16_t(dstWidth)
-			, uint16_t(dstWidth)
-			, uint16_t(1)
-			, 1
-			, true
-			, false
-			);
-
-		const uint8_t* srcData = (const uint8_t*)_input.m_data;
-
-		for (uint8_t side = 0; side < 6 && _err->isOk(); ++side)
-		{
-			ImageMip mip;
-			imageGetRawData(*output, side, 0, output->m_data, output->m_size, mip);
-
-			for (uint32_t yy = 0; yy < dstWidth; ++yy)
-			{
-				for (uint32_t xx = 0; xx < dstWidth; ++xx)
-				{
-					float* dstData = (float*)&mip.m_data[yy*dstPitch+xx*16];
-
-					const float uu = 2.0f*xx*invDstWidth - 1.0f;
-					const float vv = 2.0f*yy*invDstWidth - 1.0f;
-
-					float dir[3];
-					texelUvToDir(dir, side, uu, vv);
-
-					float srcU, srcV;
-					bx::vec3ToLatLong(&srcU, &srcV, dir);
-
-					srcU *= srcWidthMinusOne;
-					srcV *= srcHeightMinusOne;
-
-					if (_useBilinearInterpolation)
-					{
-						const uint32_t x0 = uint32_t(srcU);
-						const uint32_t y0 = uint32_t(srcV);
-						const uint32_t x1 = bx::min(x0 + 1, srcWidthMinusOne);
-						const uint32_t y1 = bx::min(y0 + 1, srcHeightMinusOne);
-
-						const float* src0 = (const float*)&srcData[y0*srcPitch + x0*16];
-						const float* src1 = (const float*)&srcData[y0*srcPitch + x1*16];
-						const float* src2 = (const float*)&srcData[y1*srcPitch + x0*16];
-						const float* src3 = (const float*)&srcData[y1*srcPitch + x1*16];
-
-						const float tx   = srcU - float(int32_t(x0) );
-						const float ty   = srcV - float(int32_t(y0) );
-						const float omtx = 1.0f - tx;
-						const float omty = 1.0f - ty;
-
-						float p0[4];
-						bx::vec4Mul(p0, src0, omtx*omty);
-
-						float p1[4];
-						bx::vec4Mul(p1, src1, tx*omty);
-
-						float p2[4];
-						bx::vec4Mul(p2, src2, omtx*ty);
-
-						float p3[4];
-						bx::vec4Mul(p3, src3, tx*ty);
-
-						const float rr = p0[0] + p1[0] + p2[0] + p3[0];
-						const float gg = p0[1] + p1[1] + p2[1] + p3[1];
-						const float bb = p0[2] + p1[2] + p2[2] + p3[2];
-						const float aa = p0[3] + p1[3] + p2[3] + p3[3];
-
-						dstData[0] = rr;
-						dstData[1] = gg;
-						dstData[2] = bb;
-						dstData[3] = aa;
-					}
-					else
-					{
-						const uint32_t x0 = uint32_t(srcU);
-						const uint32_t y0 = uint32_t(srcV);
-						const float* src0 = (const float*)&srcData[y0*srcPitch + x0*16];
-
-						dstData[0] = src0[0];
-						dstData[1] = src0[1];
-						dstData[2] = src0[2];
-						dstData[3] = src0[3];
-					}
-
-				}
-			}
-		}
-
-		return output;
-	}
-
 } // namespace bimg

+ 1207 - 0
src/image_cubemap_filter.cpp

@@ -0,0 +1,1207 @@
+/*
+ * Copyright 2011-2018 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bimg#license-bsd-2-clause
+ */
+
+#include "bimg_p.h"
+#include <bimg/encode.h>
+#include <bx/rng.h>
+
+namespace bimg
+{
+	/*
+	 * Copyright 2014-2015 Dario Manesku. All rights reserved.
+	 * License: http://www.opensource.org/licenses/BSD-2-Clause
+	 */
+
+	//                  +----------+
+	//                  |-z       2|
+	//                  | ^  +y    |
+	//                  | |        |
+	//                  | +---->+x |
+	//       +----------+----------+----------+----------+
+	//       |+y       1|+y       4|+y       0|+y       5|
+	//       | ^  -x    | ^  +z    | ^  +x    | ^  -z    |
+	//       | |        | |        | |        | |        |
+	//       | +---->+z | +---->+x | +---->-z | +---->-x |
+	//       +----------+----------+----------+----------+
+	//                  |+z       3|
+	//                  | ^  -y    |
+	//                  | |        |
+	//                  | +---->+x |
+	//                  +----------+
+	//
+	/// Per-face description of a cube map: face and edge indices, the neighbour
+	/// link record, and the basis used to convert between face-local (u, v)
+	/// coordinates and 3D directions.
+	struct CubeMapFace
+	{
+		/// Face indices. The order matches the rows of s_cubeMapFace and
+		/// s_cubeMapFaceNeighbours.
+		enum Enum
+		{
+			PositiveX,
+			NegativeX,
+			PositiveY,
+			NegativeY,
+			PositiveZ,
+			NegativeZ,
+
+			Count
+		};
+
+		/// Edges of a face, in the same order as the columns of
+		/// s_cubeMapFaceNeighbours.
+		struct Edge
+		{
+			enum Enum
+			{
+				Left,
+				Right,
+				Top,
+				Bottom,
+
+				Count
+			};
+		};
+
+		//    --> U    _____
+		//   |        |     |
+		//   v        | +Y  |
+		//   V   _____|_____|_____ _____
+		//      |     |     |     |     |
+		//      | -X  | +Z  | +X  | -Z  |
+		//      |_____|_____|_____|_____|
+		//            |     |
+		//            | -Y  |
+		//            |_____|
+		//
+		// Neighbour faces in order: left, right, top, bottom.
+		// FaceEdge is the edge that belongs to the neighbour face.
+		struct Neighbour
+		{
+			uint8_t m_faceIdx;
+			uint8_t m_faceEdge;
+		};
+
+		/// Rows 0 and 1 are the 3D axes that face-local u and v run along; row 2
+		/// is the axis of the face itself (see the per-row comments in
+		/// s_cubeMapFace).
+		float uv[3][3];
+	};
+
+	/// Basis for every cube face, indexed by CubeMapFace::Enum. Each entry lists
+	/// the axis u maps to, the axis v maps to, and the axis of the face itself.
+	static const CubeMapFace s_cubeMapFace[] =
+	{
+		{{ // +x face
+			{  0.0f,  0.0f, -1.0f }, // u -> -z
+			{  0.0f, -1.0f,  0.0f }, // v -> -y
+			{  1.0f,  0.0f,  0.0f }, // +x face
+		}},
+		{{ // -x face
+			{  0.0f,  0.0f,  1.0f }, // u -> +z
+			{  0.0f, -1.0f,  0.0f }, // v -> -y
+			{ -1.0f,  0.0f,  0.0f }, // -x face
+		}},
+		{{ // +y face
+			{  1.0f,  0.0f,  0.0f }, // u -> +x
+			{  0.0f,  0.0f,  1.0f }, // v -> +z
+			{  0.0f,  1.0f,  0.0f }, // +y face
+		}},
+		{{ // -y face
+			{  1.0f,  0.0f,  0.0f }, // u -> +x
+			{  0.0f,  0.0f, -1.0f }, // v -> -z
+			{  0.0f, -1.0f,  0.0f }, // -y face
+		}},
+		{{ // +z face
+			{  1.0f,  0.0f,  0.0f }, // u -> +x
+			{  0.0f, -1.0f,  0.0f }, // v -> -y
+			{  0.0f,  0.0f,  1.0f }, // +z face
+		}},
+		{{ // -z face
+			{ -1.0f,  0.0f,  0.0f }, // u -> -x
+			{  0.0f, -1.0f,  0.0f }, // v -> -y
+			{  0.0f,  0.0f, -1.0f }, // -z face
+		}},
+	};
+
+	/// Adjacency table. The first index is the face (CubeMapFace::Enum order),
+	/// the second is the edge of that face (CubeMapFace::Edge::Enum order: left,
+	/// right, top, bottom). Each entry names the face on the other side of that
+	/// edge and which edge of the neighbouring face is the shared one.
+	static const CubeMapFace::Neighbour s_cubeMapFaceNeighbours[6][4] =
+	{
+		{ // +X
+			{ CubeMapFace::PositiveZ, CubeMapFace::Edge::Right  },
+			{ CubeMapFace::NegativeZ, CubeMapFace::Edge::Left   },
+			{ CubeMapFace::PositiveY, CubeMapFace::Edge::Right  },
+			{ CubeMapFace::NegativeY, CubeMapFace::Edge::Right  },
+		},
+		{ // -X
+			{ CubeMapFace::NegativeZ, CubeMapFace::Edge::Right  },
+			{ CubeMapFace::PositiveZ, CubeMapFace::Edge::Left   },
+			{ CubeMapFace::PositiveY, CubeMapFace::Edge::Left   },
+			{ CubeMapFace::NegativeY, CubeMapFace::Edge::Left   },
+		},
+		{ // +Y
+			{ CubeMapFace::NegativeX, CubeMapFace::Edge::Top    },
+			{ CubeMapFace::PositiveX, CubeMapFace::Edge::Top    },
+			{ CubeMapFace::NegativeZ, CubeMapFace::Edge::Top    },
+			{ CubeMapFace::PositiveZ, CubeMapFace::Edge::Top    },
+		},
+		{ // -Y
+			{ CubeMapFace::NegativeX, CubeMapFace::Edge::Bottom },
+			{ CubeMapFace::PositiveX, CubeMapFace::Edge::Bottom },
+			{ CubeMapFace::PositiveZ, CubeMapFace::Edge::Bottom },
+			{ CubeMapFace::NegativeZ, CubeMapFace::Edge::Bottom },
+		},
+		{ // +Z
+			{ CubeMapFace::NegativeX, CubeMapFace::Edge::Right  },
+			{ CubeMapFace::PositiveX, CubeMapFace::Edge::Left   },
+			{ CubeMapFace::PositiveY, CubeMapFace::Edge::Bottom },
+			{ CubeMapFace::NegativeY, CubeMapFace::Edge::Top    },
+		},
+		{ // -Z
+			{ CubeMapFace::PositiveX, CubeMapFace::Edge::Right  },
+			{ CubeMapFace::NegativeX, CubeMapFace::Edge::Left   },
+			{ CubeMapFace::PositiveY, CubeMapFace::Edge::Top    },
+			{ CubeMapFace::NegativeY, CubeMapFace::Edge::Bottom },
+		},
+	};
+
+	/// Converts face-local coordinates on cube face _side into a normalized 3D
+	/// direction, written to _outDir (3 floats).
+	/// _u and _v should be center addressing and in [-1.0+invSize..1.0-invSize] range.
+	void texelUvToDir(float* _outDir, uint8_t _side, float _u, float _v)
+	{
+		const float (&basis)[3][3] = s_cubeMapFace[_side].uv;
+
+		// dir = uAxis*_u + vAxis*_v + faceAxis, evaluated component by component.
+		float unnormalized[3];
+		for (uint32_t axis = 0; axis < 3; ++axis)
+		{
+			unnormalized[axis] = basis[0][axis] * _u + basis[1][axis] * _v + basis[2][axis];
+		}
+
+		bx::vec3Norm(_outDir, unnormalized);
+	}
+
+	/// Finds the cube face that direction _dir (3 floats) points at and where it
+	/// lands on that face.
+	/// _outSide receives the face index (CubeMapFace::Enum). _outU and _outV
+	/// receive the projection onto the face's u and v axes, remapped with
+	/// (x + 1) * 0.5.
+	void dirToTexelUv(float& _outU, float& _outV, uint8_t& _outSide, const float* _dir)
+	{
+		float magnitude[3];
+		bx::vec3Abs(magnitude, _dir);
+
+		const float dominant = bx::max(magnitude[0], magnitude[1], magnitude[2]);
+
+		// Major axis: the first of x, y, z whose magnitude equals the maximum;
+		// z is the fallback when neither x nor y compares equal.
+		uint32_t axis = 2;
+		if (dominant == magnitude[0])
+		{
+			axis = 0;
+		}
+		else if (dominant == magnitude[1])
+		{
+			axis = 1;
+		}
+
+		const uint8_t positiveFace[3] =
+		{
+			uint8_t(CubeMapFace::PositiveX),
+			uint8_t(CubeMapFace::PositiveY),
+			uint8_t(CubeMapFace::PositiveZ),
+		};
+		const uint8_t negativeFace[3] =
+		{
+			uint8_t(CubeMapFace::NegativeX),
+			uint8_t(CubeMapFace::NegativeY),
+			uint8_t(CubeMapFace::NegativeZ),
+		};
+
+		// The sign of the major component picks between the two opposite faces.
+		if (_dir[axis] >= 0.0f)
+		{
+			_outSide = positiveFace[axis];
+		}
+		else
+		{
+			_outSide = negativeFace[axis];
+		}
+
+		// Scale the direction so its major component has magnitude 1.
+		const float invDominant = 1.0f/dominant;
+		float onFace[3];
+		bx::vec3Mul(onFace, _dir, invDominant);
+
+		const CubeMapFace& face = s_cubeMapFace[_outSide];
+		_outU = (bx::vec3Dot(face.uv[0], onFace) + 1.0f) * 0.5f;
+		_outV = (bx::vec3Dot(face.uv[1], onFace) + 1.0f) * 0.5f;
+	}
+
+	/// Builds a cube map from an equirectangular (lat-long) RGBA32F image.
+	///
+	/// _input must have depth 1, a single layer, RGBA32F format, and a width/2
+	/// equal to its height. If any of these does not hold, _err is set to
+	/// BIMG_ERROR and NULL is returned.
+	///
+	/// Each cube face is (_input.m_height/2) texels on a side. With
+	/// _useBilinearInterpolation the four source texels around the lookup are
+	/// blended; otherwise the texel at the truncated coordinate is copied.
+	///
+	/// The result is allocated through imageAlloc with _allocator.
+	ImageContainer* imageCubemapFromLatLongRgba32F(bx::AllocatorI* _allocator, const ImageContainer& _input, bool _useBilinearInterpolation, bx::Error* _err)
+	{
+		BX_ERROR_SCOPE(_err);
+
+		// Reject the input when ANY requirement is violated. The code below
+		// hard-codes 16 bytes per texel and a single 2D slice, so all four
+		// conditions must hold before the source buffer may be indexed.
+		if (_input.m_depth     != 1
+		||  _input.m_numLayers != 1
+		||  _input.m_format    != TextureFormat::RGBA32F
+		||  _input.m_width/2   != _input.m_height)
+		{
+			BX_ERROR_SET(_err, BIMG_ERROR, "Input image format is not equirectangular projection.");
+			return NULL;
+		}
+
+		const uint32_t srcWidthMinusOne  = _input.m_width-1;
+		const uint32_t srcHeightMinusOne = _input.m_height-1;
+		const uint32_t srcPitch = _input.m_width*16;
+		const uint32_t dstWidth = _input.m_height/2;
+		const uint32_t dstPitch = dstWidth*16;
+		const float invDstWidth = 1.0f / float(dstWidth);
+
+		ImageContainer* output = imageAlloc(_allocator
+			, _input.m_format
+			, uint16_t(dstWidth)
+			, uint16_t(dstWidth)
+			, uint16_t(1)
+			, 1
+			, true
+			, false
+			);
+
+		const uint8_t* srcData = (const uint8_t*)_input.m_data;
+
+		for (uint8_t side = 0; side < 6 && _err->isOk(); ++side)
+		{
+			ImageMip mip;
+			imageGetRawData(*output, side, 0, output->m_data, output->m_size, mip);
+
+			for (uint32_t yy = 0; yy < dstWidth; ++yy)
+			{
+				for (uint32_t xx = 0; xx < dstWidth; ++xx)
+				{
+					float* dstData = (float*)&mip.m_data[yy*dstPitch+xx*16];
+
+					// Face-local coordinates in [-1, 1).
+					const float uu = 2.0f*xx*invDstWidth - 1.0f;
+					const float vv = 2.0f*yy*invDstWidth - 1.0f;
+
+					float dir[3];
+					texelUvToDir(dir, side, uu, vv);
+
+					// Direction -> lat-long coordinates, then scale to source texels.
+					float srcU, srcV;
+					bx::vec3ToLatLong(&srcU, &srcV, dir);
+
+					srcU *= srcWidthMinusOne;
+					srcV *= srcHeightMinusOne;
+
+					// Top-left source texel; used by both sampling modes.
+					const uint32_t x0 = uint32_t(srcU);
+					const uint32_t y0 = uint32_t(srcV);
+
+					if (_useBilinearInterpolation)
+					{
+						// Clamp the second sample to the last column/row.
+						const uint32_t x1 = bx::min(x0 + 1, srcWidthMinusOne);
+						const uint32_t y1 = bx::min(y0 + 1, srcHeightMinusOne);
+
+						const float* src0 = (const float*)&srcData[y0*srcPitch + x0*16];
+						const float* src1 = (const float*)&srcData[y0*srcPitch + x1*16];
+						const float* src2 = (const float*)&srcData[y1*srcPitch + x0*16];
+						const float* src3 = (const float*)&srcData[y1*srcPitch + x1*16];
+
+						// Fractional position inside the 2x2 footprint.
+						const float tx   = srcU - float(int32_t(x0) );
+						const float ty   = srcV - float(int32_t(y0) );
+						const float omtx = 1.0f - tx;
+						const float omty = 1.0f - ty;
+
+						float p0[4];
+						bx::vec4Mul(p0, src0, omtx*omty);
+
+						float p1[4];
+						bx::vec4Mul(p1, src1, tx*omty);
+
+						float p2[4];
+						bx::vec4Mul(p2, src2, omtx*ty);
+
+						float p3[4];
+						bx::vec4Mul(p3, src3, tx*ty);
+
+						const float rr = p0[0] + p1[0] + p2[0] + p3[0];
+						const float gg = p0[1] + p1[1] + p2[1] + p3[1];
+						const float bb = p0[2] + p1[2] + p2[2] + p3[2];
+						const float aa = p0[3] + p1[3] + p2[3] + p3[3];
+
+						dstData[0] = rr;
+						dstData[1] = gg;
+						dstData[2] = bb;
+						dstData[3] = aa;
+					}
+					else
+					{
+						const float* src0 = (const float*)&srcData[y0*srcPitch + x0*16];
+
+						dstData[0] = src0[0];
+						dstData[1] = src0[1];
+						dstData[2] = src0[2];
+						dstData[3] = src0[3];
+					}
+
+				}
+			}
+		}
+
+		return output;
+	}
+
+	/// Builds a cube map from a horizontal strip of six square faces stored side
+	/// by side in one RGBA32F image.
+	///
+	/// _input must have depth 1, a single layer, RGBA32F format, and a width/6
+	/// equal to its height. If any of these does not hold, _err is set to
+	/// BIMG_ERROR and NULL is returned.
+	///
+	/// Face N is copied from the N-th (_input.m_height wide) column block of the
+	/// strip. The result is allocated through imageAlloc with _allocator.
+	ImageContainer* imageCubemapFromStripRgba32F(bx::AllocatorI* _allocator, const ImageContainer& _input, bx::Error* _err)
+	{
+		BX_ERROR_SCOPE(_err);
+
+		// Reject the input when ANY requirement is violated. The copy below
+		// assumes 16 bytes per texel and six square faces in a single 2D slice.
+		if (_input.m_depth     != 1
+		||  _input.m_numLayers != 1
+		||  _input.m_format    != TextureFormat::RGBA32F
+		||  _input.m_width/6   != _input.m_height)
+		{
+			BX_ERROR_SET(_err, BIMG_ERROR, "Input image format is not strip projection.");
+			return NULL;
+		}
+
+		const uint32_t srcPitch = _input.m_width*16;
+		const uint32_t dstWidth = _input.m_height;
+		const uint32_t dstPitch = dstWidth*16;
+
+		ImageContainer* output = imageAlloc(_allocator
+			, _input.m_format
+			, uint16_t(dstWidth)
+			, uint16_t(dstWidth)
+			, uint16_t(1)
+			, 1
+			, true
+			, false
+			);
+
+		const uint8_t* srcData = (const uint8_t*)_input.m_data;
+
+		// Step one face width (dstPitch bytes) to the right in the strip per side.
+		for (uint8_t side = 0; side < 6 && _err->isOk(); ++side, srcData += dstPitch)
+		{
+			ImageMip dstMip;
+			imageGetRawData(*output, side, 0, output->m_data, output->m_size, dstMip);
+
+			// Copy dstWidth rows of dstPitch bytes: source rows are srcPitch
+			// apart, destination rows are tightly packed.
+			bx::memCopy(const_cast<uint8_t*>(dstMip.m_data), srcData, dstPitch, dstWidth, srcPitch, dstPitch);
+		}
+
+		return output;
+	}
+
+	/// Helper term used by texelSolidAngle(): atan2(x*y, sqrt(x*x + y*y + 1)).
+	inline float areaElement(float _x, float _y)
+	{
+		const float product = _x*_y;
+		const float length  = bx::sqrt(_x*_x + _y*_y + 1.0f);
+
+		return bx::atan2(product, length);
+	}
+
+	/// Solid angle of the square [_u-_invFaceSize, _u+_invFaceSize] x
+	/// [_v-_invFaceSize, _v+_invFaceSize] on a cube face, computed from
+	/// areaElement() evaluated at its four corners.
+	float texelSolidAngle(float _u, float _v, float _invFaceSize)
+	{
+		// Reference:
+		//  - https://web.archive.org/web/20180614195754/http://www.mpia.de/~mathar/public/mathar20051002.pdf
+		//  - https://web.archive.org/web/20180614195725/http://www.rorydriscoll.com/2012/01/15/cubemap-texel-solid-angle/
+		//
+		const float uMin = _u - _invFaceSize;
+		const float uMax = _u + _invFaceSize;
+		const float vMin = _v - _invFaceSize;
+		const float vMax = _v + _invFaceSize;
+
+		// Inclusion-exclusion over the four corners.
+		float solidAngle = areaElement(uMax, vMax);
+		solidAngle -= areaElement(uMin, vMax);
+		solidAngle -= areaElement(uMax, vMin);
+		solidAngle += areaElement(uMin, vMin);
+
+		return solidAngle;
+	}
+
+	/// Allocates an RGBA32F cube map of _size x _size faces in which every texel
+	/// stores its direction (first three floats, from texelUvToDir) and its
+	/// solid angle (fourth float, from texelSolidAngle).
+	ImageContainer* imageCubemapNormalSolidAngle(bx::AllocatorI* _allocator, uint32_t _size)
+	{
+		const uint32_t faceSize      = _size;
+		const uint32_t bytesPerTexel = 16;
+		const uint32_t rowPitch      = faceSize*bytesPerTexel;
+		const float    invFaceSize   = 1.0f / float(faceSize);
+
+		ImageContainer* output = imageAlloc(
+			  _allocator
+			, TextureFormat::RGBA32F
+			, uint16_t(faceSize)
+			, uint16_t(faceSize)
+			, 1
+			, 1
+			, true
+			, false
+			);
+
+		for (uint8_t side = 0; side < 6; ++side)
+		{
+			ImageMip mip;
+			imageGetRawData(*output, side, 0, output->m_data, output->m_size, mip);
+
+			for (uint32_t yy = 0; yy < faceSize; ++yy)
+			{
+				// NOTE(review): coordinates are taken at the texel's lower corner
+				// (no +0.5 offset), while texelUvToDir asks for center addressing.
+				// Confirm this is intended.
+				const float vv = float(yy)*invFaceSize*2.0f - 1.0f;
+
+				for (uint32_t xx = 0; xx < faceSize; ++xx)
+				{
+					const float uu = float(xx)*invFaceSize*2.0f - 1.0f;
+
+					float* texel = (float*)(mip.m_data + (yy*rowPitch + xx*bytesPerTexel) );
+
+					texelUvToDir(texel, side, uu, vv);
+					texel[3] = texelSolidAngle(uu, vv, invFaceSize);
+				}
+			}
+		}
+
+		return output;
+	}
+
+	/// 2D axis-aligned bounding box that grows as points are added to it.
+	struct Aabb
+	{
+		/// Starts inverted: min at +kFloatMax and max at -kFloatMax.
+		Aabb()
+		{
+			for (uint32_t ii = 0; ii < 2; ++ii)
+			{
+				m_min[ii] =  bx::kFloatMax;
+				m_max[ii] = -bx::kFloatMax;
+			}
+		}
+
+		/// Extends the box so that it contains the point (_x, _y).
+		void add(float _x, float _y)
+		{
+			const float point[2] = { _x, _y };
+
+			for (uint32_t ii = 0; ii < 2; ++ii)
+			{
+				m_min[ii] = bx::min(m_min[ii], point[ii]);
+				m_max[ii] = bx::max(m_max[ii], point[ii]);
+			}
+		}
+
+		/// Clamps all four bounds into [_min, _max].
+		void clamp(float _min, float _max)
+		{
+			for (uint32_t ii = 0; ii < 2; ++ii)
+			{
+				m_min[ii] = bx::clamp(m_min[ii], _min, _max);
+				m_max[ii] = bx::clamp(m_max[ii], _min, _max);
+			}
+		}
+
+		/// Returns true while any bound still holds its initial sentinel value.
+		bool isEmpty() const
+		{
+			const bool minWasSet = (m_min[0] !=  bx::kFloatMax)
+				&& (m_min[1] !=  bx::kFloatMax)
+				;
+			const bool maxWasSet = (m_max[0] != -bx::kFloatMax)
+				&& (m_max[1] != -bx::kFloatMax)
+				;
+
+			return !(minWasSet && maxWasSet);
+		}
+
+		float m_min[2];
+		float m_max[2];
+	};
+
+	/// Computes, per cube face, the bounding box (in face-local [0, 1]
+	/// coordinates) covered by a square filter of half-extent _filterSize centred
+	/// on the point where direction _dir hits the cube.
+	///
+	/// _outFilterArea is indexed by face index (CubeMapFace::Enum). The entry of
+	/// the hit face is overwritten. Entries of faces the filter bleeds onto are
+	/// extended with Aabb::add() and clamped to [0, 1]. All other entries are
+	/// left untouched.
+	void calcFilterArea(Aabb* _outFilterArea, const float* _dir, float _filterSize)
+	{
+		///   ______
+		///  |      |
+		///  |      |
+		///  |    x |
+		///  |______|
+		///
+		// Get face and hit coordinates.
+		float uu, vv;
+		uint8_t hitFaceIdx;
+		dirToTexelUv(uu, vv, hitFaceIdx, _dir);
+
+		///  ........
+		///  .      .
+		///  .   ___.
+		///  .  | x |
+		///  ...|___|
+		///
+		// Calculate hit face filter bounds.
+		Aabb hitFaceFilterBounds;
+		hitFaceFilterBounds.add(uu-_filterSize, vv-_filterSize);
+		hitFaceFilterBounds.add(uu+_filterSize, vv+_filterSize);
+		hitFaceFilterBounds.clamp(0.0f, 1.0f);
+
+		// Output result for hit face.
+		bx::memCopy(&_outFilterArea[hitFaceIdx], &hitFaceFilterBounds, sizeof(Aabb));
+
+		/// Filter area might extend on neighbour faces.
+		/// Case when extending over the right edge:
+		///
+		///  --> U
+		/// |        ......
+		/// v       .      .
+		/// V       .      .
+		///         .      .
+		///  ....... ...... .......
+		///  .      .      .      .
+		///  .      .  .....__min .
+		///  .      .  .   .  |  -> amount
+		///  ....... .....x.__|....
+		///         .  .   .  max
+		///         .  ........
+		///         .      .
+		///          ......
+		///         .      .
+		///         .      .
+		///         .      .
+		///          ......
+		///
+
+		// For each edge of the hit face: how far the filter reaches past that edge
+		// (m_amount, positive only when it crosses) and the clamped extent of the
+		// filter along that edge (m_bbMin..m_bbMax).
+		struct NeighourFaceBleed
+		{
+			float m_amount;
+			float m_bbMin;
+			float m_bbMax;
+		};
+
+		const NeighourFaceBleed bleed[CubeMapFace::Edge::Count] =
+		{
+			{ // Left
+				_filterSize - uu,
+				hitFaceFilterBounds.m_min[1],
+				hitFaceFilterBounds.m_max[1],
+			},
+			{ // Right
+				uu + _filterSize - 1.0f,
+				hitFaceFilterBounds.m_min[1],
+				hitFaceFilterBounds.m_max[1],
+			},
+			{ // Top
+				_filterSize - vv,
+				hitFaceFilterBounds.m_min[0],
+				hitFaceFilterBounds.m_max[0],
+			},
+			{ // Bottom
+				vv + _filterSize - 1.0f,
+				hitFaceFilterBounds.m_min[0],
+				hitFaceFilterBounds.m_max[0],
+			},
+		};
+
+		// Determine bleeding for each side.
+		for (uint8_t side = 0; side < 4; ++side)
+		{
+			uint8_t currentFaceIdx = hitFaceIdx;
+
+			// Every iteration steps onto the next neighbour through the same edge
+			// index and consumes one full face width of the remaining bleed.
+			for (float bleedAmount = bleed[side].m_amount; bleedAmount > 0.0f; bleedAmount -= 1.0f)
+			{
+				uint8_t neighbourFaceIdx  = s_cubeMapFaceNeighbours[currentFaceIdx][side].m_faceIdx;
+				uint8_t neighbourFaceEdge = s_cubeMapFaceNeighbours[currentFaceIdx][side].m_faceEdge;
+				currentFaceIdx = neighbourFaceIdx;
+
+				/// https://code.google.com/p/cubemapgen/source/browse/trunk/CCubeMapProcessor.cpp#773
+				///
+				/// Handle situations when bbMin and bbMax should be flipped.
+				///
+				///    L - Left           ....................T-T
+				///    R - Right          v                     .
+				///    T - Top        __________                .
+				///    B - Bottom    .          |               .
+				///                  .          |               .
+				///                  .          |<...R-T        .
+				///                  .          |    v          v
+				///        .......... ..........|__________ __________
+				///       .          .          .          .          .
+				///       .          .          .          .          .
+				///       .          .          .          .          .
+				///       .          .          .          .          .
+				///        __________ .......... .......... __________
+				///            ^     |          .               ^
+				///            .     |          .               .
+				///            B-L..>|          .               .
+				///                  |          .               .
+				///                  |__________.               .
+				///                       ^                     .
+				///                       ....................B-B
+				///
+				/// Those are:
+				///     B-L, B-B
+				///     T-R, T-T
+				///     (and in reverse order, R-T and L-B)
+				///
+				/// If we add, R-R and L-L (which never occur), we get:
+				///     B-L, B-B
+				///     T-R, T-T
+				///     R-T, R-R
+				///     L-B, L-L
+				///
+				/// And if L = 0, R = 1, T = 2, B = 3 as in NeighbourSides enumeration,
+				/// a general rule can be derived for when to flip bbMin and bbMax:
+				///     if ((a+b) == 3 || (a == b))
+				///     {
+				///        ..flip bbMin and bbMax
+				///     }
+				///
+				float bbMin = bleed[side].m_bbMin;
+				float bbMax = bleed[side].m_bbMax;
+				if ( (side == neighbourFaceEdge)
+				||   (3    == (side + neighbourFaceEdge) ) )
+				{
+					// Flip.
+					bbMin = 1.0f - bbMin;
+					bbMax = 1.0f - bbMax;
+				}
+
+				switch (neighbourFaceEdge)
+				{
+				case CubeMapFace::Edge::Left:
+					{
+						///  --> U
+						/// |  .............
+						/// v  .           .
+						/// V  x___        .
+						///    |   |       .
+						///    |   |       .
+						///    |___x       .
+						///    .           .
+						///    .............
+						///
+						_outFilterArea[neighbourFaceIdx].add(0.0f, bbMin);
+						_outFilterArea[neighbourFaceIdx].add(bleedAmount, bbMax);
+					}
+					break;
+
+				case CubeMapFace::Edge::Right:
+					{
+						///  --> U
+						/// |  .............
+						/// v  .           .
+						/// V  .       x___.
+						///    .       |   |
+						///    .       |   |
+						///    .       |___x
+						///    .           .
+						///    .............
+						///
+						_outFilterArea[neighbourFaceIdx].add(1.0f - bleedAmount, bbMin);
+						_outFilterArea[neighbourFaceIdx].add(1.0f, bbMax);
+					}
+					break;
+
+				case CubeMapFace::Edge::Top:
+					{
+						///  --> U
+						/// |  ...x____ ...
+						/// v  .  |    |  .
+						/// V  .  |____x  .
+						///    .          .
+						///    .          .
+						///    .          .
+						///    ............
+						///
+						_outFilterArea[neighbourFaceIdx].add(bbMin, 0.0f);
+						_outFilterArea[neighbourFaceIdx].add(bbMax, bleedAmount);
+					}
+					break;
+
+				case CubeMapFace::Edge::Bottom:
+					{
+						///  --> U
+						/// |  ............
+						/// v  .          .
+						/// V  .          .
+						///    .          .
+						///    .  x____   .
+						///    .  |    |  .
+						///    ...|____x...
+						///
+						_outFilterArea[neighbourFaceIdx].add(bbMin, 1.0f - bleedAmount);
+						_outFilterArea[neighbourFaceIdx].add(bbMax, 1.0f);
+					}
+					break;
+				}
+
+				// Clamp bounding box to face size.
+				_outFilterArea[neighbourFaceIdx].clamp(0.0f, 1.0f);
+			}
+		}
+	}
+
+	/// Resolves one mip level of one side of an image so texels can be fetched
+	/// from it.
+	struct Sampler
+	{
+		/// _lod is clamped to [0, numMips-1] and then turned into a mip index by
+		/// func (callers in this file pass bx::floor or bx::ceil).
+		Sampler(const ImageContainer& _image, uint16_t _side, float _lod, float (*func)(float) )
+		{
+			const float maxLod     = float(_image.m_numMips - 1);
+			const float clampedLod = bx::clamp(_lod, 0.0f, maxLod);
+			const uint8_t mipIndex = uint8_t(func(clampedLod) );
+
+			imageGetRawData(_image, _side, mipIndex, _image.m_data, _image.m_size, mip);
+		}
+
+		ImageMip mip;
+	};
+
+	/// Unpacks the texel at column _u, row _v of the sampler's mip into _rgba,
+	/// using the unpack function for the mip's format.
+	void texelFetch(float* _rgba, const Sampler& _sampler, uint32_t _u, uint32_t _v)
+	{
+		const ImageMip& mip = _sampler.mip;
+
+		const uint32_t bitsPerPixel = mip.m_bpp;
+		const uint32_t rowBytes     = mip.m_width*bitsPerPixel/8;
+		const uint8_t* rowStart     = mip.m_data + _v*rowBytes;
+
+		getUnpack(mip.m_format)(_rgba, rowStart + _u*bitsPerPixel/8);
+	}
+
+	/// Samples cube map _image in direction _dir (3 floats) at fractional mip
+	/// level _lod and writes the result to _rgba (4 floats).
+	///
+	/// The mips selected by floor(_lod) and ceil(_lod) are each sampled with a
+	/// 2x2 texel blend, and the two results are then blended by fract(_lod).
+	void sampleCubeMap(float* _rgba, const ImageContainer& _image, const float* _dir, float _lod)
+	{
+		float uu, vv;
+		uint8_t side;
+		dirToTexelUv(uu, vv, side, _dir);
+
+		// NOTE(review): the 2x2 blend weights are the fractional parts of the
+		// normalized face coordinates, not of the texel-space coordinates
+		// computed per mip below. Confirm this is intended.
+		const float fracU   = bx::fract(uu);
+		const float fracV   = bx::fract(vv);
+		const float fracLod = bx::fract(_lod);
+
+		// Index 0 samples the mip at floor(_lod), index 1 the mip at ceil(_lod).
+		float (*const roundLod[2])(float) = { bx::floor, bx::ceil };
+		float mipRgba[2][4];
+
+		for (uint32_t ii = 0; ii < 2; ++ii)
+		{
+			const Sampler sampler(_image, side, _lod, roundLod[ii]);
+			const uint32_t lastTexel = sampler.mip.m_width-1;
+
+			// Rounded texel coordinate and its right/bottom neighbour, clamped
+			// to the last texel of the mip.
+			const uint32_t u0 = uint32_t(uu*lastTexel+0.5f);
+			const uint32_t v0 = uint32_t(vv*lastTexel+0.5f);
+			const uint32_t u1 = bx::min(u0 + 1, lastTexel);
+			const uint32_t v1 = bx::min(v0 + 1, lastTexel);
+
+			float texel00[4];
+			texelFetch(texel00, sampler, u0, v0);
+
+			float texel01[4];
+			texelFetch(texel01, sampler, u0, v1);
+
+			float texel10[4];
+			texelFetch(texel10, sampler, u1, v0);
+
+			float texel11[4];
+			texelFetch(texel11, sampler, u1, v1);
+
+			for (uint32_t ch = 0; ch < 4; ++ch)
+			{
+				const float column0 = bx::lerp(texel00[ch], texel01[ch], fracV);
+				const float column1 = bx::lerp(texel10[ch], texel11[ch], fracV);
+				mipRgba[ii][ch] = bx::lerp(column0, column1, fracU);
+			}
+		}
+
+		for (uint32_t ch = 0; ch < 4; ++ch)
+		{
+			_rgba[ch] = bx::lerp(mipRgba[0][ch], mipRgba[1][ch], fracLod);
+		}
+	}
+
+	/// Generates a GGX importance-sampled vector in the frame of `_normal`.
+	///
+	/// `_u` scales bx::kPi2 to give the azimuth, `_v` and `_roughness` determine
+	/// cos(theta). The tangent-space vector is expressed in the basis
+	/// (`_tangentX`, `_tangentY`, `_normal`) and written to `_result` (3 floats).
+	void importanceSampleGgx(float* _result, float _u, float _v, float _roughness, const float* _normal, const float* _tangentX, const float* _tangentY)
+	{
+		const float alpha    = bx::square(_roughness);
+		const float phi      = bx::kPi2 * _u;
+		const float cosTheta = bx::sqrt( (1.0f - _v) / (1.0f + (bx::square(alpha) - 1.0f) * _v) );
+		// abs() guards against a tiny negative value under the square root.
+		const float sinTheta = bx::sqrt(bx::abs(1.0f - bx::square(cosTheta) ) );
+
+		// Tangent-space components of the sampled vector.
+		const float hx = sinTheta * bx::cos(phi);
+		const float hy = sinTheta * bx::sin(phi);
+		const float hz = cosTheta;
+
+		for (uint32_t ii = 0; ii < 3; ++ii)
+		{
+			_result[ii] = _tangentX[ii] * hx + _tangentY[ii] * hy + _normal[ii] * hz;
+		}
+	}
+
+	/// Evaluates the GGX normal distribution term
+	///   a^2 / (pi * ( (n.h)^2 * (a^2 - 1) + 1)^2), with a = _roughness^2.
+	float normalDistributionGgx(float _ndoth, float _roughness)
+	{
+		const float a2 = bx::square(bx::square(_roughness) );
+		const float dd = bx::square(_ndoth) * (a2 - 1.0f) + 1.0f;
+		return a2/(bx::kPi * bx::square(dd) );
+	}
+
+	/// Filters cube map `_image` around `_dir` using GGX importance sampling.
+	///
+	/// A fixed number of sample vectors is generated around `_dir` for the given
+	/// `_roughness`; each one is reflected into a light direction, the cube map
+	/// is sampled along it at a mip level derived from the sample's pdf, and the
+	/// samples are averaged with a reversible tonemap weighting. RGB is written
+	/// to `_result` (3 floats).
+	///
+	/// If no sample contributes, the texel of mip `_lod` that `_dir` points at
+	/// is copied instead.
+	void processFilterAreaGgx(
+		  float* _result
+		, const ImageContainer& _image
+		, uint8_t _lod
+		, const float* _dir
+		, float _roughness
+		)
+	{
+		// Unsigned so that it compares cleanly against the loop counter.
+		constexpr uint32_t kNumSamples = 512;
+		const float mipBias = 0.5f*bx::log2(bx::square(float(_image.m_width) )/float(kNumSamples) );
+
+		float color[3]    = { 0.0f, 0.0f, 0.0f };
+		float totalWeight = 0.0f;
+
+		// Golden Ratio Sequences for Low-Discrepancy Sampling
+		// https://web.archive.org/web/20180717194847/https://www.graphics.rwth-aachen.de/publication/2/jgt.pdf
+		//
+		// kGoldenSection = (0.5f*bx::sqrt(5.0f) + 0.5f) - 1.0f = 0.61803398875f
+		//
+		const float kGoldenSection = 0.61803398875f;
+		float offset = kGoldenSection;
+
+		float tangentX[3];
+		float tangentY[3];
+		bx::vec3TangentFrame(_dir, tangentX, tangentY);
+
+		for (uint32_t ii = 0; ii < kNumSamples; ++ii)
+		{
+			offset += kGoldenSection;
+			const float vv = ii/float(kNumSamples);
+
+			float hh[3];
+			importanceSampleGgx(hh, offset, vv, _roughness, _dir, tangentX, tangentY);
+
+			// Reflect _dir about the sampled vector to get the light direction.
+			const float ddoth2 = 2.0f * bx::vec3Dot(_dir, hh);
+
+			float ll[3];
+			ll[0] = ddoth2 * hh[0] - _dir[0];
+			ll[1] = ddoth2 * hh[1] - _dir[1];
+			ll[2] = ddoth2 * hh[2] - _dir[2];
+
+			const float ndotl = bx::clamp(bx::vec3Dot(_dir, ll), 0.0f, 1.0f);
+
+			if (ndotl > 0.0f)
+			{
+				// _dir is used as both normal and view vector, so n.h == v.h.
+				const float ndoth = bx::clamp(bx::vec3Dot(_dir, hh), 0.0f, 1.0f);
+				const float vdoth = ndoth;
+
+				// Chapter 20. GPU-Based Importance Sampling
+				// http://archive.today/2018.07.14-004914/https://developer.nvidia.com/gpugems/GPUGems3/gpugems3_ch20.html
+				//
+				const float pdf = normalDistributionGgx(ndoth, _roughness) * ndoth / (4.0f * vdoth);
+				const float lod = bx::max(0.0f, mipBias - 0.5f*bx::log2(pdf));
+
+				float rgba[4];
+				sampleCubeMap(rgba, _image, ll, lod);
+
+				// Optimized Reversible Tonemapper for Resolve
+				// https://web.archive.org/web/20180717182019/https://gpuopen.com/optimized-reversible-tonemapper-for-resolve/
+				// "a single sample with a large HDR value can over-power all other samples"
+				// "instead accept a bias in the resolve and reduce the weighting of samples
+				// as a function of how bright they are"
+				// Include ndotl here to "fold the weighting into the tonemap operation"
+				//
+				const float tm = ndotl / (bx::max(rgba[0], rgba[1], rgba[2]) + 1.0f);
+
+				color[0] += rgba[0] * tm;
+				color[1] += rgba[1] * tm;
+				color[2] += rgba[2] * tm;
+				totalWeight += ndotl;
+			}
+		}
+
+		if (0.0f < totalWeight)
+		{
+			// Optimized Reversible Tonemapper for Resolve
+			// https://web.archive.org/web/20180717182019/https://gpuopen.com/optimized-reversible-tonemapper-for-resolve/
+			// Average, then reverse the tonemapper
+			//
+			const float invWeight = 1.0f/totalWeight;
+			color[0] = color[0] * invWeight;
+			color[1] = color[1] * invWeight;
+			color[2] = color[2] * invWeight;
+
+			const float invTm = 1.0f / (1.0f - bx::max(0.00001f, bx::max(color[0], color[1], color[2])));
+			_result[0] = color[0] * invTm;
+			_result[1] = color[1] * invTm;
+			_result[2] = color[2] * invTm;
+		}
+		else
+		{
+			// No sample contributed: copy the texel _dir points at. The texel
+			// coordinates and pitch are derived from the very mip that is read,
+			// so the addressing always matches the data.
+			float uu, vv;
+			uint8_t face;
+			dirToTexelUv(uu, vv, face, _dir);
+
+			ImageMip mip;
+			if (imageGetRawData(_image, face, _lod, _image.m_data, _image.m_size, mip) )
+			{
+				const uint32_t bpp        = getBitsPerPixel(_image.m_format);
+				const uint32_t pitch      = mip.m_width*bpp/8;
+				const float widthMinusOne = float(mip.m_width-1);
+
+				const uint32_t xx = uint32_t(uu*widthMinusOne);
+				const uint32_t yy = uint32_t(vv*widthMinusOne);
+
+				float rgba[4];
+				UnpackFn unpack = getUnpack(_image.m_format);
+				unpack(rgba, mip.m_data + yy*pitch + xx*bpp/8);
+
+				_result[0] = rgba[0];
+				_result[1] = rgba[1];
+				_result[2] = rgba[2];
+			}
+		}
+	}
+
+	/// Filters cube map `_image` around `_dir` with a cosine-power lobe.
+	///
+	/// For every side whose `_aabb` entry is not empty, the texels of mip `_lod`
+	/// inside that box are visited. `_nsa` supplies four floats per texel: the
+	/// texel normal (xyz) and its solid angle (w). A texel contributes when
+	/// clamp(dot(normal, _dir), 0, 1) >= `_specularAngle`, with weight
+	/// solidAngle * pow(ndotl, `_specularPower`).
+	///
+	/// The weighted average RGB is written to `_result` (3 floats). If nothing
+	/// contributed, the texel of mip `_lod` that `_dir` points at is copied.
+	void processFilterArea(
+		  float* _result
+		, const ImageContainer& _image
+		, const ImageContainer& _nsa
+		, uint8_t _lod
+		, const Aabb* _aabb
+		, const float* _dir
+		, float _specularPower
+		, float _specularAngle
+		)
+	{
+		float color[3]    = { 0.0f, 0.0f, 0.0f };
+		float totalWeight = 0.0f;
+
+		const uint32_t bpp = getBitsPerPixel(_image.m_format);
+
+		UnpackFn unpack = getUnpack(_image.m_format);
+
+		for (uint8_t side = 0; side < 6; ++side)
+		{
+			if (_aabb[side].isEmpty() )
+			{
+				continue;
+			}
+
+			ImageMip nsaMip;
+			imageGetRawData(_nsa, side, 0, _nsa.m_data, _nsa.m_size, nsaMip);
+
+			ImageMip mip;
+			if (!imageGetRawData(_image, side, _lod, _image.m_data, _image.m_size, mip) )
+			{
+				continue;
+			}
+
+			const uint32_t pitch      = mip.m_width*bpp/8;
+			const float widthMinusOne = float(mip.m_width-1);
+
+			// Normalized AABB extents -> inclusive texel range on this side.
+			const uint32_t minX = uint32_t(_aabb[side].m_min[0] * widthMinusOne);
+			const uint32_t maxX = uint32_t(_aabb[side].m_max[0] * widthMinusOne);
+			const uint32_t minY = uint32_t(_aabb[side].m_min[1] * widthMinusOne);
+			const uint32_t maxY = uint32_t(_aabb[side].m_max[1] * widthMinusOne);
+
+			for (uint32_t yy = minY; yy <= maxY; ++yy)
+			{
+				const uint8_t* row = mip.m_data + yy*pitch;
+
+				for (uint32_t xx = minX; xx <= maxX; ++xx)
+				{
+					const float* normal = (const float*)&nsaMip.m_data[(yy*nsaMip.m_width+xx)*(nsaMip.m_bpp/8)];
+					const float solidAngle = normal[3];
+					const float ndotl = bx::clamp(bx::vec3Dot(normal, _dir), 0.0f, 1.0f);
+
+					if (ndotl >= _specularAngle)
+					{
+						const float weight = solidAngle * bx::pow(ndotl, _specularPower);
+						float rgba[4];
+						unpack(rgba, row + xx*bpp/8);
+
+						color[0] += rgba[0] * weight;
+						color[1] += rgba[1] * weight;
+						color[2] += rgba[2] * weight;
+						totalWeight += weight;
+					}
+				}
+			}
+		}
+
+		// Resolve once, after all sides were accumulated. This also guarantees
+		// that _result is written even when every AABB is empty.
+		if (0.0f < totalWeight)
+		{
+			const float invWeight = 1.0f/totalWeight;
+			_result[0] = color[0] * invWeight;
+			_result[1] = color[1] * invWeight;
+			_result[2] = color[2] * invWeight;
+		}
+		else
+		{
+			// Nothing contributed: copy the texel _dir points at. Addressing is
+			// derived from the mip that is actually read.
+			float uu, vv;
+			uint8_t face;
+			dirToTexelUv(uu, vv, face, _dir);
+
+			ImageMip mip;
+			if (imageGetRawData(_image, face, _lod, _image.m_data, _image.m_size, mip) )
+			{
+				const uint32_t pitch      = mip.m_width*bpp/8;
+				const float widthMinusOne = float(mip.m_width-1);
+
+				const uint32_t xx = uint32_t(uu*widthMinusOne);
+				const uint32_t yy = uint32_t(vv*widthMinusOne);
+
+				float rgba[4];
+				unpack(rgba, mip.m_data + yy*pitch + xx*bpp/8);
+
+				_result[0] = rgba[0];
+				_result[1] = rgba[1];
+				_result[2] = rgba[2];
+			}
+		}
+	}
+
+	/// Creates a copy of `_image` with a mip chain generated by repeated 2x2
+	/// downsampling.
+	///
+	/// Only RGBA8 and RGBA32F inputs are supported. Level 0 of every layer/side
+	/// is copied from `_image`; each further level is downsampled from the level
+	/// above it in the output.
+	///
+	/// @returns Newly allocated image, or NULL when the format is unsupported or
+	///   the allocation failed.
+	ImageContainer* imageGenerateMips(bx::AllocatorI* _allocator, const ImageContainer& _image)
+	{
+		if (_image.m_format != TextureFormat::RGBA8
+		&&  _image.m_format != TextureFormat::RGBA32F)
+		{
+			return NULL;
+		}
+
+		ImageContainer* output = imageAlloc(_allocator, _image.m_format, uint16_t(_image.m_width), uint16_t(_image.m_height), uint16_t(_image.m_depth), _image.m_numLayers, _image.m_cubeMap, true);
+
+		if (NULL == output)
+		{
+			return NULL;
+		}
+
+		const uint32_t numMips   = output->m_numMips;
+		const uint32_t numLayers = output->m_numLayers;
+		const uint32_t numSides  = output->m_cubeMap ? 6 : 1;
+
+		for (uint32_t layer = 0; layer < numLayers; ++layer)
+		{
+			for (uint8_t side = 0; side < numSides; ++side)
+			{
+				const uint16_t face = uint16_t(layer*numSides + side);
+
+				ImageMip mip;
+				if (!imageGetRawData(_image, face, 0, _image.m_data, _image.m_size, mip) )
+				{
+					continue;
+				}
+
+				// Level 0 is a straight copy of the input.
+				ImageMip baseMip;
+				imageGetRawData(*output, face, 0, output->m_data, output->m_size, baseMip);
+				bx::memCopy(const_cast<uint8_t*>(baseMip.m_data), mip.m_data, mip.m_size);
+
+				// Every further level is downsampled from the previous output level.
+				for (uint8_t lod = 1; lod < numMips; ++lod)
+				{
+					ImageMip srcMip;
+					imageGetRawData(*output, face, uint8_t(lod-1), output->m_data, output->m_size, srcMip);
+
+					ImageMip dstMip;
+					imageGetRawData(*output, face, lod, output->m_data, output->m_size, dstMip);
+
+					uint8_t* dstData = const_cast<uint8_t*>(dstMip.m_data);
+
+					if (output->m_format == TextureFormat::RGBA8)
+					{
+						imageRgba8Downsample2x2(
+							  dstData
+							, srcMip.m_width
+							, srcMip.m_height
+							, srcMip.m_depth
+							, srcMip.m_width*4
+							, dstMip.m_width*4
+							, srcMip.m_data
+							);
+					}
+					else if (output->m_format == TextureFormat::RGBA32F)
+					{
+						imageRgba32fDownsample2x2(
+							  dstData
+							, srcMip.m_width
+							, srcMip.m_height
+							, srcMip.m_depth
+							, srcMip.m_width*16
+							, srcMip.m_data
+							);
+					}
+				}
+			}
+		}
+
+		return output;
+	}
+
+	/// Returns the angle at which the cosine power filter,
+	/// pow(cos(angle), _cosinePower), has dropped to a small empirical threshold.
+	static float cosinePowerFilterAngle(float _cosinePower)
+	{
+		// A bigger threshold improves performance but might hurt the results.
+		// 0.00001f was tested empirically and gives almost the same values as
+		// the reference.
+		const float kThreshold = 0.00001f;
+
+		// Solving pow(cos(angle), power) == threshold for the angle gives:
+		//   angle = acos(pow(threshold, 1.0 / power) )
+		const float exponent = 1.0f / _cosinePower;
+		return bx::acos(bx::pow(kThreshold, exponent) );
+	}
+
+	/// Maps mip index `_mip` of a chain with `_mipCount` levels to a glossiness
+	/// that starts at 1.0 for mip 0, falls linearly, and is clamped at 0.0.
+	float glossinessFor(float _mip, float _mipCount)
+	{
+		// The divisor is slightly more than (_mipCount-1), so it is non-zero
+		// even for _mipCount == 1.
+		const float divisor = _mipCount-1.0000001f;
+		const float linear  = 1.0f - _mip/divisor;
+		return bx::max(0.0f, linear);
+	}
+
+	/// Converts a reference specular power into the power used for filtering
+	/// with `_lightingModel`. Models without a dedicated rule return
+	/// `_specularPower` unchanged.
+	float applyLightingModel(float _specularPower, LightingModel::Enum _lightingModel)
+	{
+		// Reference:
+		//  - https://web.archive.org/web/20180622232018/https://seblagarde.wordpress.com/2012/06/10/amd-cubemapgen-for-physically-based-rendering/
+		//  - https://web.archive.org/web/20180622232041/https://seblagarde.wordpress.com/2012/03/29/relationship-between-phong-and-blinn-lighting-model/
+		//
+		if (LightingModel::PhongBrdf == _lightingModel)
+		{
+			return _specularPower + 1.0f;
+		}
+
+		if (LightingModel::Blinn == _lightingModel)
+		{
+			return _specularPower/4.0f;
+		}
+
+		if (LightingModel::BlinnBrdf == _lightingModel)
+		{
+			return _specularPower/4.0f + 1.0f;
+		}
+
+		return _specularPower;
+	}
+
+	/// Builds a pre-filtered radiance cube map from `_image`.
+	///
+	/// The input is converted to RGBA32F (a mip chain is generated when it has
+	/// none). Level 0 of the result is a copy of the input; every further level
+	/// is filtered with a roughness that grows with the level, using either GGX
+	/// importance sampling or a cosine-power lobe depending on `_lightingModel`.
+	///
+	/// @param[in] _allocator Allocator used for the result and all temporaries.
+	/// @param[in] _image Source image; must be a cube map.
+	/// @param[in] _lightingModel Lighting model that selects the filter.
+	/// @param[out] _err Receives an error when NULL is returned.
+	///
+	/// @returns Newly allocated RGBA32F cube map with mips, or NULL on failure.
+	///   The caller releases it with imageFree.
+	ImageContainer* imageCubemapRadianceFilter(bx::AllocatorI* _allocator, const ImageContainer& _image, LightingModel::Enum _lightingModel, bx::Error* _err)
+	{
+		if (!_image.m_cubeMap)
+		{
+			BX_ERROR_SET(_err, BIMG_ERROR, "Input image is not cubemap.");
+			return NULL;
+		}
+
+		ImageContainer* input = imageConvert(_allocator, TextureFormat::RGBA32F, _image, true);
+
+		if (NULL == input)
+		{
+			BX_ERROR_SET(_err, BIMG_ERROR, "Unable to convert input image to RGBA32F.");
+			return NULL;
+		}
+
+		if (1 >= input->m_numMips)
+		{
+			ImageContainer* temp = imageGenerateMips(_allocator, *input);
+			imageFree(input);
+			input = temp;
+
+			if (NULL == input)
+			{
+				BX_ERROR_SET(_err, BIMG_ERROR, "Unable to generate mips for input image.");
+				return NULL;
+			}
+		}
+
+		ImageContainer* output = imageAlloc(_allocator, TextureFormat::RGBA32F, uint16_t(input->m_width), uint16_t(input->m_width), 1, 1, true, true);
+
+		if (NULL == output)
+		{
+			imageFree(input);
+			BX_ERROR_SET(_err, BIMG_ERROR, "Unable to allocate output image.");
+			return NULL;
+		}
+
+		// Level 0 is not filtered; copy it side by side.
+		for (uint8_t side = 0; side < 6; ++side)
+		{
+			ImageMip srcMip;
+			imageGetRawData(*input, side, 0, input->m_data, input->m_size, srcMip);
+
+			ImageMip dstMip;
+			imageGetRawData(*output, side, 0, output->m_data, output->m_size, dstMip);
+
+			uint8_t* dstData = const_cast<uint8_t*>(dstMip.m_data);
+
+			bx::memCopy(dstData, srcMip.m_data, srcMip.m_size);
+		}
+
+		const float glossScale = 10.0f;
+		const float glossBias  = 1.0f;
+
+		for (uint8_t lod = 1, numMips = input->m_numMips; lod < numMips; ++lod)
+		{
+			// The normal/solid-angle lookup is only needed by the non-GGX path.
+			ImageContainer* nsa = NULL;
+
+			if (LightingModel::Ggx != _lightingModel)
+			{
+				nsa = imageCubemapNormalSolidAngle(_allocator, bx::max<uint32_t>(_image.m_width>>lod, 1) );
+
+				if (NULL == nsa)
+				{
+					imageFree(output);
+					imageFree(input);
+					BX_ERROR_SET(_err, BIMG_ERROR, "Unable to allocate normal/solid angle cubemap.");
+					return NULL;
+				}
+			}
+
+			for (uint8_t side = 0; side < 6; ++side)
+			{
+				ImageMip mip;
+				imageGetRawData(*output, side, lod, output->m_data, output->m_size, mip);
+
+				const uint32_t dstWidth = mip.m_width;
+				const uint32_t dstPitch = dstWidth*16;
+
+				const float minAngle = bx::atan2(1.0f, float(dstWidth) );
+				const float maxAngle = bx::kPiHalf;
+				const float toFilterSize     = 1.0f/(minAngle*dstWidth*2.0f);
+				const float glossiness       = glossinessFor(lod, float(numMips) );
+				const float roughness        = 1.0f-glossiness;
+				const float specularPowerRef = bx::pow(2.0f, glossiness*glossScale + glossBias);
+				const float specularPower    = applyLightingModel(specularPowerRef, _lightingModel);
+				const float filterAngle      = bx::clamp(cosinePowerFilterAngle(specularPower), minAngle, maxAngle);
+				const float cosAngle   = bx::max(0.0f, bx::cos(filterAngle) );
+				const float texelSize  = 1.0f/float(dstWidth);
+				const float filterSize = bx::max(texelSize, filterAngle * toFilterSize);
+
+				for (uint32_t yy = 0; yy < dstWidth; ++yy)
+				{
+					for (uint32_t xx = 0; xx < dstWidth; ++xx)
+					{
+						float* dstData = (float*)&mip.m_data[yy*dstPitch+xx*16];
+
+						// Texel position mapped to [-1, 1) on the side.
+						const float uu = float(xx)*texelSize*2.0f - 1.0f;
+						const float vv = float(yy)*texelSize*2.0f - 1.0f;
+
+						float dir[3];
+						texelUvToDir(dir, side, uu, vv);
+
+						if (LightingModel::Ggx == _lightingModel)
+						{
+							processFilterAreaGgx(dstData, *input, lod, dir, roughness);
+						}
+						else
+						{
+							Aabb aabb[6];
+							calcFilterArea(aabb, dir, filterSize);
+
+							processFilterArea(dstData, *input, *nsa, lod, aabb, dir, specularPower, cosAngle);
+						}
+					}
+				}
+			}
+
+			if (NULL != nsa)
+			{
+				imageFree(nsa);
+			}
+		}
+
+		// The converted input is a temporary owned by this function.
+		imageFree(input);
+
+		return output;
+	}
+
+} // namespace bimg

+ 16 - 12
src/image_encode.cpp

@@ -228,7 +228,10 @@ namespace bimg
 				break;
 
 			default:
-				BX_ERROR_SET(_err, BIMG_ERROR, "Unable to convert between input/output formats!");
+				if (!imageConvert(_allocator, _dst, _dstFormat, _src, _srcFormat, _width, _height, 1) )
+				{
+					BX_ERROR_SET(_err, BIMG_ERROR, "Unable to convert between input/output formats!");
+				}
 				break;
 		}
 	}
@@ -260,17 +263,18 @@ namespace bimg
 					imageGetRawData(*output, side, lod, output->m_data, output->m_size, dstMip);
 					uint8_t* dstData = const_cast<uint8_t*>(dstMip.m_data);
 
-					imageEncode(_allocator
-							, dstData
-							, mip.m_data
-							, mip.m_format
-							, mip.m_width
-							, mip.m_height
-							, mip.m_depth
-							, _dstFormat
-							, _quality
-							, &err
-							);
+					imageEncode(
+						  _allocator
+						, dstData
+						, mip.m_data
+						, mip.m_format
+						, mip.m_width
+						, mip.m_height
+						, mip.m_depth
+						, _dstFormat
+						, _quality
+						, &err
+						);
 				}
 			}
 		}

+ 153 - 41
tools/texturec/texturec.cpp

@@ -26,25 +26,12 @@
 #include <string>
 
 #define BIMG_TEXTUREC_VERSION_MAJOR 1
-#define BIMG_TEXTUREC_VERSION_MINOR 15
+#define BIMG_TEXTUREC_VERSION_MINOR 17
+
+BX_ERROR_RESULT(TEXTRUREC_ERROR, BX_MAKEFOURCC('t', 'c', 0, 0) );
 
 struct Options
 {
-	Options()
-		: maxSize(UINT32_MAX)
-		, edge(0.0f)
-		, format(bimg::TextureFormat::Count)
-		, quality(bimg::Quality::Default)
-		, mips(false)
-		, normalMap(false)
-		, equirect(false)
-		, iqa(false)
-		, pma(false)
-		, sdf(false)
-		, alphaTest(false)
-	{
-	}
-
 	void dump()
 	{
 		DBG("Options:\n"
@@ -56,6 +43,9 @@ struct Options
 			"\t      iqa: %s\n"
 			"\t      pma: %s\n"
 			"\t      sdf: %s\n"
+			"\t radiance: %s\n"
+			"\t equirect: %s\n"
+			"\t    strip: %s\n"
 			, maxSize
 			, edge
 			, bimg::getName(format)
@@ -64,20 +54,25 @@ struct Options
 			, iqa       ? "true" : "false"
 			, pma       ? "true" : "false"
 			, sdf       ? "true" : "false"
+			, radiance  ? "true" : "false"
+			, equirect  ? "true" : "false"
+			, strip     ? "true" : "false"
 			);
 	}
 
-	uint32_t maxSize;
-	float edge;
-	bimg::TextureFormat::Enum format;
-	bimg::Quality::Enum quality;
-	bool mips;
-	bool normalMap;
-	bool equirect;
-	bool iqa;
-	bool pma;
-	bool sdf;
-	bool alphaTest;
+	uint32_t maxSize = UINT32_MAX;
+	float edge       = 0.0f;
+	bimg::TextureFormat::Enum format   = bimg::TextureFormat::Count;
+	bimg::Quality::Enum quality        = bimg::Quality::Default;
+	bimg::LightingModel::Enum radiance = bimg::LightingModel::Count;
+	bool mips      = false;
+	bool normalMap = false;
+	bool equirect  = false;
+	bool strip     = false;
+	bool iqa       = false;
+	bool pma       = false;
+	bool sdf       = false;
+	bool alphaTest = false;
 };
 
 void imageRgba32fNormalize(void* _dst, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src)
@@ -168,9 +163,47 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 		uint32_t outputHeight = bx::uint32_max(blockHeight * minBlockY, ( (input->m_height + blockHeight - 1) / blockHeight)*blockHeight);
 		uint32_t outputDepth  = input->m_depth;
 
-		if (outputWidth  > _options.maxSize
-		||  outputHeight > _options.maxSize
-		||  outputDepth  > _options.maxSize)
+		if (_options.equirect)
+		{
+			if (outputDepth   == 1
+			&&  outputWidth/2 == outputHeight)
+			{
+				if (outputWidth/2 > _options.maxSize)
+				{
+					outputWidth  = _options.maxSize*4;
+					outputHeight = _options.maxSize*2;
+				}
+			}
+			else
+			{
+				bimg::imageFree(input);
+
+				BX_ERROR_SET(_err, TEXTRUREC_ERROR, "Input image format is not equirectangular projection.");
+				return NULL;
+			}
+		}
+		else if (_options.strip)
+		{
+			if (outputDepth   == 1
+			&&  outputWidth/6 == outputHeight)
+			{
+				if (outputWidth/6 > _options.maxSize)
+				{
+					outputWidth  = _options.maxSize*6;
+					outputHeight = _options.maxSize;
+				}
+			}
+			else
+			{
+				bimg::imageFree(input);
+
+				BX_ERROR_SET(_err, TEXTRUREC_ERROR, "Input image format is not horizontal strip.");
+				return NULL;
+			}
+		}
+		else if (outputWidth  > _options.maxSize
+			 ||  outputHeight > _options.maxSize
+			 ||  outputDepth  > _options.maxSize)
 		{
 			if (outputDepth > outputWidth
 			&&  outputDepth > outputHeight)
@@ -204,9 +237,10 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 			&& !_options.sdf
 			&& !_options.alphaTest
 			&& !_options.normalMap
-			&& !_options.equirect
+			&& !(_options.equirect || _options.strip)
 			&& !_options.iqa
 			&& !_options.pma
+			&& (bimg::LightingModel::Count == _options.radiance)
 			;
 
 		if (needResize)
@@ -261,13 +295,24 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 			return output;
 		}
 
-		if (_options.equirect)
+		if (_options.equirect
+		||  _options.strip)
 		{
 			bimg::ImageContainer* src = bimg::imageConvert(_allocator, bimg::TextureFormat::RGBA32F, *input);
 			bimg::imageFree(input);
 
-			bimg::ImageContainer* dst = bimg::imageCubemapFromLatLongRgba32F(_allocator, *src, true, _err);
-			bimg::imageFree(src);
+			bimg::ImageContainer* dst;
+
+			if (outputWidth/2 == outputHeight)
+			{
+				dst = bimg::imageCubemapFromLatLongRgba32F(_allocator, *src, true, _err);
+				bimg::imageFree(src);
+			}
+			else
+			{
+				dst = bimg::imageCubemapFromStripRgba32F(_allocator, *src, _err);
+				bimg::imageFree(src);
+			}
 
 			if (!_err->isOk() )
 			{
@@ -278,6 +323,27 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
 			bimg::imageFree(dst);
 		}
 
+		if (bimg::LightingModel::Count != _options.radiance)
+		{
+			output = bimg::imageCubemapRadianceFilter(_allocator, *input, _options.radiance, _err);
+
+			if (!_err->isOk() )
+			{
+				return NULL;
+			}
+
+			if (bimg::TextureFormat::RGBA32F != outputFormat)
+			{
+				bimg::ImageContainer* temp = bimg::imageEncode(_allocator, outputFormat, _options.quality, *output);
+				bimg::imageFree(output);
+
+				output = temp;
+			}
+
+			bimg::imageFree(input);
+			return output;
+		}
+
 		output = bimg::imageAlloc(
 			  _allocator
 			, outputFormat
@@ -749,7 +815,7 @@ void help(const char* _error = NULL, bool _showHelp = true)
 		  "    *.exr (input, output)  OpenEXR.\n"
 		  "    *.gif (input)          Graphics Interchange Format.\n"
 		  "    *.jpg (input)          JPEG Interchange Format.\n"
-		  "    *.hdr (input)          Radiance RGBE.\n"
+		  "    *.hdr (input, output)  Radiance RGBE.\n"
 		  "    *.ktx (input, output)  Khronos Texture.\n"
 		  "    *.png (input, output)  Portable Network Graphics.\n"
 		  "    *.psd (input)          Photoshop Document.\n"
@@ -766,13 +832,15 @@ void help(const char* _error = NULL, bool _showHelp = true)
 		  "  -q <quality>             Encoding quality (default, fastest, highest).\n"
 		  "  -m, --mips               Generate mip-maps.\n"
 		  "  -n, --normalmap          Input texture is normal map.\n"
-		  "      --equirect           Input texture equirectangular projection of cubemap.\n"
+		  "      --equirect           Input texture is equirectangular projection of cubemap.\n"
+		  "      --strip              Input texture is horizontal strip of cubemap.\n"
 		  "      --sdf <edge>         Compute SDF texture.\n"
 		  "      --ref <alpha>        Alpha reference value.\n"
 		  "      --iqa                Image Quality Assessment\n"
 		  "      --pma                Premultiply alpha into RGB channel.\n"
 		  "      --max <max size>     Maximum width/height (image will be scaled down and\n"
 		  "                           aspect ratio will be preserved.\n"
+		  "      --radiance <model>   Radiance cubemap filter. (Lighting model: Phong, PhongBrdf, Blinn, BlinnBrdf, GGX)\n"
 		  "      --as <extension>     Save as.\n"
 		  "      --validate           *DEBUG* Validate that output image produced matches after loading.\n"
 
@@ -860,6 +928,7 @@ int main(int _argc, const char* _argv[])
 	saveAs = NULL == saveAs ? bx::strFindI(outputFileName, ".dds") : saveAs;
 	saveAs = NULL == saveAs ? bx::strFindI(outputFileName, ".png") : saveAs;
 	saveAs = NULL == saveAs ? bx::strFindI(outputFileName, ".exr") : saveAs;
+	saveAs = NULL == saveAs ? bx::strFindI(outputFileName, ".hdr") : saveAs;
 	if (NULL == saveAs)
 	{
 		help("Output file format must be specified.");
@@ -890,16 +959,28 @@ int main(int _argc, const char* _argv[])
 		}
 	}
 
-	options.mips      = cmdLine.hasArg('m',  "mips");
-	options.normalMap = cmdLine.hasArg('n',  "normalmap");
+	options.mips      = cmdLine.hasArg('m', "mips");
+	options.normalMap = cmdLine.hasArg('n', "normalmap");
 	options.equirect  = cmdLine.hasArg("equirect");
+	options.strip     = cmdLine.hasArg("strip");
 	options.iqa       = cmdLine.hasArg("iqa");
 	options.pma       = cmdLine.hasArg("pma");
 
+	if (options.equirect
+	&&  options.strip)
+	{
+		help("Image can't be equirect and strip at the same time.");
+		return bx::kExitFailure;
+	}
+
 	const char* maxSize = cmdLine.findOption("max");
 	if (NULL != maxSize)
 	{
-		options.maxSize = atoi(maxSize);
+		if (!bx::fromString(&options.maxSize, maxSize) )
+		{
+			help("Parsing max size failed.");
+			return bx::kExitFailure;
+		}
 	}
 
 	options.format = bimg::TextureFormat::Count;
@@ -954,6 +1035,21 @@ int main(int _argc, const char* _argv[])
 		}
 	}
 
+	const char* radiance = cmdLine.findOption("radiance");
+	if (NULL != radiance)
+	{
+		if      (0 == bx::strCmpI(radiance, "phong"    ) ) { options.radiance = bimg::LightingModel::Phong; }
+		else if (0 == bx::strCmpI(radiance, "phongbrdf") ) { options.radiance = bimg::LightingModel::PhongBrdf; }
+		else if (0 == bx::strCmpI(radiance, "blinn"    ) ) { options.radiance = bimg::LightingModel::Blinn; }
+		else if (0 == bx::strCmpI(radiance, "blinnbrdf") ) { options.radiance = bimg::LightingModel::BlinnBrdf; }
+		else if (0 == bx::strCmpI(radiance, "ggx"      ) ) { options.radiance = bimg::LightingModel::Ggx; }
+		else
+		{
+			help("Invalid radiance lighting model specified.");
+			return bx::kExitFailure;
+		}
+	}
+
 	const bool validate = cmdLine.hasArg("validate");
 
 	bx::Error err;
@@ -1019,7 +1115,8 @@ int main(int _argc, const char* _argv[])
 					, mip.m_data
 					, output->m_format
 					, false
-					, &err);
+					, &err
+					);
 			}
 			else if (NULL != bx::strFindI(saveAs, "exr") )
 			{
@@ -1032,7 +1129,22 @@ int main(int _argc, const char* _argv[])
 					, mip.m_data
 					, output->m_format
 					, false
-					, &err);
+					, &err
+					);
+			}
+			else if (NULL != bx::strFindI(saveAs, "hdr") )
+			{
+				bimg::ImageMip mip;
+				bimg::imageGetRawData(*output, 0, 0, output->m_data, output->m_size, mip);
+				bimg::imageWriteHdr(&writer
+					, mip.m_width
+					, mip.m_height
+					, mip.m_width*getBitsPerPixel(mip.m_format)/8
+					, mip.m_data
+					, output->m_format
+					, false
+					, &err
+					);
 			}
 
 			bx::close(&writer);

Unele fișiere nu au fost afișate deoarece prea multe fișiere au fost modificate în acest diff