1 năm trước cách đây · 200ed0971a
--- a/editor/import/resource_importer_layered_texture.cpp
+++ b/editor/import/resource_importer_layered_texture.cpp
@@ -335,11 +335,6 @@ Error ResourceImporterLayeredTexture::import(const String &p_source_file, const
 
				 		return err;
			
 
				 	}
			
 
				 
			
 
				-	if (compress_mode == COMPRESS_BASIS_UNIVERSAL && image->get_format() >= Image::FORMAT_RF) {
			
 
				-		//basis universal does not support float formats, fall back
			
 
				-		compress_mode = COMPRESS_VRAM_COMPRESSED;
			
 
				-	}
			
 
				-
			
 
				 	if (compress_mode == COMPRESS_VRAM_COMPRESSED) {
			
 
				 		//if using video ram, optimize
			
 
				 		if (channel_pack == 0) {
			
--- a/editor/import/resource_importer_texture.cpp
+++ b/editor/import/resource_importer_texture.cpp
@@ -593,11 +593,6 @@ Error ResourceImporterTexture::import(const String &p_source_file, const String
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (compress_mode == COMPRESS_BASIS_UNIVERSAL && image->get_format() >= Image::FORMAT_RF) {
			
 
				-		// Basis universal does not support float formats, fallback.
			
 
				-		compress_mode = COMPRESS_VRAM_COMPRESSED;
			
 
				-	}
			
 
				-
			
 
				 	bool detect_3d = int(p_options["detect_3d/compress_to"]) > 0;
			
 
				 	bool detect_roughness = roughness == 0;
			
 
				 	bool detect_normal = normal == 0;
			
--- a/modules/basis_universal/SCsub
+++ b/modules/basis_universal/SCsub
@@ -14,6 +14,8 @@ thirdparty_obj = []
 
				 thirdparty_dir = "#thirdparty/basis_universal/"
			
 
				 # Sync list with upstream CMakeLists.txt
			
 
				 encoder_sources = [
			
 
				+    "3rdparty/android_astc_decomp.cpp",
			
 
				+    "basisu_astc_hdr_enc.cpp",
			
 
				     "basisu_backend.cpp",
			
 
				     "basisu_basis_file.cpp",
			
 
				     "basisu_bc7enc.cpp",
			
@@ -45,6 +47,8 @@ else:
 
				 if env["builtin_zstd"]:
			
 
				     env_basisu.Prepend(CPPPATH=["#thirdparty/zstd"])
			
 
				 
			
 
				+env_basisu.Prepend(CPPPATH=["#thirdparty/tinyexr"])
			
 
				+
			
 
				 if env.dev_build:
			
 
				     env_basisu.Append(CPPDEFINES=[("BASISU_DEVEL_MESSAGES", 1), ("BASISD_ENABLE_DEBUG_FLAGS", 1)])
			
 
				 
			
--- a/modules/basis_universal/image_compress_basisu.cpp
+++ b/modules/basis_universal/image_compress_basisu.cpp
@@ -30,6 +30,8 @@
 
				 
			
 
				 #include "image_compress_basisu.h"
			
 
				 
			
 
				+#include "core/os/os.h"
			
 
				+#include "core/string/print_string.h"
			
 
				 #include "servers/rendering_server.h"
			
 
				 
			
 
				 #include <transcoder/basisu_transcoder.h>
			
@@ -46,9 +48,48 @@ void basis_universal_init() {
 
				 }
			
 
				 
			
 
				 #ifdef TOOLS_ENABLED
			
 
				// Pads one mip level into r_mip_data_padded.
				// The p_width x p_height source elements (type T, read from p_image_mip_data) are copied row by
				// row into a buffer resized to p_next_width * p_next_height * sizeof(T) bytes. The remaining
				// columns of each copied row repeat the element to their left, and the remaining rows repeat
				// the row above them.
				// The call site visible in this change uses T = BasisRGBAF for HDR images and T = uint32_t
				// otherwise, and only calls this when width > 2 and height > 2.
				// NOTE(review): p_size is not read anywhere in this body.
				+template <typename T>
			
 
				+inline void _basisu_pad_mipmap(const uint8_t *p_image_mip_data, Vector<uint8_t> &r_mip_data_padded, int p_next_width, int p_next_height, int p_width, int p_height, int64_t p_size) {
			
 
				+	// Source mip's data viewed as an array of T to help with copying pixel data (T is presumably the size of one pixel — confirm).
			
 
				+	const T *mip_src_data = reinterpret_cast<const T *>(p_image_mip_data);
			
 
				+
			
 
				+	// Reserve space in the padded buffer (sized in bytes, hence the sizeof(T) factor).
			
 
				+	r_mip_data_padded.resize(p_next_width * p_next_height * sizeof(T));
			
 
				+	T *data_padded_ptr = reinterpret_cast<T *>(r_mip_data_padded.ptrw());
			
 
				+
			
 
				+	// Copy each source row, then pad up to p_next_width x p_next_height by smearing.
			
 
				+	int x = 0, y = 0;
			
 
				+	for (y = 0; y < p_height; y++) {
			
 
				+		for (x = 0; x < p_width; x++) {
			
 
				+			data_padded_ptr[p_next_width * y + x] = mip_src_data[p_width * y + x];
			
 
				+		}
			
 
				+
			
 
				+		// First, smear in x: x continues from p_width, repeating the element to its left.
			
 
				+		for (; x < p_next_width; x++) {
			
 
				+			data_padded_ptr[p_next_width * y + x] = data_padded_ptr[p_next_width * y + x - 1];
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	// Then, smear in y: y continues from p_height, repeating the row above.
			
 
				+	for (; y < p_next_height; y++) {
			
 
				+		for (x = 0; x < p_next_width; x++) {
			
 
				+			data_padded_ptr[p_next_width * y + x] = data_padded_ptr[p_next_width * y + x - p_next_width];
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedChannels p_channels) {
			
 
				+	uint64_t start_time = OS::get_singleton()->get_ticks_msec();
			
 
				+
			
 
				 	Ref<Image> image = p_image->duplicate();
			
 
				-	image->convert(Image::FORMAT_RGBA8);
			
 
				+	bool is_hdr = false;
			
 
				+
			
 
				+	if (image->get_format() <= Image::FORMAT_RGB565) {
			
 
				+		image->convert(Image::FORMAT_RGBA8);
			
 
				+	} else if (image->get_format() <= Image::FORMAT_RGBE9995) {
			
 
				+		image->convert(Image::FORMAT_RGBAF);
			
 
				+		is_hdr = true;
			
 
				+	}
			
 
				 
			
 
				 	basisu::basis_compressor_params params;
			
 
				 
			
@@ -74,32 +115,42 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha
 
				 	basisu::job_pool job_pool(OS::get_singleton()->get_processor_count());
			
 
				 	params.m_pJob_pool = &job_pool;
			
 
				 
			
 
				-	BasisDecompressFormat decompress_format = BASIS_DECOMPRESS_RG;
			
 
				-	switch (p_channels) {
			
 
				-		case Image::USED_CHANNELS_L: {
			
 
				-			decompress_format = BASIS_DECOMPRESS_RGB;
			
 
				-		} break;
			
 
				-		case Image::USED_CHANNELS_LA: {
			
 
				-			params.m_force_alpha = true;
			
 
				-			decompress_format = BASIS_DECOMPRESS_RGBA;
			
 
				-		} break;
			
 
				-		case Image::USED_CHANNELS_R: {
			
 
				-			decompress_format = BASIS_DECOMPRESS_R;
			
 
				-		} break;
			
 
				-		case Image::USED_CHANNELS_RG: {
			
 
				-			params.m_force_alpha = true;
			
 
				-			image->convert_rg_to_ra_rgba8();
			
 
				-			decompress_format = BASIS_DECOMPRESS_RG;
			
 
				-		} break;
			
 
				-		case Image::USED_CHANNELS_RGB: {
			
 
				-			decompress_format = BASIS_DECOMPRESS_RGB;
			
 
				-		} break;
			
 
				-		case Image::USED_CHANNELS_RGBA: {
			
 
				-			params.m_force_alpha = true;
			
 
				-			decompress_format = BASIS_DECOMPRESS_RGBA;
			
 
				-		} break;
			
 
				+	BasisDecompressFormat decompress_format = BASIS_DECOMPRESS_MAX;
			
 
				+
			
 
				+	if (is_hdr) {
			
 
				+		decompress_format = BASIS_DECOMPRESS_HDR_RGB;
			
 
				+		params.m_hdr = true;
			
 
				+		params.m_uastc_hdr_options.set_quality_level(0);
			
 
				+
			
 
				+	} else {
			
 
				+		switch (p_channels) {
			
 
				+			case Image::USED_CHANNELS_L: {
			
 
				+				decompress_format = BASIS_DECOMPRESS_RGB;
			
 
				+			} break;
			
 
				+			case Image::USED_CHANNELS_LA: {
			
 
				+				params.m_force_alpha = true;
			
 
				+				decompress_format = BASIS_DECOMPRESS_RGBA;
			
 
				+			} break;
			
 
				+			case Image::USED_CHANNELS_R: {
			
 
				+				decompress_format = BASIS_DECOMPRESS_R;
			
 
				+			} break;
			
 
				+			case Image::USED_CHANNELS_RG: {
			
 
				+				params.m_force_alpha = true;
			
 
				+				image->convert_rg_to_ra_rgba8();
			
 
				+				decompress_format = BASIS_DECOMPRESS_RG;
			
 
				+			} break;
			
 
				+			case Image::USED_CHANNELS_RGB: {
			
 
				+				decompress_format = BASIS_DECOMPRESS_RGB;
			
 
				+			} break;
			
 
				+			case Image::USED_CHANNELS_RGBA: {
			
 
				+				params.m_force_alpha = true;
			
 
				+				decompress_format = BASIS_DECOMPRESS_RGBA;
			
 
				+			} break;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				+	ERR_FAIL_COND_V(decompress_format == BASIS_DECOMPRESS_MAX, Vector<uint8_t>());
			
 
				+
			
 
				 	// Copy the source image data with mipmaps into BasisU.
			
 
				 	{
			
 
				 		const int orig_width = image->get_width();
			
@@ -113,9 +164,10 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha
 
				 
			
 
				 		Vector<uint8_t> image_data = image->get_data();
			
 
				 		basisu::vector<basisu::image> basisu_mipmaps;
			
 
				+		basisu::vector<basisu::imagef> basisu_mipmaps_hdr;
			
 
				 
			
 
				 		// Buffer for storing padded mipmap data.
			
 
				-		Vector<uint32_t> mip_data_padded;
			
 
				+		Vector<uint8_t> mip_data_padded;
			
 
				 
			
 
				 		for (int32_t i = 0; i <= image->get_mipmap_count(); i++) {
			
 
				 			int64_t ofs, size;
			
@@ -126,31 +178,10 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha
 
				 
			
 
				 			// Pad the mipmap's data if its resolution isn't divisible by 4.
			
 
				 			if (image->has_mipmaps() && !is_res_div_4 && (width > 2 && height > 2) && (width != next_width || height != next_height)) {
			
 
				-				// Source mip's data interpreted as 32-bit RGBA blocks to help with copying pixel data.
			
 
				-				const uint32_t *mip_src_data = reinterpret_cast<const uint32_t *>(image_mip_data);
			
 
				-
			
 
				-				// Reserve space in the padded buffer.
			
 
				-				mip_data_padded.resize(next_width * next_height);
			
 
				-				uint32_t *data_padded_ptr = mip_data_padded.ptrw();
			
 
				-
			
 
				-				// Pad mipmap to the nearest block by smearing.
			
 
				-				int x = 0, y = 0;
			
 
				-				for (y = 0; y < height; y++) {
			
 
				-					for (x = 0; x < width; x++) {
			
 
				-						data_padded_ptr[next_width * y + x] = mip_src_data[width * y + x];
			
 
				-					}
			
 
				-
			
 
				-					// First, smear in x.
			
 
				-					for (; x < next_width; x++) {
			
 
				-						data_padded_ptr[next_width * y + x] = data_padded_ptr[next_width * y + x - 1];
			
 
				-					}
			
 
				-				}
			
 
				-
			
 
				-				// Then, smear in y.
			
 
				-				for (; y < next_height; y++) {
			
 
				-					for (x = 0; x < next_width; x++) {
			
 
				-						data_padded_ptr[next_width * y + x] = data_padded_ptr[next_width * y + x - next_width];
			
 
				-					}
			
 
				+				if (is_hdr) {
			
 
				+					_basisu_pad_mipmap<BasisRGBAF>(image_mip_data, mip_data_padded, next_width, next_height, width, height, size);
			
 
				+				} else {
			
 
				+					_basisu_pad_mipmap<uint32_t>(image_mip_data, mip_data_padded, next_width, next_height, width, height, size);
			
 
				 				}
			
 
				 
			
 
				 				// Override the image_mip_data pointer with our temporary Vector.
			
@@ -159,7 +190,7 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha
 
				 				// Override the mipmap's properties.
			
 
				 				width = next_width;
			
 
				 				height = next_height;
			
 
				-				size = mip_data_padded.size() * 4;
			
 
				+				size = mip_data_padded.size();
			
 
				 			}
			
 
				 
			
 
				 			// Get the next mipmap's resolution.
			
@@ -167,44 +198,61 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha
 
				 			next_height /= 2;
			
 
				 
			
 
				 			// Copy the source mipmap's data to a BasisU image.
			
 
				-			basisu::image basisu_image(width, height);
			
 
				-			memcpy(basisu_image.get_ptr(), image_mip_data, size);
			
 
				+			if (is_hdr) {
			
 
				+				basisu::imagef basisu_image(width, height);
			
 
				+				memcpy(reinterpret_cast<uint8_t *>(basisu_image.get_ptr()), image_mip_data, size);
			
 
				+
			
 
				+				if (i == 0) {
			
 
				+					params.m_source_images_hdr.push_back(basisu_image);
			
 
				+				} else {
			
 
				+					basisu_mipmaps_hdr.push_back(basisu_image);
			
 
				+				}
			
 
				 
			
 
				-			if (i == 0) {
			
 
				-				params.m_source_images.push_back(basisu_image);
			
 
				 			} else {
			
 
				-				basisu_mipmaps.push_back(basisu_image);
			
 
				+				basisu::image basisu_image(width, height);
			
 
				+				memcpy(basisu_image.get_ptr(), image_mip_data, size);
			
 
				+
			
 
				+				if (i == 0) {
			
 
				+					params.m_source_images.push_back(basisu_image);
			
 
				+				} else {
			
 
				+					basisu_mipmaps.push_back(basisu_image);
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		params.m_source_mipmap_images.push_back(basisu_mipmaps);
			
 
				+		if (is_hdr) {
			
 
				+			params.m_source_mipmap_images_hdr.push_back(basisu_mipmaps_hdr);
			
 
				+		} else {
			
 
				+			params.m_source_mipmap_images.push_back(basisu_mipmaps);
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	// Encode the image data.
			
 
				-	Vector<uint8_t> basisu_data;
			
 
				-
			
 
				 	basisu::basis_compressor compressor;
			
 
				 	compressor.init(params);
			
 
				 
			
 
				 	int basisu_err = compressor.process();
			
 
				-	ERR_FAIL_COND_V(basisu_err != basisu::basis_compressor::cECSuccess, basisu_data);
			
 
				+	ERR_FAIL_COND_V(basisu_err != basisu::basis_compressor::cECSuccess, Vector<uint8_t>());
			
 
				 
			
 
				-	const basisu::uint8_vec &basisu_out = compressor.get_output_basis_file();
			
 
				-	basisu_data.resize(basisu_out.size() + 4);
			
 
				+	const basisu::uint8_vec &basisu_encoded = compressor.get_output_basis_file();
			
 
				 
			
 
				-	// Copy the encoded data to the buffer.
			
 
				-	{
			
 
				-		uint8_t *wb = basisu_data.ptrw();
			
 
				-		*(uint32_t *)wb = decompress_format;
			
 
				+	Vector<uint8_t> basisu_data;
			
 
				+	basisu_data.resize(basisu_encoded.size() + 4);
			
 
				+	uint8_t *basisu_data_ptr = basisu_data.ptrw();
			
 
				 
			
 
				-		memcpy(wb + 4, basisu_out.get_ptr(), basisu_out.size());
			
 
				-	}
			
 
				+	// Copy the encoded BasisU data into the output buffer.
			
 
				+	*(uint32_t *)basisu_data_ptr = decompress_format;
			
 
				+	memcpy(basisu_data_ptr + 4, basisu_encoded.get_ptr(), basisu_encoded.size());
			
 
				+
			
 
				+	print_verbose(vformat("BasisU: Encoding a %dx%d image with %d mipmaps took %d ms.", p_image->get_width(), p_image->get_height(), p_image->get_mipmap_count(), OS::get_singleton()->get_ticks_msec() - start_time));
			
 
				 
			
 
				 	return basisu_data;
			
 
				 }
			
 
				 #endif // TOOLS_ENABLED
			
 
				 
			
 
				 Ref<Image> basis_universal_unpacker_ptr(const uint8_t *p_data, int p_size) {
			
 
				+	uint64_t start_time = OS::get_singleton()->get_ticks_msec();
			
 
				+
			
 
				 	Ref<Image> image;
			
 
				 	ERR_FAIL_NULL_V_MSG(p_data, image, "Cannot unpack invalid BasisUniversal data.");
			
 
				 
			
@@ -320,6 +368,23 @@ Ref<Image> basis_universal_unpacker_ptr(const uint8_t *p_data, int p_size) {
 
				 			}
			
 
				 
			
 
				 		} break;
			
 
				+		case BASIS_DECOMPRESS_HDR_RGB: {
			
 
				+			if (bptc_supported) {
			
 
				+				basisu_format = basist::transcoder_texture_format::cTFBC6H;
			
 
				+				image_format = Image::FORMAT_BPTC_RGBFU;
			
 
				+			} else if (astc_supported) {
			
 
				+				basisu_format = basist::transcoder_texture_format::cTFASTC_HDR_4x4_RGBA;
			
 
				+				image_format = Image::FORMAT_ASTC_4x4_HDR;
			
 
				+			} else {
			
 
				+				// No supported VRAM compression formats, decompress.
			
 
				+				basisu_format = basist::transcoder_texture_format::cTFRGB_9E5;
			
 
				+				image_format = Image::FORMAT_RGBE9995;
			
 
				+			}
			
 
				+
			
 
				+		} break;
			
 
				+		default: {
			
 
				+			ERR_FAIL_V(image);
			
 
				+		} break;
			
 
				 	}
			
 
				 
			
 
				 	src_ptr += 4;
			
@@ -371,6 +436,9 @@ Ref<Image> basis_universal_unpacker_ptr(const uint8_t *p_data, int p_size) {
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	print_verbose(vformat("BasisU: Transcoding a %dx%d image with %d mipmaps into %s took %d ms.",
			
 
				+			image->get_width(), image->get_height(), image->get_mipmap_count(), Image::get_format_name(image_format), OS::get_singleton()->get_ticks_msec() - start_time));
			
 
				+
			
 
				 	return image;
			
 
				 }
			
 
				 
			
--- a/modules/basis_universal/image_compress_basisu.h
+++ b/modules/basis_universal/image_compress_basisu.h
@@ -39,11 +39,20 @@ enum BasisDecompressFormat {
 
				 	BASIS_DECOMPRESS_RGBA,
			
 
				 	BASIS_DECOMPRESS_RG_AS_RA,
			
 
				 	BASIS_DECOMPRESS_R,
			
 
				+	BASIS_DECOMPRESS_HDR_RGB,
			
 
				+	BASIS_DECOMPRESS_MAX
			
 
				 };
			
 
				 
			
 
				 void basis_universal_init();
			
 
				 
			
 
				 #ifdef TOOLS_ENABLED
			
 
				// Element type made of four uint32_t fields (r, g, b, a).
				// In this change it is used as the template argument of _basisu_pad_mipmap<T>() for HDR images.
				// NOTE(review): presumably each field carries the raw bits of one 32-bit float channel of an
				// Image::FORMAT_RGBAF pixel — confirm.
				+struct BasisRGBAF {
			
 
				+	uint32_t r;
			
 
				+	uint32_t g;
			
 
				+	uint32_t b;
			
 
				+	uint32_t a;
			
 
				+};
			
 
				+
			
 
				 Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedChannels p_channels);
			
 
				 #endif
			
 
				 
			
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -59,12 +59,13 @@ Files extracted from upstream source:
 
				 ## basis_universal
			
 
				 
			
 
				 - Upstream: https://github.com/BinomialLLC/basis_universal
			
 
				-- Version: 1.16.4 (900e40fb5d2502927360fe2f31762bdbb624455f, 2023)
			
 
				+- Version: 1.50.0 (051ad6d8a64bb95a79e8601c317055fd1782ad3e, 2024)
			
 
				 - License: Apache 2.0
			
 
				 
			
 
				 Files extracted from upstream source:
			
 
				 
			
 
				-- `encoder/` and `transcoder/` folders, minus `jpgd.{cpp,h}`
			
 
				+- `encoder/` and `transcoder/` folders, with the following files removed from `encoder`:
			
 
				+  `jpgd.{cpp,h}`, `3rdparty/{qoi.h,tinydds.h,tinyexr.cpp,tinyexr.h}`
			
 
				 - `LICENSE`
			
 
				 
			
 
				 Applied upstream PR https://github.com/BinomialLLC/basis_universal/pull/344 to
			
--- a/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp
+++ b/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp
@@ -0,0 +1,2052 @@
 
				+// File: android_astc_decomp.cpp
			
 
				+
			
 
				+/*-------------------------------------------------------------------------
			
 
				+ * drawElements Quality Program Tester Core
			
 
				+ * ----------------------------------------
			
 
				+ *
			
 
				+ * Copyright 2016 The Android Open Source Project
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *      http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ *
			
 
				+ * rg: Removed external dependencies, minor fix to decompress() so it converts non-sRGB
			
 
				+ * output to 8-bits correctly. I've compared this decoder's output
			
 
				+ * vs. astc-codec with random inputs.
			
 
				+ * 
			
 
				+ *//*!
			
 
				+ * \file
			
 
				+ * \brief ASTC Utilities.
			
 
				+ *//*--------------------------------------------------------------------*/
			
 
				+#include "android_astc_decomp.h"
			
 
				+#include <assert.h>
			
 
				+#include <algorithm>
			
 
				+#include <fenv.h>
			
 
				+#include <math.h>
			
 
				+
			
 
				+#define DE_LENGTH_OF_ARRAY(x) (sizeof(x)/sizeof(x[0]))
			
 
				+#define DE_UNREF(x) (void)x
			
 
				+
			
 
				+typedef uint8_t deUint8;
			
 
				+typedef int8_t deInt8;
			
 
				+typedef uint32_t deUint32;
			
 
				+typedef int32_t deInt32;
			
 
				+typedef uint16_t deUint16;
			
 
				+typedef int16_t deInt16;
			
 
				+typedef int64_t deInt64;
			
 
				+typedef uint64_t deUint64;
			
 
				+
			
 
				+#define DE_ASSERT assert
			
 
				+
			
 
				+#ifdef _MSC_VER
			
 
				+#pragma warning (disable:4505) // unreferenced local function has been removed
			
 
				+#elif defined(__GNUC__)
			
 
				+#pragma GCC diagnostic push
			
 
				+#pragma GCC diagnostic ignored "-Wunused-function"
			
 
				+#endif
			
 
				+
			
 
				+namespace basisu_astc
			
 
				+{
			
 
				// Return the largest of two, three or four values of the same type S, compared with operator>.
				// The three- and four-argument overloads chain the two-argument one.
				+    template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; }
			
 
				+    template <typename S> inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); }
			
 
				+    template <typename S> inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); }
			
 
				+
			
 
				// Returns true when l <= v < h (lower bound inclusive, upper bound exclusive).
				+    static bool inBounds(int v, int l, int h)
			
 
				+    {
			
 
				+        return (v >= l) && (v < h);
			
 
				+    }
			
 
				+
			
 
				// Returns true when l <= v <= h (both bounds inclusive).
				+    static bool inRange(int v, int l, int h)
			
 
				+    {
			
 
				+        return (v >= l) && (v <= h);
			
 
				+    }
			
 
				+
			
 
				// Returns a when a > b, otherwise b.
				+    template<typename T>
			
 
				+    static inline T max(T a, T b)
			
 
				+    {
			
 
				+        return (a > b) ? a : b;
			
 
				+    }
			
 
				+
			
 
				// Returns a when a < b, otherwise b.
				+    template<typename T>
			
 
				+    static inline T min(T a, T b)
			
 
				+    {
			
 
				+        return (a < b) ? a : b;
			
 
				+    }
			
 
				+
			
 
				// Limits a to the interval [l, h]: returns l when a < l, h when a > h, and a otherwise.
				// The lower bound is tested first, so l wins if a is below l and also above h.
				+    template<typename T>
			
 
				+    static inline T clamp(T a, T l, T h)
			
 
				+    {
			
 
				+        if (a < l)
			
 
				+            return l;
			
 
				+        else if (a > h)
			
 
				+            return h;
			
 
				+        return a;
			
 
				+    }
			
 
				+
			
 
				// Four-component vector of uint32_t stored in m_c[0..3].
				// Default construction zeroes every component. Components are reachable through the
				// x()/y()/z()/w() accessors (const value and mutable reference forms) or through operator[],
				// which asserts idx < 4.
				+    struct UVec4
			
 
				+    {
			
 
				+        uint32_t m_c[4];
			
 
				+
			
 
				+        UVec4()
			
 
				+        {
			
 
				+            m_c[0] = 0;
			
 
				+            m_c[1] = 0;
			
 
				+            m_c[2] = 0;
			
 
				+            m_c[3] = 0;
			
 
				+        }
			
 
				+
			
 
				+        UVec4(uint32_t x, uint32_t y, uint32_t z, uint32_t w)
			
 
				+        {
			
 
				+            m_c[0] = x;
			
 
				+            m_c[1] = y;
			
 
				+            m_c[2] = z;
			
 
				+            m_c[3] = w;
			
 
				+        }
			
 
				+
			
 
				+        uint32_t x() const { return m_c[0]; }
			
 
				+        uint32_t y() const { return m_c[1]; }
			
 
				+        uint32_t z() const { return m_c[2]; }
			
 
				+        uint32_t w() const { return m_c[3]; }
			
 
				+
			
 
				+        uint32_t& x() { return m_c[0]; }
			
 
				+        uint32_t& y() { return m_c[1]; }
			
 
				+        uint32_t& z() { return m_c[2]; }
			
 
				+        uint32_t& w() { return m_c[3]; }
			
 
				+
			
 
				+        uint32_t operator[] (uint32_t idx) const { assert(idx < 4);  return m_c[idx]; }
			
 
				+        uint32_t& operator[] (uint32_t idx) { assert(idx < 4);  return m_c[idx]; }
			
 
				+    };
			
 
				+
			
 
				// Four-component vector of int32_t stored in m_c[0..3].
				// Default construction zeroes every component. Components are reachable through the
				// x()/y()/z()/w() accessors (const value and mutable reference forms) or through operator[],
				// which asserts idx < 4. asUint() converts to a UVec4, replacing negative components with 0.
				+    struct IVec4
			
 
				+    {
			
 
				+        int32_t m_c[4];
			
 
				+
			
 
				+        IVec4()
			
 
				+        {
			
 
				+            m_c[0] = 0;
			
 
				+            m_c[1] = 0;
			
 
				+            m_c[2] = 0;
			
 
				+            m_c[3] = 0;
			
 
				+        }
			
 
				+
			
 
				+        IVec4(int32_t x, int32_t y, int32_t z, int32_t w)
			
 
				+        {
			
 
				+            m_c[0] = x;
			
 
				+            m_c[1] = y;
			
 
				+            m_c[2] = z;
			
 
				+            m_c[3] = w;
			
 
				+        }
			
 
				+
			
 
				+        int32_t x() const { return m_c[0]; }
			
 
				+        int32_t y() const { return m_c[1]; }
			
 
				+        int32_t z() const { return m_c[2]; }
			
 
				+        int32_t w() const { return m_c[3]; }
			
 
				+
			
 
				+        int32_t& x() { return m_c[0]; }
			
 
				+        int32_t& y() { return m_c[1]; }
			
 
				+        int32_t& z() { return m_c[2]; }
			
 
				+        int32_t& w() { return m_c[3]; }
			
 
				+
			
 
				+        UVec4 asUint() const
			
 
				+        {
			
 
				+            return UVec4(maximum(0, m_c[0]), maximum(0, m_c[1]), maximum(0, m_c[2]), maximum(0, m_c[3]));
			
 
				+        }
			
 
				+
			
 
				+        int32_t operator[] (uint32_t idx) const { assert(idx < 4);  return m_c[idx]; }
			
 
				+        int32_t& operator[] (uint32_t idx) { assert(idx < 4);  return m_c[idx]; }
			
 
				+    };
			
 
				+
			
 
				// Three-component vector of int32_t stored in m_c[0..2].
				// Default construction zeroes every component. Components are reachable through the
				// x()/y()/z() accessors (const value and mutable reference forms) or through operator[],
				// which asserts idx < 3.
				+    struct IVec3
			
 
				+    {
			
 
				+        int32_t m_c[3];
			
 
				+
			
 
				+        IVec3()
			
 
				+        {
			
 
				+            m_c[0] = 0;
			
 
				+            m_c[1] = 0;
			
 
				+            m_c[2] = 0;
			
 
				+        }
			
 
				+
			
 
				+        IVec3(int32_t x, int32_t y, int32_t z)
			
 
				+        {
			
 
				+            m_c[0] = x;
			
 
				+            m_c[1] = y;
			
 
				+            m_c[2] = z;
			
 
				+        }
			
 
				+
			
 
				+        int32_t x() const { return m_c[0]; }
			
 
				+        int32_t y() const { return m_c[1]; }
			
 
				+        int32_t z() const { return m_c[2]; }
			
 
				+
			
 
				+        int32_t& x() { return m_c[0]; }
			
 
				+        int32_t& y() { return m_c[1]; }
			
 
				+        int32_t& z() { return m_c[2]; }
			
 
				+
			
 
				+        int32_t operator[] (uint32_t idx) const { assert(idx < 3);  return m_c[idx]; }
			
 
				+        int32_t& operator[] (uint32_t idx) { assert(idx < 3);  return m_c[idx]; }
			
 
				+    };
			
 
				+
			
 
				// Divides a by b, rounding the quotient up, computed as (a + b - 1) / b.
				// NOTE(review): the sum is evaluated in uint32_t, so it wraps if a + b - 1 exceeds 32 bits;
				// b == 0 is not checked here.
				+    static uint32_t deDivRoundUp32(uint32_t a, uint32_t b)
			
 
				+    {
			
 
				+        return (a + b - 1) / b;
			
 
				+    }
			
 
				+
			
 
				// Unsigned variant of inBounds: returns true when l <= v < h.
				+    static bool deInBounds32(uint32_t v, uint32_t l, uint32_t h)
			
 
				+    {
			
 
				+        return (v >= l) && (v < h);
			
 
				+    }
			
 
				+
			
 
				+namespace astc 
			
 
				+{
			
 
				+
			
 
				+using std::vector;
			
 
				+
			
 
				+namespace
			
 
				+{
			
 
				+
			
 
				+// Common utilities
			
 
				// Upper limits for block dimensions, both set to 12.
				// NOTE(review): presumably the largest ASTC block footprint (12x12) — confirm against the spec.
				+enum
			
 
				+{
			
 
				+    MAX_BLOCK_WIDTH     = 12,
			
 
				+    MAX_BLOCK_HEIGHT    = 12
			
 
				+};
			
 
				+
			
 
				// Returns bit number ndx of src as 0 or 1. Asserts 0 <= ndx < 32.
				+inline deUint32 getBit (deUint32 src, int ndx)
			
 
				+{
			
 
				+    DE_ASSERT(basisu_astc::inBounds(ndx, 0, 32));
			
 
				+    return (src >> ndx) & 1;
			
 
				+}
			
 
				+
			
 
				// Extracts bits low..high (both inclusive) of src and returns them shifted down to bit 0.
				// Asserts that the field is 1 to 32 bits wide. The 32-bit case is handled separately because
				// the mask expression (1u<<numBits) is not a valid shift when numBits is 32.
				+inline deUint32 getBits (deUint32 src, int low, int high)
			
 
				+{
			
 
				+    const int numBits = (high-low) + 1;
			
 
				+    DE_ASSERT(basisu_astc::inRange(numBits, 1, 32));
			
 
				+
			
 
				+    if (numBits < 32)
			
 
				+        return (deUint32)((src >> low) & ((1u<<numBits)-1));
			
 
				+    else
			
 
				+        return (deUint32)((src >> low) & 0xFFFFFFFFu);
			
 
				+}
			
 
				+
			
 
				// Boolean form of getBit(): true when bit ndx of src is 1.
				+inline bool isBitSet (deUint32 src, int ndx)
			
 
				+{
			
 
				+    return getBit(src, ndx) != 0;
			
 
				+}
			
 
				+
			
 
				// Reverses the order of the lowest numBits bits of src: bit i moves to bit numBits-1-i.
				// Bits of src at or above numBits do not contribute, and the result is 0 when numBits is 0.
				// Asserts 0 <= numBits <= 32.
				+inline deUint32 reverseBits (deUint32 src, int numBits)
			
 
				+{
			
 
				+    DE_ASSERT(basisu_astc::inRange(numBits, 0, 32));
			
 
				+    
			
 
				+    deUint32 result = 0;
			
 
				+    for (int i = 0; i < numBits; i++)
			
 
				+        result |= ((src >> i) & 1) << (numBits-1-i);
			
 
				+
			
 
				+    return result;
			
 
				+}
			
 
				+
			
 
				// Widens a numSrcBits-wide value to numDstBits bits by repeating its bit pattern.
				// Copies of src are OR-ed in from the top of the destination downwards, numSrcBits apart;
				// the final copy is shifted right (truncated) when fewer than numSrcBits bits remain.
				// Asserts numSrcBits <= numDstBits and that src has no bits set above numSrcBits.
				// NOTE(review): with numSrcBits == 0 and numDstBits > 0 the loop variable never changes
				// (shift -= 0), so the loop would not terminate; the asserts do not exclude that input —
				// confirm callers never pass it.
				+inline deUint32 bitReplicationScale (deUint32 src, int numSrcBits, int numDstBits)
			
 
				+{
			
 
				+    DE_ASSERT(numSrcBits <= numDstBits);
			
 
				+    DE_ASSERT((src & ((1<<numSrcBits)-1)) == src);
			
 
				+
			
 
				+    deUint32 dst = 0;
			
 
				+    for (int shift = numDstBits-numSrcBits; shift > -numSrcBits; shift -= numSrcBits)
			
 
				+        dst |= (shift >= 0) ? (src << shift) : (src >> -shift);
			
 
				+
			
 
				+    return dst;
			
 
				+}
			
 
				+
			
 
				// Sign-extends the low numSrcBits bits of src to a full deInt32.
				// When bit numSrcBits-1 of src is set, every bit from numSrcBits upwards is set in the
				// result; otherwise src is returned unchanged. Asserts 2 <= numSrcBits <= 31.
				+inline deInt32 signExtend (deInt32 src, int numSrcBits)
			
 
				+{
			
 
				+    DE_ASSERT(basisu_astc::inRange(numSrcBits, 2, 31));
			
 
				+
			
 
				+    const bool negative = (src & (1 << (numSrcBits-1))) != 0;
			
 
				+    return src | (negative ? ~((1 << numSrcBits) - 1) : 0);
			
 
				+}
			
 
				+
			
 
				+typedef uint16_t deFloat16;
			
 
				+
			
 
				// True when bits 10..14 of v (the exponent field that deFloat16To32 reads) are all ones,
				// which deFloat16To32 treats as infinity or NaN.
				+inline bool isFloat16InfOrNan (deFloat16 v)
			
 
				+{
			
 
				+    return getBits(v, 10, 14) == 31;
			
 
				+}
			
 
				+
			
 
				// Converts a 16-bit float bit pattern (1 sign bit, 5 exponent bits, 10 mantissa bits) into a
				// 32-bit float by assembling the 32-bit pattern in x.u and reading it back through x.f.
				//   - exponent 0, mantissa 0: signed zero.
				//   - exponent 0, mantissa != 0: denormal, renormalised before the common path below.
				//   - exponent 31, mantissa 0: signed infinity.
				//   - exponent 31, mantissa != 0: NaN, with the mantissa carried over shifted left by 13.
				//   - otherwise: exponent re-biased by (127 - 15) and mantissa shifted left by 13.
				// The variable name "expotent" is the spelling used throughout this function.
				+float deFloat16To32(deFloat16 val16)
			
 
				+{
			
 
				+    deUint32 sign;
			
 
				+    deUint32 expotent;
			
 
				+    deUint32 mantissa;
			
 
				+
			
 
				+    union
			
 
				+    {
			
 
				+        float       f;
			
 
				+        deUint32    u;
			
 
				+    } x;
			
 
				+
			
 
				+    x.u = 0u;
			
 
				+
			
 
				+    sign = ((deUint32)val16 >> 15u) & 0x00000001u;
			
 
				+    expotent = ((deUint32)val16 >> 10u) & 0x0000001fu;
			
 
				+    mantissa = (deUint32)val16 & 0x000003ffu;
			
 
				+
			
 
				+    if (expotent == 0u)
			
 
				+    {
			
 
				+        if (mantissa == 0u)
			
 
				+        {
			
 
				+            /* +/- 0 */
			
 
				+            x.u = sign << 31u;
			
 
				+            return x.f;
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            /* Denormalized, normalize it: shift the mantissa up until bit 10 (0x400) is set. The exponent is unsigned and starts at 0 here, so the decrements wrap; the additions that follow bring it back into range. */
			
 
				+
			
 
				+            while (!(mantissa & 0x00000400u))
			
 
				+            {
			
 
				+                mantissa <<= 1u;
			
 
				+                expotent -= 1u;
			
 
				+            }
			
 
				+
			
 
				+            expotent += 1u;
			
 
				+            mantissa &= ~0x00000400u;
			
 
				+        }
			
 
				+    }
			
 
				+    else if (expotent == 31u)
			
 
				+    {
			
 
				+        if (mantissa == 0u)
			
 
				+        {
			
 
				+            /* +/- Inf */
			
 
				+            x.u = (sign << 31u) | 0x7f800000u;
			
 
				+            return x.f;
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            /* +/- NaN */
			
 
				+            x.u = (sign << 31u) | 0x7f800000u | (mantissa << 13u);
			
 
				+            return x.f;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    expotent = expotent + (127u - 15u);
			
 
				+    mantissa = mantissa << 13u;
			
 
				+
			
 
				+    x.u = (sign << 31u) | (expotent << 23u) | mantissa;
			
 
				+    return x.f;
			
 
				+}
			
 
				+
			
 
				// Selects how a value sequence is encoded: with trits, with quints, or with plain bits only.
				// ISEMODE_LAST is the trailing enumerator (presumably a count/sentinel — confirm).
				// NOTE(review): "ISE" presumably stands for ASTC's Integer Sequence Encoding — confirm.
				+enum ISEMode
			
 
				+{
			
 
				+    ISEMODE_TRIT = 0,
			
 
				+    ISEMODE_QUINT,
			
 
				+    ISEMODE_PLAIN_BIT,
			
 
				+    ISEMODE_LAST
			
 
				+};
			
 
				+
			
 
				// Pairs an ISEMode with the per-value bit count (numBits) used by computeNumRequiredBits().
				+struct ISEParams
			
 
				+{
			
 
				+    ISEMode     mode;
			
 
				+    int         numBits;
			
 
				+    ISEParams (ISEMode mode_, int numBits_) : mode(mode_), numBits(numBits_) {}
			
 
				+};
			
 
				+
			
 
				// Returns the number of bits needed to store numValues values with the given parameters:
				//   trit mode:  ceil(numValues * 8 / 5) + numValues * numBits
				//   quint mode: ceil(numValues * 7 / 3) + numValues * numBits
				//   plain bits: numValues * numBits
				// Any other mode trips DE_ASSERT and returns -1.
				+inline int computeNumRequiredBits (const ISEParams& iseParams, int numValues)
			
 
				+{
			
 
				+    switch (iseParams.mode)
			
 
				+    {
			
 
				+        case ISEMODE_TRIT:          return deDivRoundUp32(numValues*8, 5) + numValues*iseParams.numBits;
			
 
				+        case ISEMODE_QUINT:         return deDivRoundUp32(numValues*7, 3) + numValues*iseParams.numBits;
			
 
				+        case ISEMODE_PLAIN_BIT:     return numValues*iseParams.numBits;
			
 
				+        default:
			
 
				+            DE_ASSERT(false);
			
 
				+            return -1;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				// Finds the ISE parameters with the largest value range whose encoding of
				// numValuesInSequence values fits into numAvailableBits.
				// The search starts at 6 bits for trit mode, 5 for quint mode and 8 for plain-bit mode.
				// Each iteration computes the range each mode currently offers ((3 << bits) - 1,
				// (5 << bits) - 1, (1 << bits) - 1, or -1 once that mode's bit count is no longer positive),
				// picks the mode with the largest range (trit is tested first, then quint, then plain bits),
				// and returns it if computeNumRequiredBits() fits; otherwise that mode's bit count is
				// decremented and the search repeats.
				// NOTE(review): the loop only exits through a return; the DE_ASSERT at the top is the only
				// guard against all three bit counts running out.
				+ISEParams computeMaximumRangeISEParams (int numAvailableBits, int numValuesInSequence)
			
 
				+{
			
 
				+    int curBitsForTritMode      = 6;
			
 
				+    int curBitsForQuintMode     = 5;
			
 
				+    int curBitsForPlainBitMode  = 8;
			
 
				+
			
 
				+    while (true)
			
 
				+    {
			
 
				+        DE_ASSERT(curBitsForTritMode > 0 || curBitsForQuintMode > 0 || curBitsForPlainBitMode > 0);
			
 
				+        const int tritRange         = (curBitsForTritMode > 0)        ? (3 << curBitsForTritMode) - 1         : -1;
			
 
				+        const int quintRange        = (curBitsForQuintMode > 0)       ? (5 << curBitsForQuintMode) - 1        : -1;
			
 
				+        const int plainBitRange     = (curBitsForPlainBitMode > 0)    ? (1 << curBitsForPlainBitMode) - 1     : -1;
			
 
				+        const int maxRange          = basisu_astc::max(basisu_astc::max(tritRange, quintRange), plainBitRange);
			
 
				+
			
 
				+        if (maxRange == tritRange)
			
 
				+        {
			
 
				+            const ISEParams params(ISEMODE_TRIT, curBitsForTritMode);
			
 
				+
			
 
				+            if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
			
 
				+                return ISEParams(ISEMODE_TRIT, curBitsForTritMode);
			
 
				+
			
 
				+            curBitsForTritMode--;
			
 
				+        }
			
 
				+        else if (maxRange == quintRange)
			
 
				+        {
			
 
				+            const ISEParams params(ISEMODE_QUINT, curBitsForQuintMode);
			
 
				+
			
 
				+            if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
			
 
				+                return ISEParams(ISEMODE_QUINT, curBitsForQuintMode);
			
 
				+
			
 
				+            curBitsForQuintMode--;
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            const ISEParams params(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
			
 
				+            DE_ASSERT(maxRange == plainBitRange);
			
 
				+
			
 
				+            if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
			
 
				+                return ISEParams(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
			
 
				+
			
 
				+            curBitsForPlainBitMode--;
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// Number of integer values consumed by one color endpoint mode: each
				+// successive group of four modes uses one more pair of values (2, 4, 6, 8).
				+inline int computeNumColorEndpointValues (deUint32 endpointMode)
				+{
				+    DE_ASSERT(endpointMode < 16);
				+
				+    const deUint32 numValuePairs = endpointMode/4 + 1;
				+    return numValuePairs * 2;
				+}
			
 
				+
			
 
				+// Decompression utilities
			
 
				+enum DecompressResult // Outcome reported by the block decoding routines.
			
 
				+{
			
 
				+    DECOMPRESS_RESULT_VALID_BLOCK   = 0,    //!< Decompressed valid block
			
 
				+    DECOMPRESS_RESULT_ERROR,                //!< Encountered error while decompressing, error color written
			
 
				+    DECOMPRESS_RESULT_LAST // End marker following the two result codes; presumably never returned -- confirm against callers.
			
 
				+};
			
 
				+
			
 
				+// Read-only view of one 128-bit block, assembled from bytes in
				+// little-endian order, with helpers for reading single bits and bit ranges.
				+class Block128
				+{
				+private:
				+    typedef deUint64 Word;
				+
				+    enum
				+    {
				+        WORD_BYTES  = sizeof(Word),
				+        WORD_BITS   = 8*WORD_BYTES,
				+        NUM_WORDS   = 128 / WORD_BITS
				+    };
				+    //DE_STATIC_ASSERT(128 % WORD_BITS == 0);
				+
				+public:
				+    // Copies the block from src; byte k of src supplies bits [8k, 8k+7].
				+    Block128 (const deUint8* src)
				+    {
				+        for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
				+        {
				+            const deUint8* const wordBytes = src + wordNdx*WORD_BYTES;
				+            Word packed = 0;
				+
				+            // Start at the most significant byte so every step shifts one byte in from below.
				+            for (int byteNdx = WORD_BYTES - 1; byteNdx >= 0; byteNdx--)
				+                packed = (packed << 8) | (Word)wordBytes[byteNdx];
				+
				+            m_words[wordNdx] = packed;
				+        }
				+    }
				+
				+    // Returns bit ndx (0 or 1).
				+    deUint32 getBit (int ndx) const
				+    {
				+        DE_ASSERT(basisu_astc::inBounds(ndx, 0, 128));
				+
				+        const Word containingWord = m_words[ndx / WORD_BITS];
				+        return (containingWord >> (ndx % WORD_BITS)) & 1;
				+    }
				+
				+    // Returns bits [low, high] (both inclusive, at most 32 bits) with bit
				+    // 'low' in the least significant position. An empty range (high == low-1) yields 0.
				+    deUint32 getBits (int low, int high) const
				+    {
				+        DE_ASSERT(basisu_astc::inBounds(low, 0, 128));
				+        DE_ASSERT(basisu_astc::inBounds(high, 0, 128));
				+        DE_ASSERT(basisu_astc::inRange(high-low+1, 0, 32));
				+
				+        const int numBits = high-low+1;
				+        if (numBits == 0)
				+            return 0;
				+
				+        const int lowWordNdx    = low / WORD_BITS;
				+        const int highWordNdx   = high / WORD_BITS;
				+
				+        // Mask keeping bits [0, high%WORD_BITS] of the word that holds 'high'.
				+        // \note "foo << bar << 1" done instead of "foo << (bar+1)" to avoid overflow, i.e. shift amount being too big.
				+        const Word highMask = ((Word)1 << high%WORD_BITS << 1) - 1;
				+
				+        if (lowWordNdx == highWordNdx)
				+            return (deUint32)((m_words[lowWordNdx] & highMask) >> ((Word)low % WORD_BITS));
				+
				+        // The range straddles two adjacent words: combine the top of the lower
				+        // word with the bottom of the upper word.
				+        DE_ASSERT(highWordNdx == lowWordNdx + 1);
				+
				+        const deUint32 lowerPart = (deUint32)(m_words[lowWordNdx] >> (low%WORD_BITS));
				+        const deUint32 upperPart = (deUint32)((m_words[highWordNdx] & highMask) << (high-low - high%WORD_BITS));
				+        return lowerPart | upperPart;
				+    }
				+
				+    // True when bit ndx is 1.
				+    bool isBitSet (int ndx) const
				+    {
				+        DE_ASSERT(basisu_astc::inBounds(ndx, 0, 128));
				+
				+        const deUint32 bit = getBit(ndx);
				+        return bit != 0;
				+    }
				+
				+private:
				+    Word m_words[NUM_WORDS];
				+};
			
 
				+
			
 
				+// Sequential reader over a bit range of a Block128. The stream starts at
				+// bit startNdxInSrc and either advances towards higher bit indices
				+// (forward) or towards lower ones, in which case every chunk read is
				+// bit-reversed. Only a reference to the source block is stored.
				+class BitAccessStream
				+{
				+public:
				+    BitAccessStream (const Block128& src, int startNdxInSrc, int length, bool forward)
				+        : m_src             (src)
				+        , m_startNdxInSrc   (startNdxInSrc)
				+        , m_length          (length)
				+        , m_forward         (forward)
				+        , m_ndx             (0)
				+    {
				+    }
				+
				+    // Reads the next num bits of the stream. Positions at or beyond the
				+    // stream length read as zero bits.
				+    deUint32 getNext (int num)
				+    {
				+        if (num == 0 || m_ndx >= m_length)
				+            return 0;
				+
				+        // Part of the request that is still inside the stream.
				+        const int firstPos      = m_ndx;
				+        const int requestedEnd  = firstPos + num;
				+        const int availableBits = basisu_astc::max(0, basisu_astc::min(m_length, requestedEnd) - firstPos);
				+        const int lastPos       = firstPos + availableBits - 1;
				+
				+        m_ndx = requestedEnd;
				+
				+        if (m_forward)
				+            return m_src.getBits(m_startNdxInSrc + firstPos, m_startNdxInSrc + lastPos);
				+
				+        // Backward stream: the bits sit below the start index in reversed order.
				+        return reverseBits(m_src.getBits(m_startNdxInSrc - lastPos, m_startNdxInSrc - firstPos), availableBits);
				+    }
				+
				+private:
				+    const Block128&     m_src;
				+    const int           m_startNdxInSrc;
				+    const int           m_length;
				+    const bool          m_forward;
				+    int                 m_ndx;
				+};
			
 
				+
			
 
				+struct ISEDecodedResult // One decoded ISE value, kept both split into parts and combined.
			
 
				+{
			
 
				+    deUint32 m; //!< Plain bits read from the stream for this value.
			
 
				+    deUint32 tq; //!< Trit or quint value, depending on ISE mode.
			
 
				+    deUint32 v; //!< Combined value: (tq << numBits) + m for trit/quint modes, m for plain-bit mode.
			
 
				+};
			
 
				+
			
 
				+// Data from an ASTC block's "block mode" part (i.e. bits [0,10]).
			
 
				+struct ASTCBlockMode // Result of getASTCBlockMode().
			
 
				+{
			
 
				+    bool        isError; //!< True when the block mode bits use a reserved encoding.
			
 
				+    // \note Following fields only relevant if !isError.
			
 
				+    bool        isVoidExtent; //!< True when the block mode bits mark a void-extent block.
			
 
				+    // \note Following fields only relevant if !isVoidExtent.
			
 
				+    bool        isDualPlane; //!< When set, computeNumWeights() counts two weights per grid cell.
			
 
				+    int         weightGridWidth; //!< Width of the weight grid.
			
 
				+    int         weightGridHeight; //!< Height of the weight grid.
			
 
				+    ISEParams   weightISEParams; //!< ISE mode and bit count of the weights.
			
 
				+
			
 
				+    ASTCBlockMode (void) // Starts in the error state with placeholder values in every field.
			
 
				+        : isError           (true)
			
 
				+        , isVoidExtent      (true)
			
 
				+        , isDualPlane       (true)
			
 
				+        , weightGridWidth   (-1)
			
 
				+        , weightGridHeight  (-1)
			
 
				+        , weightISEParams   (ISEMODE_LAST, -1)
			
 
				+    {
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+// Total number of weights stored for a block mode: one per weight grid
				+// cell, doubled for dual-plane blocks.
				+inline int computeNumWeights (const ASTCBlockMode& mode)
				+{
				+    const int numGridCells  = mode.weightGridWidth * mode.weightGridHeight;
				+    const int numPlanes     = mode.isDualPlane ? 2 : 1;
				+
				+    return numGridCells * numPlanes;
				+}
			
 
				+
			
 
				+struct ColorEndpointPair // Two color endpoints held together; presumably one pair per partition -- confirm against users.
			
 
				+{
			
 
				+    UVec4 e0; //!< First endpoint.
			
 
				+    UVec4 e1; //!< Second endpoint.
			
 
				+};
			
 
				+
			
 
				+struct TexelWeightPair // Two weights stored together; presumably one per weight plane of a texel -- confirm against the weight decoder.
			
 
				+{
			
 
				+    deUint32 w[2]; //!< The two weight values.
			
 
				+};
			
 
				+
			
 
				+ASTCBlockMode getASTCBlockMode (deUint32 blockModeData) // Parses the block mode bits in blockModeData; the result keeps isError == true for reserved encodings.
			
 
				+{
			
 
				+    ASTCBlockMode blockMode;
			
 
				+    blockMode.isError = true; // \note Set to false later, if not error.
			
 
				+    blockMode.isVoidExtent = getBits(blockModeData, 0, 8) == 0x1fc; // Void-extent marker; the other fields then keep their constructor values.
			
 
				+    if (!blockMode.isVoidExtent)
			
 
				+    {
			
 
				+        if ((getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 6, 8) == 7) || getBits(blockModeData, 0, 3) == 0)
			
 
				+            return blockMode; // Invalid ("reserved").
			
 
				+
			
 
				+        deUint32 r = (deUint32)-1; // \note Set in the following branches.
			
 
				+
			
 
				+        if (getBits(blockModeData, 0, 1) == 0) // Layout where r is built from bits 3,2,4 and bits [7,8] select the grid size formula.
			
 
				+        {
			
 
				+            const deUint32 r0   = getBit(blockModeData, 4);
			
 
				+            const deUint32 r1   = getBit(blockModeData, 2);
			
 
				+            const deUint32 r2   = getBit(blockModeData, 3);
			
 
				+            const deUint32 i78  = getBits(blockModeData, 7, 8);
			
 
				+            
			
 
				+            r = (r2 << 2) | (r1 << 1) | (r0 << 0);
			
 
				+
			
 
				+            if (i78 == 3)
			
 
				+            {
			
 
				+                const bool i5 = isBitSet(blockModeData, 5); // Chooses between the 10x6 and 6x10 grids.
			
 
				+                blockMode.weightGridWidth   = i5 ? 10 : 6;
			
 
				+                blockMode.weightGridHeight  = i5 ? 6  : 10;
			
 
				+            }
			
 
				+            else
			
 
				+            {
			
 
				+                const deUint32 a = getBits(blockModeData, 5, 6);
			
 
				+
			
 
				+                switch (i78)
			
 
				+                {
			
 
				+                    case 0:     blockMode.weightGridWidth = 12;     blockMode.weightGridHeight = a + 2;                                 break;
			
 
				+                    case 1:     blockMode.weightGridWidth = a + 2;  blockMode.weightGridHeight = 12;                                    break;
			
 
				+                    case 2:     blockMode.weightGridWidth = a + 6;  blockMode.weightGridHeight = getBits(blockModeData, 9, 10) + 6;     break;
			
 
				+                    default: DE_ASSERT(false);
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+        else // Layout where r is built from bits 1,0,4 and bits [2,3] select the grid size formula.
			
 
				+        {
			
 
				+            const deUint32 r0   = getBit(blockModeData, 4);
			
 
				+            const deUint32 r1   = getBit(blockModeData, 0);
			
 
				+            const deUint32 r2   = getBit(blockModeData, 1);
			
 
				+            const deUint32 i23  = getBits(blockModeData, 2, 3);
			
 
				+            const deUint32 a    = getBits(blockModeData, 5, 6);
			
 
				+
			
 
				+            r = (r2 << 2) | (r1 << 1) | (r0 << 0);
			
 
				+            if (i23 == 3)
			
 
				+            {
			
 
				+                const deUint32  b   = getBit(blockModeData, 7);
			
 
				+                const bool      i8  = isBitSet(blockModeData, 8);
			
 
				+                blockMode.weightGridWidth   = i8 ? b+2 : a+2;
			
 
				+                blockMode.weightGridHeight  = i8 ? a+2 : b+6;
			
 
				+            }
			
 
				+            else
			
 
				+            {
			
 
				+                const deUint32 b = getBits(blockModeData, 7, 8);
			
 
				+                switch (i23)
			
 
				+                {
			
 
				+                    case 0:     blockMode.weightGridWidth = b + 4;  blockMode.weightGridHeight = a + 2; break;
			
 
				+                    case 1:     blockMode.weightGridWidth = b + 8;  blockMode.weightGridHeight = a + 2; break;
			
 
				+                    case 2:     blockMode.weightGridWidth = a + 2;  blockMode.weightGridHeight = b + 8; break;
			
 
				+                    default: DE_ASSERT(false);
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        const bool  zeroDH      = getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 7, 8) == 2; // For this encoding h and isDualPlane are forced to 0 instead of being read from bits 9 and 10.
			
 
				+        const bool  h           = zeroDH ? 0 : isBitSet(blockModeData, 9);
			
 
				+        blockMode.isDualPlane   = zeroDH ? 0 : isBitSet(blockModeData, 10);
			
 
				+
			
 
				+        { // Map (h, r) to the weight ISE mode and bit count; cases that set nothing keep plain-bit mode / 0 bits.
			
 
				+            ISEMode&    m   = blockMode.weightISEParams.mode;
			
 
				+            int&        b   = blockMode.weightISEParams.numBits;
			
 
				+            m = ISEMODE_PLAIN_BIT;
			
 
				+            b = 0;
			
 
				+            if (h)
			
 
				+            {
			
 
				+                switch (r)
			
 
				+                {
			
 
				+                    case 2:                         m = ISEMODE_QUINT;  b = 1;  break;
			
 
				+                    case 3:     m = ISEMODE_TRIT;                       b = 2;  break;
			
 
				+                    case 4:                                             b = 4;  break;
			
 
				+                    case 5:                         m = ISEMODE_QUINT;  b = 2;  break;
			
 
				+                    case 6:     m = ISEMODE_TRIT;                       b = 3;  break;
			
 
				+                    case 7:                                             b = 5;  break;
			
 
				+                    default:    DE_ASSERT(false);
			
 
				+                }
			
 
				+            }
			
 
				+            else
			
 
				+            {
			
 
				+                switch (r)
			
 
				+                {
			
 
				+                    case 2:                                             b = 1;  break;
			
 
				+                    case 3:     m = ISEMODE_TRIT;                               break;
			
 
				+                    case 4:                                             b = 2;  break;
			
 
				+                    case 5:                         m = ISEMODE_QUINT;          break;
			
 
				+                    case 6:     m = ISEMODE_TRIT;                       b = 1;  break;
			
 
				+                    case 7:                                             b = 3;  break;
			
 
				+                    default:    DE_ASSERT(false);
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    blockMode.isError = false; // Reached for void-extent blocks and for every non-reserved block mode.
			
 
				+    return blockMode;
			
 
				+}
			
 
				+
			
 
				+inline void setASTCErrorColorBlock (void* dst, int blockWidth, int blockHeight, bool isSRGB)
			
 
				+{
			
 
				+    if (isSRGB)
			
 
				+    {
			
 
				+        deUint8* const dstU = (deUint8*)dst;
			
 
				+        for (int i = 0; i < blockWidth*blockHeight; i++)
			
 
				+        {
			
 
				+            dstU[4*i + 0] = 0xff;
			
 
				+            dstU[4*i + 1] = 0;
			
 
				+            dstU[4*i + 2] = 0xff;
			
 
				+            dstU[4*i + 3] = 0xff;
			
 
				+        }
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        float* const dstF = (float*)dst;
			
 
				+        for (int i = 0; i < blockWidth*blockHeight; i++)
			
 
				+        {
			
 
				+            dstF[4*i + 0] = 1.0f;
			
 
				+            dstF[4*i + 1] = 0.0f;
			
 
				+            dstF[4*i + 2] = 1.0f;
			
 
				+            dstF[4*i + 3] = 1.0f;
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// Decodes a void-extent block: a single color replicated over all
				+// blockWidth*blockHeight texels of dst (8-bit RGBA when isSRGB, float
				+// RGBA otherwise). Writes the error color and returns
				+// DECOMPRESS_RESULT_ERROR when an HDR block is met in LDR mode, when the
				+// extents are inconsistent, or when an HDR color component is Inf/NaN.
				+DecompressResult decodeVoidExtentBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode)
				+{
				+    const bool      isHDRBlock      = blockData.isBitSet(9);
				+
				+    // Four 13-bit extent fields; all of them being all-ones disables the range check.
				+    const deUint32  sMin            = blockData.getBits(12, 24);
				+    const deUint32  sMax            = blockData.getBits(25, 37);
				+    const deUint32  tMin            = blockData.getBits(38, 50);
				+    const deUint32  tMax            = blockData.getBits(51, 63);
				+    const bool      extentsAllOnes  = (sMin == 0x1fff) && (sMax == 0x1fff) && (tMin == 0x1fff) && (tMax == 0x1fff);
				+    const bool      extentsInvalid  = !extentsAllOnes && (sMin >= sMax || tMin >= tMax);
				+
				+    if ((isLDRMode && isHDRBlock) || extentsInvalid)
				+    {
				+        setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
				+        return DECOMPRESS_RESULT_ERROR;
				+    }
				+
				+    // The color occupies the top 64 bits, 16 bits per channel.
				+    deUint32 rgba[4];
				+    for (int c = 0; c < 4; c++)
				+        rgba[c] = blockData.getBits(64 + 16*c, 79 + 16*c);
				+
				+    const int numTexels = blockWidth * blockHeight;
				+
				+    if (isSRGB)
				+    {
				+        // Only the high byte of every channel is kept.
				+        deUint8 color[4];
				+        for (int c = 0; c < 4; c++)
				+            color[c] = (deUint8)((rgba[c] & 0xff00) >> 8);
				+
				+        deUint8* const dstU = (deUint8*)dst;
				+        for (int i = 0; i < numTexels; i++)
				+        {
				+            for (int c = 0; c < 4; c++)
				+                dstU[i * 4 + c] = color[c];
				+        }
				+
				+        return DECOMPRESS_RESULT_VALID_BLOCK;
				+    }
				+
				+    float color[4];
				+
				+    if (isHDRBlock)
				+    {
				+        // HDR channels are half floats; Inf/NaN components are rejected
				+        // (behavior undefined by the ASTC specification).
				+        for (int c = 0; c < 4; c++)
				+        {
				+            if (isFloat16InfOrNan((deFloat16)rgba[c]))
				+            {
				+                setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
				+                return DECOMPRESS_RESULT_ERROR;
				+            }
				+        }
				+
				+        for (int c = 0; c < 4; c++)
				+            color[c] = deFloat16To32((deFloat16)rgba[c]);
				+    }
				+    else
				+    {
				+        // LDR channels are 16-bit UNORM values, with 65535 mapped to exactly 1.0.
				+        for (int c = 0; c < 4; c++)
				+            color[c] = (rgba[c] == 65535) ? 1.0f : ((float)rgba[c] / 65536.0f);
				+    }
				+
				+    float* const dstF = (float*)dst;
				+    for (int i = 0; i < numTexels; i++)
				+    {
				+        for (int c = 0; c < 4; c++)
				+            dstF[i * 4 + c] = color[c];
				+    }
				+
				+    return DECOMPRESS_RESULT_VALID_BLOCK;
				+}
			
 
				+
			
 
				+// Extracts the color endpoint mode of every partition into
				+// endpointModesDst[0 .. numPartitions-1]. With several partitions the
				+// modes are either all equal or share a base class with per-partition
				+// adjustments; mode bits that do not fit at bit 25 onwards are read
				+// starting from extraCemBitsStart.
				+void decodeColorEndpointModes (deUint32* endpointModesDst, const Block128& blockData, int numPartitions, int extraCemBitsStart)
				+{
				+    if (numPartitions == 1)
				+    {
				+        endpointModesDst[0] = blockData.getBits(13, 16);
				+        return;
				+    }
				+
				+    const deUint32 baseClassSelector = blockData.getBits(23, 24);
				+
				+    if (baseClassSelector == 0)
				+    {
				+        // Every partition shares one explicitly stored mode.
				+        const deUint32 sharedMode = blockData.getBits(25, 28);
				+
				+        for (int partNdx = 0; partNdx < numPartitions; partNdx++)
				+            endpointModesDst[partNdx] = sharedMode;
				+
				+        return;
				+    }
				+
				+    for (int partNdx = 0; partNdx < numPartitions; partNdx++)
				+    {
				+        // One bit per partition chooses between the selector class and the class below it.
				+        const deUint32  classStepDown   = blockData.isBitSet(25 + partNdx) ? 0 : 1;
				+        deUint32        mode            = (baseClassSelector - classStepDown) << 2;
				+
				+        // Two low mode bits follow the per-partition class bits; the first four
				+        // positions live at bits 25..28, the rest at extraCemBitsStart.
				+        for (deUint32 lowBit = 0; lowBit < 2; lowBit++)
				+        {
				+            const deUint32 bitPos   = numPartitions + 2*partNdx + lowBit;
				+            const deUint32 bitValue = blockData.getBit(bitPos < 4 ? 25+bitPos : extraCemBitsStart+bitPos-4);
				+
				+            mode |= bitValue << lowBit;
				+        }
				+
				+        endpointModesDst[partNdx] = mode;
				+    }
				+}
			
 
				+
			
 
				+// Sum of the per-mode endpoint value counts over the first numPartitions
				+// entries of endpointModes.
				+int computeNumColorEndpointValues (const deUint32* endpointModes, int numPartitions)
				+{
				+    int totalValues = 0;
				+    int partNdx     = 0;
				+
				+    while (partNdx < numPartitions)
				+    {
				+        totalValues += computeNumColorEndpointValues(endpointModes[partNdx]);
				+        partNdx++;
				+    }
				+
				+    return totalValues;
				+}
			
 
				+
			
 
				+// Decodes one ISE trit block: up to five values, each made of numBits
				+// plain bits plus one trit, with the five trits packed into the 8-bit
				+// field T that is interleaved with the plain bits. Writes numValues
				+// results (1..5) to dst.
				+void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits)
				+{
				+    DE_ASSERT(basisu_astc::inRange(numValues, 1, 5));
				+
				+    // The stream interleaves each value's plain bits with a slice of T.
				+    deUint32 m[5];
				+    m[0]            = data.getNext(numBits);
				+    deUint32 T01    = data.getNext(2);
				+    m[1]            = data.getNext(numBits);
				+    deUint32 T23    = data.getNext(2);
				+    m[2]            = data.getNext(numBits);
				+    deUint32 T4     = data.getNext(1);
				+    m[3]            = data.getNext(numBits);
				+    deUint32 T56    = data.getNext(2);
				+    m[4]            = data.getNext(numBits);
				+    deUint32 T7     = data.getNext(1);
				+
				+    // In a partial block the T slices of the missing values are treated as zero.
				+    if (numValues >= 1 && numValues <= 5)
				+    {
				+        if (numValues < 2)
				+            T23 = 0;
				+        if (numValues < 3)
				+            T4 = 0;
				+        if (numValues < 4)
				+            T56 = 0;
				+        if (numValues < 5)
				+            T7 = 0;
				+    }
				+    else
				+    {
				+        DE_ASSERT(false);
				+    }
				+
				+    const deUint32 T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0);
				+
				+    // Lookup from the packed field T to its five trits.
				+    static const deUint32 tritsFromT[256][5] =
				+    {
				+        { 0,0,0,0,0 }, { 1,0,0,0,0 }, { 2,0,0,0,0 }, { 0,0,2,0,0 }, { 0,1,0,0,0 }, { 1,1,0,0,0 }, { 2,1,0,0,0 }, { 1,0,2,0,0 }, { 0,2,0,0,0 }, { 1,2,0,0,0 }, { 2,2,0,0,0 }, { 2,0,2,0,0 }, { 0,2,2,0,0 }, { 1,2,2,0,0 }, { 2,2,2,0,0 }, { 2,0,2,0,0 },
				+        { 0,0,1,0,0 }, { 1,0,1,0,0 }, { 2,0,1,0,0 }, { 0,1,2,0,0 }, { 0,1,1,0,0 }, { 1,1,1,0,0 }, { 2,1,1,0,0 }, { 1,1,2,0,0 }, { 0,2,1,0,0 }, { 1,2,1,0,0 }, { 2,2,1,0,0 }, { 2,1,2,0,0 }, { 0,0,0,2,2 }, { 1,0,0,2,2 }, { 2,0,0,2,2 }, { 0,0,2,2,2 },
				+        { 0,0,0,1,0 }, { 1,0,0,1,0 }, { 2,0,0,1,0 }, { 0,0,2,1,0 }, { 0,1,0,1,0 }, { 1,1,0,1,0 }, { 2,1,0,1,0 }, { 1,0,2,1,0 }, { 0,2,0,1,0 }, { 1,2,0,1,0 }, { 2,2,0,1,0 }, { 2,0,2,1,0 }, { 0,2,2,1,0 }, { 1,2,2,1,0 }, { 2,2,2,1,0 }, { 2,0,2,1,0 },
				+        { 0,0,1,1,0 }, { 1,0,1,1,0 }, { 2,0,1,1,0 }, { 0,1,2,1,0 }, { 0,1,1,1,0 }, { 1,1,1,1,0 }, { 2,1,1,1,0 }, { 1,1,2,1,0 }, { 0,2,1,1,0 }, { 1,2,1,1,0 }, { 2,2,1,1,0 }, { 2,1,2,1,0 }, { 0,1,0,2,2 }, { 1,1,0,2,2 }, { 2,1,0,2,2 }, { 1,0,2,2,2 },
				+        { 0,0,0,2,0 }, { 1,0,0,2,0 }, { 2,0,0,2,0 }, { 0,0,2,2,0 }, { 0,1,0,2,0 }, { 1,1,0,2,0 }, { 2,1,0,2,0 }, { 1,0,2,2,0 }, { 0,2,0,2,0 }, { 1,2,0,2,0 }, { 2,2,0,2,0 }, { 2,0,2,2,0 }, { 0,2,2,2,0 }, { 1,2,2,2,0 }, { 2,2,2,2,0 }, { 2,0,2,2,0 },
				+        { 0,0,1,2,0 }, { 1,0,1,2,0 }, { 2,0,1,2,0 }, { 0,1,2,2,0 }, { 0,1,1,2,0 }, { 1,1,1,2,0 }, { 2,1,1,2,0 }, { 1,1,2,2,0 }, { 0,2,1,2,0 }, { 1,2,1,2,0 }, { 2,2,1,2,0 }, { 2,1,2,2,0 }, { 0,2,0,2,2 }, { 1,2,0,2,2 }, { 2,2,0,2,2 }, { 2,0,2,2,2 },
				+        { 0,0,0,0,2 }, { 1,0,0,0,2 }, { 2,0,0,0,2 }, { 0,0,2,0,2 }, { 0,1,0,0,2 }, { 1,1,0,0,2 }, { 2,1,0,0,2 }, { 1,0,2,0,2 }, { 0,2,0,0,2 }, { 1,2,0,0,2 }, { 2,2,0,0,2 }, { 2,0,2,0,2 }, { 0,2,2,0,2 }, { 1,2,2,0,2 }, { 2,2,2,0,2 }, { 2,0,2,0,2 },
				+        { 0,0,1,0,2 }, { 1,0,1,0,2 }, { 2,0,1,0,2 }, { 0,1,2,0,2 }, { 0,1,1,0,2 }, { 1,1,1,0,2 }, { 2,1,1,0,2 }, { 1,1,2,0,2 }, { 0,2,1,0,2 }, { 1,2,1,0,2 }, { 2,2,1,0,2 }, { 2,1,2,0,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,0,2,2,2 },
				+        { 0,0,0,0,1 }, { 1,0,0,0,1 }, { 2,0,0,0,1 }, { 0,0,2,0,1 }, { 0,1,0,0,1 }, { 1,1,0,0,1 }, { 2,1,0,0,1 }, { 1,0,2,0,1 }, { 0,2,0,0,1 }, { 1,2,0,0,1 }, { 2,2,0,0,1 }, { 2,0,2,0,1 }, { 0,2,2,0,1 }, { 1,2,2,0,1 }, { 2,2,2,0,1 }, { 2,0,2,0,1 },
				+        { 0,0,1,0,1 }, { 1,0,1,0,1 }, { 2,0,1,0,1 }, { 0,1,2,0,1 }, { 0,1,1,0,1 }, { 1,1,1,0,1 }, { 2,1,1,0,1 }, { 1,1,2,0,1 }, { 0,2,1,0,1 }, { 1,2,1,0,1 }, { 2,2,1,0,1 }, { 2,1,2,0,1 }, { 0,0,1,2,2 }, { 1,0,1,2,2 }, { 2,0,1,2,2 }, { 0,1,2,2,2 },
				+        { 0,0,0,1,1 }, { 1,0,0,1,1 }, { 2,0,0,1,1 }, { 0,0,2,1,1 }, { 0,1,0,1,1 }, { 1,1,0,1,1 }, { 2,1,0,1,1 }, { 1,0,2,1,1 }, { 0,2,0,1,1 }, { 1,2,0,1,1 }, { 2,2,0,1,1 }, { 2,0,2,1,1 }, { 0,2,2,1,1 }, { 1,2,2,1,1 }, { 2,2,2,1,1 }, { 2,0,2,1,1 },
				+        { 0,0,1,1,1 }, { 1,0,1,1,1 }, { 2,0,1,1,1 }, { 0,1,2,1,1 }, { 0,1,1,1,1 }, { 1,1,1,1,1 }, { 2,1,1,1,1 }, { 1,1,2,1,1 }, { 0,2,1,1,1 }, { 1,2,1,1,1 }, { 2,2,1,1,1 }, { 2,1,2,1,1 }, { 0,1,1,2,2 }, { 1,1,1,2,2 }, { 2,1,1,2,2 }, { 1,1,2,2,2 },
				+        { 0,0,0,2,1 }, { 1,0,0,2,1 }, { 2,0,0,2,1 }, { 0,0,2,2,1 }, { 0,1,0,2,1 }, { 1,1,0,2,1 }, { 2,1,0,2,1 }, { 1,0,2,2,1 }, { 0,2,0,2,1 }, { 1,2,0,2,1 }, { 2,2,0,2,1 }, { 2,0,2,2,1 }, { 0,2,2,2,1 }, { 1,2,2,2,1 }, { 2,2,2,2,1 }, { 2,0,2,2,1 },
				+        { 0,0,1,2,1 }, { 1,0,1,2,1 }, { 2,0,1,2,1 }, { 0,1,2,2,1 }, { 0,1,1,2,1 }, { 1,1,1,2,1 }, { 2,1,1,2,1 }, { 1,1,2,2,1 }, { 0,2,1,2,1 }, { 1,2,1,2,1 }, { 2,2,1,2,1 }, { 2,1,2,2,1 }, { 0,2,1,2,2 }, { 1,2,1,2,2 }, { 2,2,1,2,2 }, { 2,1,2,2,2 },
				+        { 0,0,0,1,2 }, { 1,0,0,1,2 }, { 2,0,0,1,2 }, { 0,0,2,1,2 }, { 0,1,0,1,2 }, { 1,1,0,1,2 }, { 2,1,0,1,2 }, { 1,0,2,1,2 }, { 0,2,0,1,2 }, { 1,2,0,1,2 }, { 2,2,0,1,2 }, { 2,0,2,1,2 }, { 0,2,2,1,2 }, { 1,2,2,1,2 }, { 2,2,2,1,2 }, { 2,0,2,1,2 },
				+        { 0,0,1,1,2 }, { 1,0,1,1,2 }, { 2,0,1,1,2 }, { 0,1,2,1,2 }, { 0,1,1,1,2 }, { 1,1,1,1,2 }, { 2,1,1,1,2 }, { 1,1,2,1,2 }, { 0,2,1,1,2 }, { 1,2,1,1,2 }, { 2,2,1,1,2 }, { 2,1,2,1,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,1,2,2,2 }
				+    };
				+
				+    const deUint32* const trits = tritsFromT[T];
				+
				+    for (int valueNdx = 0; valueNdx < numValues; valueNdx++)
				+    {
				+        ISEDecodedResult& result = dst[valueNdx];
				+
				+        result.m    = m[valueNdx];
				+        result.tq   = trits[valueNdx];
				+        result.v    = (trits[valueNdx] << numBits) + m[valueNdx];
				+    }
				+}
			
 
				+
			
 
				+// Decodes one ISE quint block: up to three values, each made of numBits
				+// plain bits plus one quint, with the three quints packed into the 7-bit
				+// field Q that is interleaved with the plain bits. Writes numValues
				+// results (1..3) to dst.
				+void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits)
				+{
				+    DE_ASSERT(basisu_astc::inRange(numValues, 1, 3));
				+
				+    // The stream interleaves each value's plain bits with a slice of Q.
				+    deUint32 m[3];
				+    m[0]            = data.getNext(numBits);
				+    deUint32 Q012   = data.getNext(3);
				+    m[1]            = data.getNext(numBits);
				+    deUint32 Q34    = data.getNext(2);
				+    m[2]            = data.getNext(numBits);
				+    deUint32 Q56    = data.getNext(2);
				+
				+#ifdef __GNUC__
				+#pragma GCC diagnostic push
				+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough="
				+#endif
				+    // In a partial block the Q slices of the missing values are treated as zero.
				+    switch (numValues)
				+    {
				+        // \note Fall-throughs.
				+        case 1: Q34     = 0;
				+        case 2: Q56     = 0;
				+        case 3: break;
				+        default:
				+            DE_ASSERT(false);
				+    }
				+#ifdef __GNUC__
				+#pragma GCC diagnostic pop
				+#endif
				+
				+    const deUint32 Q = (Q56 << 5) | (Q34 << 3) | (Q012 << 0);
				+
				+    // Q is built from 2+2+3 bits, so 128 rows cover every possible value.
				+    DE_ASSERT(Q < 128);
				+
				+    // Lookup from the packed field Q to its three quints.
				+    static const deUint32 quintsFromQ[128][3] =
				+    {
				+        { 0,0,0 }, { 1,0,0 }, { 2,0,0 }, { 3,0,0 }, { 4,0,0 }, { 0,4,0 }, { 4,4,0 }, { 4,4,4 }, { 0,1,0 }, { 1,1,0 }, { 2,1,0 }, { 3,1,0 }, { 4,1,0 }, { 1,4,0 }, { 4,4,1 }, { 4,4,4 },
				+        { 0,2,0 }, { 1,2,0 }, { 2,2,0 }, { 3,2,0 }, { 4,2,0 }, { 2,4,0 }, { 4,4,2 }, { 4,4,4 }, { 0,3,0 }, { 1,3,0 }, { 2,3,0 }, { 3,3,0 }, { 4,3,0 }, { 3,4,0 }, { 4,4,3 }, { 4,4,4 },
				+        { 0,0,1 }, { 1,0,1 }, { 2,0,1 }, { 3,0,1 }, { 4,0,1 }, { 0,4,1 }, { 4,0,4 }, { 0,4,4 }, { 0,1,1 }, { 1,1,1 }, { 2,1,1 }, { 3,1,1 }, { 4,1,1 }, { 1,4,1 }, { 4,1,4 }, { 1,4,4 },
				+        { 0,2,1 }, { 1,2,1 }, { 2,2,1 }, { 3,2,1 }, { 4,2,1 }, { 2,4,1 }, { 4,2,4 }, { 2,4,4 }, { 0,3,1 }, { 1,3,1 }, { 2,3,1 }, { 3,3,1 }, { 4,3,1 }, { 3,4,1 }, { 4,3,4 }, { 3,4,4 },
				+        { 0,0,2 }, { 1,0,2 }, { 2,0,2 }, { 3,0,2 }, { 4,0,2 }, { 0,4,2 }, { 2,0,4 }, { 3,0,4 }, { 0,1,2 }, { 1,1,2 }, { 2,1,2 }, { 3,1,2 }, { 4,1,2 }, { 1,4,2 }, { 2,1,4 }, { 3,1,4 },
				+        { 0,2,2 }, { 1,2,2 }, { 2,2,2 }, { 3,2,2 }, { 4,2,2 }, { 2,4,2 }, { 2,2,4 }, { 3,2,4 }, { 0,3,2 }, { 1,3,2 }, { 2,3,2 }, { 3,3,2 }, { 4,3,2 }, { 3,4,2 }, { 2,3,4 }, { 3,3,4 },
				+        { 0,0,3 }, { 1,0,3 }, { 2,0,3 }, { 3,0,3 }, { 4,0,3 }, { 0,4,3 }, { 0,0,4 }, { 1,0,4 }, { 0,1,3 }, { 1,1,3 }, { 2,1,3 }, { 3,1,3 }, { 4,1,3 }, { 1,4,3 }, { 0,1,4 }, { 1,1,4 },
				+        { 0,2,3 }, { 1,2,3 }, { 2,2,3 }, { 3,2,3 }, { 4,2,3 }, { 2,4,3 }, { 0,2,4 }, { 1,2,4 }, { 0,3,3 }, { 1,3,3 }, { 2,3,3 }, { 3,3,3 }, { 4,3,3 }, { 3,4,3 }, { 0,3,4 }, { 1,3,4 }
				+    };
				+
				+    const deUint32 (& quints)[3] = quintsFromQ[Q];
				+    for (int i = 0; i < numValues; i++)
				+    {
				+        dst[i].m    = m[i];
				+        dst[i].tq   = quints[i];
				+        dst[i].v    = (quints[i] << numBits) + m[i];
				+    }
				+}
			
 
				+
			
 
				+// Decodes one plain-bit ISE value: the next numBits bits of data are
				+// the value itself. Writes the result to dst[0].
				+inline void decodeISEBitBlock (ISEDecodedResult* dst, BitAccessStream& data, int numBits)
				+{
				+    dst[0].m    = data.getNext(numBits);
				+    dst[0].tq   = 0; // Plain-bit mode has no trit/quint part; set it so the field is never left indeterminate.
				+    dst[0].v    = dst[0].m;
				+}
			
 
				+
			
 
				+// Decodes numValues ISE-encoded values from data into dst. Trit and
				+// quint sequences are handled in blocks of five and three values, the
				+// last block possibly being partial; plain-bit sequences are read one
				+// value at a time.
				+void decodeISE (ISEDecodedResult* dst, int numValues, BitAccessStream& data, const ISEParams& params)
				+{
				+    const bool isTrit   = params.mode == ISEMODE_TRIT;
				+    const bool isQuint  = !isTrit && params.mode == ISEMODE_QUINT;
				+
				+    if (isTrit || isQuint)
				+    {
				+        const int valuesPerBlock    = isTrit ? 5 : 3;
				+        const int numBlocks         = deDivRoundUp32(numValues, valuesPerBlock);
				+
				+        for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
				+        {
				+            // Every block is full except possibly the last one.
				+            const bool              isLastBlock         = blockNdx == numBlocks-1;
				+            const int               numValuesInBlock    = isLastBlock ? numValues - valuesPerBlock*(numBlocks-1) : valuesPerBlock;
				+            ISEDecodedResult* const blockDst            = &dst[valuesPerBlock*blockNdx];
				+
				+            if (isTrit)
				+                decodeISETritBlock(blockDst, numValuesInBlock, data, params.numBits);
				+            else
				+                decodeISEQuintBlock(blockDst, numValuesInBlock, data, params.numBits);
				+        }
				+
				+        return;
				+    }
				+
				+    DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT);
				+
				+    for (int valueNdx = 0; valueNdx < numValues; valueNdx++)
				+        decodeISEBitBlock(&dst[valueNdx], data, params.numBits);
				+}
			
 
				+
			
 
				+// Expands numEndpoints decoded ISE values into 8-bit color endpoint
				+// values written to dst. Plain-bit values are expanded by bit
				+// replication; trit/quint values combine the trit/quint (scaled by a
				+// per-range constant C) with a bit pattern B derived from the plain bits.
				+void unquantizeColorEndpoints (deUint32* dst, const ISEDecodedResult* iseResults, int numEndpoints, const ISEParams& iseParams)
				+{
				+    const bool usesTritsOrQuints = (iseParams.mode == ISEMODE_TRIT) || (iseParams.mode == ISEMODE_QUINT);
				+
				+    if (!usesTritsOrQuints)
				+    {
				+        DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
				+
				+        for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
				+            dst[endpointNdx] = bitReplicationScale(iseResults[endpointNdx].v, iseParams.numBits, 8);
				+
				+        return;
				+    }
				+
				+    // Index of the (mode, bit count) combination in the constant table below.
				+    const int rangeCase = iseParams.numBits*2 - (iseParams.mode == ISEMODE_TRIT ? 2 : 1);
				+    DE_ASSERT(basisu_astc::inRange(rangeCase, 0, 10));
				+
				+    static const deUint32   Ca[11]  = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
				+    const deUint32          C       = Ca[rangeCase];
				+
				+    for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
				+    {
				+        const ISEDecodedResult& value   = iseResults[endpointNdx];
				+        const deUint32          a       = getBit(value.m, 0);
				+        const deUint32          b       = getBit(value.m, 1);
				+        const deUint32          c       = getBit(value.m, 2);
				+        const deUint32          d       = getBit(value.m, 3);
				+        const deUint32          e       = getBit(value.m, 4);
				+        const deUint32          f       = getBit(value.m, 5);
				+
				+        // Bit 0 of the plain bits, replicated over nine bits.
				+        const deUint32 A = (a == 0) ? 0 : (1<<9)-1;
				+
				+        deUint32 B;
				+        switch (rangeCase)
				+        {
				+            case 0:
				+            case 1:     B = 0;                                                                          break;
				+            case 2:     B = (b << 8) | (b << 4) | (b << 2) | (b << 1);                                  break;
				+            case 3:     B = (b << 8) | (b << 3) | (b << 2);                                             break;
				+            case 4:     B = (c << 8) | (b << 7) | (c << 3) | (b << 2) | (c << 1) | (b << 0);            break;
				+            case 5:     B = (c << 8) | (b << 7) | (c << 2) | (b << 1) | (c << 0);                       break;
				+            case 6:     B = (d << 8) | (c << 7) | (b << 6) | (d << 2) | (c << 1) | (b << 0);            break;
				+            case 7:     B = (d << 8) | (c << 7) | (b << 6) | (d << 1) | (c << 0);                       break;
				+            case 8:     B = (e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 1) | (d << 0);            break;
				+            case 9:     B = (e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 0);                       break;
				+            case 10:    B = (f << 8) | (e << 7) | (d << 6) | (c << 5) | (b << 4) | (f << 0);            break;
				+            default:    B = (deUint32)-1;                                                               break;
				+        }
				+
				+        DE_ASSERT(B != (deUint32)-1);
				+        dst[endpointNdx] = (((value.tq*C + B) ^ A) >> 2) | (A & 0x80);
				+    }
				+}
			
 
				+
			
 
				+inline void bitTransferSigned (deInt32& a, deInt32& b)
			
 
				+{
			
 
				+    b >>= 1;
			
 
				+    b |= a & 0x80;
			
 
				+    a >>= 1;
			
 
				+    a &= 0x3f;
			
 
				+    if (isBitSet(a, 5))
			
 
				+        a -= 0x40;
			
 
				+}
			
 
				+
			
 
				+inline UVec4 clampedRGBA (const IVec4& rgba)
			
 
				+{
			
 
				+    return UVec4(basisu_astc::clamp(rgba.x(), 0, 0xff),
			
 
				+        basisu_astc::clamp(rgba.y(), 0, 0xff),
			
 
				+        basisu_astc::clamp(rgba.z(), 0, 0xff),
			
 
				+        basisu_astc::clamp(rgba.w(), 0, 0xff));
			
 
				+}
			
 
				+
			
 
				+inline IVec4 blueContract (int r, int g, int b, int a)
			
 
				+{
			
 
				+    return IVec4((r+b)>>1, (g+b)>>1, b, a);
			
 
				+}
			
 
				+
			
 
				+inline bool isColorEndpointModeHDR (deUint32 mode)
			
 
				+{
			
 
				+    return (mode == 2)    ||
			
 
				+           (mode == 3)    ||
			
 
				+           (mode == 7)    ||
			
 
				+           (mode == 11)   ||
			
 
				+           (mode == 14)   ||
			
 
				+           (mode == 15);
			
 
				+}
			
 
				+
			
 
				+void decodeHDREndpointMode7 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3)
			
 
				+{
			
 
				+    const deUint32 m10      = getBit(v1, 7) | (getBit(v2, 7) << 1);
			
 
				+    const deUint32 m23      = getBits(v0, 6, 7);
			
 
				+
			
 
				+    const deUint32 majComp  = (m10 != 3)  ? m10
			
 
				+                            : (m23 != 3)  ? m23
			
 
				+                            :             0;
			
 
				+    
			
 
				+    const deUint32 mode     = (m10 != 3)  ? m23
			
 
				+                            : (m23 != 3)  ? 4
			
 
				+                            :             5;
			
 
				+
			
 
				+    deInt32         red     = (deInt32)getBits(v0, 0, 5);
			
 
				+    deInt32         green   = (deInt32)getBits(v1, 0, 4);
			
 
				+    deInt32         blue    = (deInt32)getBits(v2, 0, 4);
			
 
				+    deInt32         scale   = (deInt32)getBits(v3, 0, 4);
			
 
				+
			
 
				+    {
			
 
				+#define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
			
 
				+#define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5, V6,S6) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); SHOR(V6,S6,x6); } while (false)
			
 
				+
			
 
				+        const deUint32  x0  = getBit(v1, 6);
			
 
				+        const deUint32  x1  = getBit(v1, 5);
			
 
				+        const deUint32  x2  = getBit(v2, 6);
			
 
				+        const deUint32  x3  = getBit(v2, 5);
			
 
				+        const deUint32  x4  = getBit(v3, 7);
			
 
				+        const deUint32  x5  = getBit(v3, 6);
			
 
				+        const deUint32  x6  = getBit(v3, 5);
			
 
				+
			
 
				+        deInt32&        R   = red;
			
 
				+        deInt32&        G   = green;
			
 
				+        deInt32&        B   = blue;
			
 
				+        deInt32&        S   = scale;
			
 
				+
			
 
				+        switch (mode)
			
 
				+        {
			
 
				+            case 0: ASSIGN_X_BITS(R,9,  R,8,  R,7,  R,10,  R,6,  S,6,   S,5); break;
			
 
				+            case 1: ASSIGN_X_BITS(R,8,  G,5,  R,7,  B,5,   R,6,  R,10,  R,9); break;
			
 
				+            case 2: ASSIGN_X_BITS(R,9,  R,8,  R,7,  R,6,   S,7,  S,6,   S,5); break;
			
 
				+            case 3: ASSIGN_X_BITS(R,8,  G,5,  R,7,  B,5,   R,6,  S,6,   S,5); break;
			
 
				+            case 4: ASSIGN_X_BITS(G,6,  G,5,  B,6,  B,5,   R,6,  R,7,   S,5); break;
			
 
				+            case 5: ASSIGN_X_BITS(G,6,  G,5,  B,6,  B,5,   R,6,  S,6,   S,5); break;
			
 
				+            default:
			
 
				+                DE_ASSERT(false);
			
 
				+        }
			
 
				+#undef ASSIGN_X_BITS
			
 
				+#undef SHOR
			
 
				+    }
			
 
				+
			
 
				+    static const int shiftAmounts[] = { 1, 1, 2, 3, 4, 5 };
			
 
				+    DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(shiftAmounts));
			
 
				+
			
 
				+    red     <<= shiftAmounts[mode];
			
 
				+    green   <<= shiftAmounts[mode];
			
 
				+    blue    <<= shiftAmounts[mode];
			
 
				+    scale   <<= shiftAmounts[mode];
			
 
				+
			
 
				+    if (mode != 5)
			
 
				+    {
			
 
				+        green   = red - green;
			
 
				+        blue    = red - blue;
			
 
				+    }
			
 
				+
			
 
				+    if (majComp == 1)
			
 
				+        std::swap(red, green);
			
 
				+    else if (majComp == 2)
			
 
				+        std::swap(red, blue);
			
 
				+
			
 
				+    e0 = UVec4(basisu_astc::clamp(red   - scale,    0, 0xfff),
			
 
				+        basisu_astc::clamp(green    - scale,    0, 0xfff),
			
 
				+        basisu_astc::clamp(blue - scale,    0, 0xfff),
			
 
				+               0x780);
			
 
				+
			
 
				+    e1 = UVec4(basisu_astc::clamp(red,              0, 0xfff),
			
 
				+        basisu_astc::clamp(green,               0, 0xfff),
			
 
				+        basisu_astc::clamp(blue,                0, 0xfff),
			
 
				+               0x780);
			
 
				+}
			
 
				+
			
 
				+void decodeHDREndpointMode11 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5)
			
 
				+{
			
 
				+    const deUint32 major = (getBit(v5, 7) << 1) | getBit(v4, 7);
			
 
				+
			
 
				+    if (major == 3)
			
 
				+    {
			
 
				+        e0 = UVec4(v0<<4, v2<<4, getBits(v4,0,6)<<5, 0x780);
			
 
				+        e1 = UVec4(v1<<4, v3<<4, getBits(v5,0,6)<<5, 0x780);
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        const deUint32 mode = (getBit(v3, 7) << 2) | (getBit(v2, 7) << 1) | getBit(v1, 7);
			
 
				+
			
 
				+        deInt32 a   = (deInt32)((getBit(v1, 6) << 8) | v0);
			
 
				+        deInt32 c   = (deInt32)(getBits(v1, 0, 5));
			
 
				+        deInt32 b0  = (deInt32)(getBits(v2, 0, 5));
			
 
				+        deInt32 b1  = (deInt32)(getBits(v3, 0, 5));
			
 
				+        deInt32 d0  = (deInt32)(getBits(v4, 0, 4));
			
 
				+        deInt32 d1  = (deInt32)(getBits(v5, 0, 4));
			
 
				+
			
 
				+        {
			
 
				+#define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
			
 
				+#define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); } while (false)
			
 
				+            const deUint32 x0 = getBit(v2, 6);
			
 
				+            const deUint32 x1 = getBit(v3, 6);
			
 
				+            const deUint32 x2 = getBit(v4, 6);
			
 
				+            const deUint32 x3 = getBit(v5, 6);
			
 
				+            const deUint32 x4 = getBit(v4, 5);
			
 
				+            const deUint32 x5 = getBit(v5, 5);
			
 
				+
			
 
				+            switch (mode)
			
 
				+            {
			
 
				+                case 0: ASSIGN_X_BITS(b0,6,  b1,6,   d0,6,  d1,6,  d0,5,  d1,5); break;
			
 
				+                case 1: ASSIGN_X_BITS(b0,6,  b1,6,   b0,7,  b1,7,  d0,5,  d1,5); break;
			
 
				+                case 2: ASSIGN_X_BITS(a,9,   c,6,    d0,6,  d1,6,  d0,5,  d1,5); break;
			
 
				+                case 3: ASSIGN_X_BITS(b0,6,  b1,6,   a,9,   c,6,   d0,5,  d1,5); break;
			
 
				+                case 4: ASSIGN_X_BITS(b0,6,  b1,6,   b0,7,  b1,7,  a,9,   a,10); break;
			
 
				+                case 5: ASSIGN_X_BITS(a,9,   a,10,   c,7,   c,6,   d0,5,  d1,5); break;
			
 
				+                case 6: ASSIGN_X_BITS(b0,6,  b1,6,   a,11,  c,6,   a,9,   a,10); break;
			
 
				+                case 7: ASSIGN_X_BITS(a,9,   a,10,   a,11,  c,6,   d0,5,  d1,5); break;
			
 
				+                default:
			
 
				+                    DE_ASSERT(false);
			
 
				+            }
			
 
				+#undef ASSIGN_X_BITS
			
 
				+#undef SHOR
			
 
				+        }
			
 
				+
			
 
				+        static const int numDBits[] = { 7, 6, 7, 6, 5, 6, 5, 6 };
			
 
				+        DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(numDBits));
			
 
				+        d0 = signExtend(d0, numDBits[mode]);
			
 
				+        d1 = signExtend(d1, numDBits[mode]);
			
 
				+        
			
 
				+        const int shiftAmount = (mode >> 1) ^ 3;
			
 
				+        a   = (uint32_t)a  << shiftAmount;
			
 
				+        c   = (uint32_t)c  << shiftAmount;
			
 
				+        b0  = (uint32_t)b0 << shiftAmount;
			
 
				+        b1  = (uint32_t)b1 << shiftAmount;
			
 
				+        d0  = (uint32_t)d0 << shiftAmount;
			
 
				+        d1  = (uint32_t)d1 << shiftAmount;
			
 
				+
			
 
				+        e0 = UVec4(basisu_astc::clamp(a-c, 0, 0xfff), basisu_astc::clamp(a-b0-c-d0, 0, 0xfff), basisu_astc::clamp(a-b1-c-d1, 0, 0xfff), 0x780);
			
 
				+        e1 = UVec4(basisu_astc::clamp(a, 0, 0xfff), basisu_astc::clamp(a-b0, 0, 0xfff), basisu_astc::clamp(a-b1, 0, 0xfff), 0x780);
			
 
				+
			
 
				+        if (major == 1)
			
 
				+        {
			
 
				+            std::swap(e0.x(), e0.y());
			
 
				+            std::swap(e1.x(), e1.y());
			
 
				+        }
			
 
				+        else if (major == 2)
			
 
				+        {
			
 
				+            std::swap(e0.x(), e0.z());
			
 
				+            std::swap(e1.x(), e1.z());
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void decodeHDREndpointMode15(UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5, deUint32 v6In, deUint32 v7In)
			
 
				+{
			
 
				+    decodeHDREndpointMode11(e0, e1, v0, v1, v2, v3, v4, v5);
			
 
				+    
			
 
				+    const deUint32  mode    = (getBit(v7In, 7) << 1) | getBit(v6In, 7);
			
 
				+    deInt32         v6      = (deInt32)getBits(v6In, 0, 6);
			
 
				+    deInt32         v7      = (deInt32)getBits(v7In, 0, 6);
			
 
				+
			
 
				+    if (mode == 3)
			
 
				+    {
			
 
				+        e0.w() = v6 << 5;
			
 
				+        e1.w() = v7 << 5;
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        v6 |= (v7 << (mode+1)) & 0x780;
			
 
				+        v7 &= (0x3f >> mode);
			
 
				+        v7 ^= 0x20 >> mode;
			
 
				+        v7 -= 0x20 >> mode;
			
 
				+        v6 <<= 4-mode;
			
 
				+        v7 <<= 4-mode;
			
 
				+        v7 += v6;
			
 
				+        v7 = basisu_astc::clamp(v7, 0, 0xfff);
			
 
				+        e0.w() = v6;
			
 
				+        e1.w() = v7;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void decodeColorEndpoints (ColorEndpointPair* dst, const deUint32* unquantizedEndpoints, const deUint32* endpointModes, int numPartitions)
			
 
				+{
			
 
				+    int unquantizedNdx = 0;
			
 
				+
			
 
				+    for (int partitionNdx = 0; partitionNdx < numPartitions; partitionNdx++)
			
 
				+    {
			
 
				+        const deUint32      endpointMode    = endpointModes[partitionNdx];
			
 
				+        const deUint32*     v               = &unquantizedEndpoints[unquantizedNdx];
			
 
				+
			
 
				+        UVec4&              e0              = dst[partitionNdx].e0;
			
 
				+        UVec4&              e1              = dst[partitionNdx].e1;
			
 
				+        unquantizedNdx += computeNumColorEndpointValues(endpointMode);
			
 
				+
			
 
				+        switch (endpointMode)
			
 
				+        {
			
 
				+            case 0:
			
 
				+            {
			
 
				+                e0 = UVec4(v[0], v[0], v[0], 0xff);
			
 
				+                e1 = UVec4(v[1], v[1], v[1], 0xff);
			
 
				+                break;
			
 
				+            }
			
 
				+            case 1:
			
 
				+            {
			
 
				+                const deUint32 L0 = (v[0] >> 2) | (getBits(v[1], 6, 7) << 6);
			
 
				+                const deUint32 L1 = basisu_astc::min(0xffu, L0 + getBits(v[1], 0, 5));
			
 
				+                e0 = UVec4(L0, L0, L0, 0xff);
			
 
				+                e1 = UVec4(L1, L1, L1, 0xff);
			
 
				+                break;
			
 
				+            }
			
 
				+            case 2:
			
 
				+            {
			
 
				+                const deUint32 v1Gr     = v[1] >= v[0];
			
 
				+                const deUint32 y0       = v1Gr ? v[0]<<4 : (v[1]<<4) + 8;
			
 
				+                const deUint32 y1       = v1Gr ? v[1]<<4 : (v[0]<<4) - 8;
			
 
				+                e0 = UVec4(y0, y0, y0, 0x780);
			
 
				+                e1 = UVec4(y1, y1, y1, 0x780);
			
 
				+                break;
			
 
				+            }
			
 
				+            case 3:
			
 
				+            {
			
 
				+                const bool      m   = isBitSet(v[0], 7);
			
 
				+                const deUint32  y0  = m ? (getBits(v[1], 5, 7) << 9) | (getBits(v[0], 0, 6) << 2)
			
 
				+                                        : (getBits(v[1], 4, 7) << 8) | (getBits(v[0], 0, 6) << 1);
			
 
				+                const deUint32  d   = m ? getBits(v[1], 0, 4) << 2
			
 
				+                                        : getBits(v[1], 0, 3) << 1;
			
 
				+                const deUint32  y1  = basisu_astc::min(0xfffu, y0+d);
			
 
				+                e0 = UVec4(y0, y0, y0, 0x780);
			
 
				+                e1 = UVec4(y1, y1, y1, 0x780);
			
 
				+                break;
			
 
				+            }
			
 
				+            case 4:
			
 
				+            {
			
 
				+                e0 = UVec4(v[0], v[0], v[0], v[2]);
			
 
				+                e1 = UVec4(v[1], v[1], v[1], v[3]);
			
 
				+                break;
			
 
				+            }
			
 
				+            case 5:
			
 
				+            {
			
 
				+                deInt32 v0 = (deInt32)v[0];
			
 
				+                deInt32 v1 = (deInt32)v[1];
			
 
				+                deInt32 v2 = (deInt32)v[2];
			
 
				+                deInt32 v3 = (deInt32)v[3];
			
 
				+                bitTransferSigned(v1, v0);
			
 
				+                bitTransferSigned(v3, v2);
			
 
				+                e0 = clampedRGBA(IVec4(v0,      v0,     v0,     v2));
			
 
				+                e1 = clampedRGBA(IVec4(v0+v1,   v0+v1,  v0+v1,  v2+v3));
			
 
				+                break;
			
 
				+            }
			
 
				+            case 6:
			
 
				+                e0 = UVec4((v[0]*v[3]) >> 8,    (v[1]*v[3]) >> 8,   (v[2]*v[3]) >> 8,   0xff);
			
 
				+                e1 = UVec4(v[0],                v[1],               v[2],               0xff);
			
 
				+                break;
			
 
				+            case 7:
			
 
				+                decodeHDREndpointMode7(e0, e1, v[0], v[1], v[2], v[3]);
			
 
				+                break;
			
 
				+            case 8:
			
 
				+            {
			
 
				+                if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4])
			
 
				+                {
			
 
				+                    e0 = UVec4(v[0], v[2], v[4], 0xff);
			
 
				+                    e1 = UVec4(v[1], v[3], v[5], 0xff);
			
 
				+                }
			
 
				+                else
			
 
				+                {
			
 
				+                    e0 = blueContract(v[1], v[3], v[5], 0xff).asUint();
			
 
				+                    e1 = blueContract(v[0], v[2], v[4], 0xff).asUint();
			
 
				+                }
			
 
				+                break;
			
 
				+            }
			
 
				+            case 9:
			
 
				+            {
			
 
				+                deInt32 v0 = (deInt32)v[0];
			
 
				+                deInt32 v1 = (deInt32)v[1];
			
 
				+                deInt32 v2 = (deInt32)v[2];
			
 
				+                deInt32 v3 = (deInt32)v[3];
			
 
				+                deInt32 v4 = (deInt32)v[4];
			
 
				+                deInt32 v5 = (deInt32)v[5];
			
 
				+                bitTransferSigned(v1, v0);
			
 
				+                bitTransferSigned(v3, v2);
			
 
				+                bitTransferSigned(v5, v4);
			
 
				+                if (v1+v3+v5 >= 0)
			
 
				+                {
			
 
				+                    e0 = clampedRGBA(IVec4(v0,      v2,     v4,     0xff));
			
 
				+                    e1 = clampedRGBA(IVec4(v0+v1,   v2+v3,  v4+v5,  0xff));
			
 
				+                }
			
 
				+                else
			
 
				+                {
			
 
				+                    e0 = clampedRGBA(blueContract(v0+v1,    v2+v3,  v4+v5,  0xff));
			
 
				+                    e1 = clampedRGBA(blueContract(v0,       v2,     v4,     0xff));
			
 
				+                }
			
 
				+                break;
			
 
				+            }
			
 
				+            case 10:
			
 
				+            {
			
 
				+                e0 = UVec4((v[0]*v[3]) >> 8,    (v[1]*v[3]) >> 8,   (v[2]*v[3]) >> 8,   v[4]);
			
 
				+                e1 = UVec4(v[0],                v[1],               v[2],               v[5]);
			
 
				+                break;
			
 
				+            }
			
 
				+            case 11:
			
 
				+            {
			
 
				+                decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
			
 
				+                break;
			
 
				+            }
			
 
				+            case 12:
			
 
				+            {
			
 
				+                if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4])
			
 
				+                {
			
 
				+                    e0 = UVec4(v[0], v[2], v[4], v[6]);
			
 
				+                    e1 = UVec4(v[1], v[3], v[5], v[7]);
			
 
				+                }
			
 
				+                else
			
 
				+                {
			
 
				+                    e0 = clampedRGBA(blueContract(v[1], v[3], v[5], v[7]));
			
 
				+                    e1 = clampedRGBA(blueContract(v[0], v[2], v[4], v[6]));
			
 
				+                }
			
 
				+                break;
			
 
				+            }
			
 
				+            case 13:
			
 
				+            {
			
 
				+                deInt32 v0 = (deInt32)v[0];
			
 
				+                deInt32 v1 = (deInt32)v[1];
			
 
				+                deInt32 v2 = (deInt32)v[2];
			
 
				+                deInt32 v3 = (deInt32)v[3];
			
 
				+                deInt32 v4 = (deInt32)v[4];
			
 
				+                deInt32 v5 = (deInt32)v[5];
			
 
				+                deInt32 v6 = (deInt32)v[6];
			
 
				+                deInt32 v7 = (deInt32)v[7];
			
 
				+                bitTransferSigned(v1, v0);
			
 
				+                bitTransferSigned(v3, v2);
			
 
				+                bitTransferSigned(v5, v4);
			
 
				+                bitTransferSigned(v7, v6);
			
 
				+                if (v1+v3+v5 >= 0)
			
 
				+                {
			
 
				+                    e0 = clampedRGBA(IVec4(v0,      v2,     v4,     v6));
			
 
				+                    e1 = clampedRGBA(IVec4(v0+v1,   v2+v3,  v4+v5,  v6+v7));
			
 
				+                }
			
 
				+                else
			
 
				+                {
			
 
				+                    e0 = clampedRGBA(blueContract(v0+v1,    v2+v3,  v4+v5,  v6+v7));
			
 
				+                    e1 = clampedRGBA(blueContract(v0,       v2,     v4,     v6));
			
 
				+                }
			
 
				+                break;
			
 
				+            }
			
 
				+            case 14:
			
 
				+                decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
			
 
				+                e0.w() = v[6];
			
 
				+                e1.w() = v[7];
			
 
				+                break;
			
 
				+            case 15:
			
 
				+            {
			
 
				+                decodeHDREndpointMode15(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
			
 
				+                break;
			
 
				+            }
			
 
				+            default:
			
 
				+                DE_ASSERT(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void computeColorEndpoints (ColorEndpointPair* dst, const Block128& blockData, const deUint32* endpointModes, int numPartitions, int numColorEndpointValues, const ISEParams& iseParams, int numBitsAvailable)
			
 
				+{
			
 
				+    const int           colorEndpointDataStart = (numPartitions == 1) ? 17 : 29;
			
 
				+    ISEDecodedResult    colorEndpointData[18];
			
 
				+    
			
 
				+    {
			
 
				+        BitAccessStream dataStream(blockData, colorEndpointDataStart, numBitsAvailable, true);
			
 
				+        decodeISE(&colorEndpointData[0], numColorEndpointValues, dataStream, iseParams);
			
 
				+    }
			
 
				+
			
 
				+    {
			
 
				+        deUint32 unquantizedEndpoints[18];
			
 
				+        unquantizeColorEndpoints(&unquantizedEndpoints[0], &colorEndpointData[0], numColorEndpointValues, iseParams);
			
 
				+        decodeColorEndpoints(dst, &unquantizedEndpoints[0], &endpointModes[0], numPartitions);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void unquantizeWeights (deUint32 dst[64], const ISEDecodedResult* weightGrid, const ASTCBlockMode& blockMode)
			
 
				+{
			
 
				+    const int           numWeights  = computeNumWeights(blockMode);
			
 
				+    const ISEParams&    iseParams   = blockMode.weightISEParams;
			
 
				+
			
 
				+    if ((iseParams.mode == ISEMODE_TRIT) || (iseParams.mode == ISEMODE_QUINT))
			
 
				+    {
			
 
				+        const int rangeCase = iseParams.numBits*2 + (iseParams.mode == ISEMODE_QUINT ? 1 : 0);
			
 
				+
			
 
				+        if ((rangeCase == 0) || (rangeCase == 1))
			
 
				+        {
			
 
				+            static const deUint32 map0[3]   = { 0, 32, 63 };
			
 
				+            static const deUint32 map1[5]   = { 0, 16, 32, 47, 63 };
			
 
				+            const deUint32* const map = (rangeCase == 0) ? &map0[0] : &map1[0];
			
 
				+
			
 
				+            for (int i = 0; i < numWeights; i++)
			
 
				+            {
			
 
				+                DE_ASSERT(weightGrid[i].v < (rangeCase == 0 ? 3u : 5u));
			
 
				+                dst[i] = map[weightGrid[i].v];
			
 
				+            }
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            DE_ASSERT(rangeCase <= 6);
			
 
				+            static const deUint32   Ca[5]   = { 50, 28, 23, 13, 11 };
			
 
				+            const deUint32          C       = Ca[rangeCase-2];
			
 
				+
			
 
				+            for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
			
 
				+            {
			
 
				+                const deUint32 a = getBit(weightGrid[weightNdx].m, 0);
			
 
				+                const deUint32 b = getBit(weightGrid[weightNdx].m, 1);
			
 
				+                const deUint32 c = getBit(weightGrid[weightNdx].m, 2);
			
 
				+                
			
 
				+                const deUint32 A = (a == 0) ? 0 : (1<<7)-1;
			
 
				+                const deUint32 B = (rangeCase == 2) ? 0
			
 
				+                                 : (rangeCase == 3) ? 0
			
 
				+                                 : (rangeCase == 4) ? (b << 6) | (b << 2) | (b << 0)
			
 
				+                                 : (rangeCase == 5) ? (b << 6) | (b << 1)
			
 
				+                                 : (rangeCase == 6) ? (c << 6) | (b << 5) | (c << 1) |  (b << 0)
			
 
				+                                 : (deUint32)-1;
			
 
				+
			
 
				+                dst[weightNdx] = (((weightGrid[weightNdx].tq*C + B) ^ A) >> 2) | (A & 0x20);
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
			
 
				+        for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
			
 
				+            dst[weightNdx] = bitReplicationScale(weightGrid[weightNdx].v, iseParams.numBits, 6);
			
 
				+    }
			
 
				+
			
 
				+    for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
			
 
				+        dst[weightNdx] += dst[weightNdx] > 32 ? 1 : 0;
			
 
				+
			
 
				+    // Initialize nonexistent weights to poison values
			
 
				+    for (int weightNdx = numWeights; weightNdx < 64; weightNdx++)
			
 
				+        dst[weightNdx] = ~0u;
			
 
				+}
			
 
				+
			
 
				+void interpolateWeights (TexelWeightPair* dst, const deUint32 (&unquantizedWeights) [64], int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
			
 
				+{
			
 
				+    const int       numWeightsPerTexel  = blockMode.isDualPlane ? 2 : 1;
			
 
				+    const deUint32  scaleX              = (1024 + blockWidth/2) / (blockWidth-1);
			
 
				+    const deUint32  scaleY              = (1024 + blockHeight/2) / (blockHeight-1);
			
 
				+    DE_ASSERT(blockMode.weightGridWidth*blockMode.weightGridHeight*numWeightsPerTexel <= (int)DE_LENGTH_OF_ARRAY(unquantizedWeights));
			
 
				+
			
 
				+    for (int texelY = 0; texelY < blockHeight; texelY++)
			
 
				+    {
			
 
				+        for (int texelX = 0; texelX < blockWidth; texelX++)
			
 
				+        {
			
 
				+            const deUint32 gX   = (scaleX*texelX*(blockMode.weightGridWidth-1) + 32) >> 6;
			
 
				+            const deUint32 gY   = (scaleY*texelY*(blockMode.weightGridHeight-1) + 32) >> 6;
			
 
				+            const deUint32 jX   = gX >> 4;
			
 
				+            const deUint32 jY   = gY >> 4;
			
 
				+            const deUint32 fX   = gX & 0xf;
			
 
				+            const deUint32 fY   = gY & 0xf;
			
 
				+            const deUint32 w11  = (fX*fY + 8) >> 4;
			
 
				+            const deUint32 w10  = fY - w11;
			
 
				+            const deUint32 w01  = fX - w11;
			
 
				+            const deUint32 w00  = 16 - fX - fY + w11;
			
 
				+            const deUint32 i00  = jY*blockMode.weightGridWidth + jX;
			
 
				+            const deUint32 i01  = i00 + 1;
			
 
				+            const deUint32 i10  = i00 + blockMode.weightGridWidth;
			
 
				+            const deUint32 i11  = i00 + blockMode.weightGridWidth + 1;
			
 
				+            
			
 
				+            // These addresses can be out of bounds, but respective weights will be 0 then.
			
 
				+            DE_ASSERT(deInBounds32(i00, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w00 == 0);
			
 
				+            DE_ASSERT(deInBounds32(i01, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w01 == 0);
			
 
				+            DE_ASSERT(deInBounds32(i10, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w10 == 0);
			
 
				+            DE_ASSERT(deInBounds32(i11, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w11 == 0);
			
 
				+
			
 
				+            for (int texelWeightNdx = 0; texelWeightNdx < numWeightsPerTexel; texelWeightNdx++)
			
 
				+            {
			
 
				+                // & 0x3f clamps address to bounds of unquantizedWeights
			
 
				+                const deUint32 p00  = unquantizedWeights[(i00 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
			
 
				+                const deUint32 p01  = unquantizedWeights[(i01 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
			
 
				+                const deUint32 p10  = unquantizedWeights[(i10 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
			
 
				+                const deUint32 p11  = unquantizedWeights[(i11 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
			
 
				+                                
			
 
				+                dst[texelY*blockWidth + texelX].w[texelWeightNdx] = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void computeTexelWeights (TexelWeightPair* dst, const Block128& blockData, int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
			
 
				+{
			
 
				+    ISEDecodedResult weightGrid[64];
			
 
				+
			
 
				+    {
			
 
				+        BitAccessStream dataStream(blockData, 127, computeNumRequiredBits(blockMode.weightISEParams, computeNumWeights(blockMode)), false);
			
 
				+        decodeISE(&weightGrid[0], computeNumWeights(blockMode), dataStream, blockMode.weightISEParams);
			
 
				+    }
			
 
				+
			
 
				+    {
			
 
				+        deUint32 unquantizedWeights[64];
			
 
				+        unquantizeWeights(&unquantizedWeights[0], &weightGrid[0], blockMode);
			
 
				+
			
 
				+        interpolateWeights(dst, unquantizedWeights, blockWidth, blockHeight, blockMode);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+inline deUint32 hash52 (deUint32 v)
			
 
				+{
			
 
				+    deUint32 p = v;
			
 
				+    p ^= p >> 15;   p -= p << 17;   p += p << 7;    p += p << 4;
			
 
				+    p ^= p >>  5;   p += p << 16;   p ^= p >> 7;    p ^= p >> 3;
			
 
				+    p ^= p <<  6;   p ^= p >> 17;
			
 
				+    return p;
			
 
				+}
			
 
				+
			
 
				+int computeTexelPartition (deUint32 seedIn, deUint32 xIn, deUint32 yIn, deUint32 zIn, int numPartitions, bool smallBlock)
			
 
				+{
			
 
				+    DE_ASSERT(zIn == 0);
			
 
				+
			
 
				+    const deUint32  x       = smallBlock ? xIn << 1 : xIn;
			
 
				+    const deUint32  y       = smallBlock ? yIn << 1 : yIn;
			
 
				+    const deUint32  z       = smallBlock ? zIn << 1 : zIn;
			
 
				+    const deUint32  seed    = seedIn + 1024*(numPartitions-1);
			
 
				+    const deUint32  rnum    = hash52(seed);
			
 
				+
			
 
				+    deUint8         seed1   = (deUint8)( rnum                           & 0xf);
			
 
				+    deUint8         seed2   = (deUint8)((rnum >>  4)                    & 0xf);
			
 
				+    deUint8         seed3   = (deUint8)((rnum >>  8)                    & 0xf);
			
 
				+    deUint8         seed4   = (deUint8)((rnum >> 12)                    & 0xf);
			
 
				+    deUint8         seed5   = (deUint8)((rnum >> 16)                    & 0xf);
			
 
				+    deUint8         seed6   = (deUint8)((rnum >> 20)                    & 0xf);
			
 
				+    deUint8         seed7   = (deUint8)((rnum >> 24)                    & 0xf);
			
 
				+    deUint8         seed8   = (deUint8)((rnum >> 28)                    & 0xf);
			
 
				+    deUint8         seed9   = (deUint8)((rnum >> 18)                    & 0xf);
			
 
				+    deUint8         seed10  = (deUint8)((rnum >> 22)                    & 0xf);
			
 
				+    deUint8         seed11  = (deUint8)((rnum >> 26)                    & 0xf);
			
 
				+    deUint8         seed12  = (deUint8)(((rnum >> 30) | (rnum << 2))    & 0xf);
			
 
				+
			
 
				+    seed1  = (deUint8)(seed1  * seed1 );
			
 
				+    seed2  = (deUint8)(seed2  * seed2 );
			
 
				+    seed3  = (deUint8)(seed3  * seed3 );
			
 
				+    seed4  = (deUint8)(seed4  * seed4 );
			
 
				+    seed5  = (deUint8)(seed5  * seed5 );
			
 
				+    seed6  = (deUint8)(seed6  * seed6 );
			
 
				+    seed7  = (deUint8)(seed7  * seed7 );
			
 
				+    seed8  = (deUint8)(seed8  * seed8 );
			
 
				+    seed9  = (deUint8)(seed9  * seed9 );
			
 
				+    seed10 = (deUint8)(seed10 * seed10);
			
 
				+    seed11 = (deUint8)(seed11 * seed11);
			
 
				+    seed12 = (deUint8)(seed12 * seed12);
			
 
				+
			
 
				+    const int shA = (seed & 2) != 0     ? 4     : 5;
			
 
				+    const int shB = numPartitions == 3  ? 6     : 5;
			
 
				+    const int sh1 = (seed & 1) != 0     ? shA   : shB;
			
 
				+    const int sh2 = (seed & 1) != 0     ? shB   : shA;
			
 
				+    const int sh3 = (seed & 0x10) != 0  ? sh1   : sh2;
			
 
				+
			
 
				+    seed1  = (deUint8)(seed1  >> sh1);
			
 
				+    seed2  = (deUint8)(seed2  >> sh2);
			
 
				+    seed3  = (deUint8)(seed3  >> sh1);
			
 
				+    seed4  = (deUint8)(seed4  >> sh2);
			
 
				+    seed5  = (deUint8)(seed5  >> sh1);
			
 
				+    seed6  = (deUint8)(seed6  >> sh2);
			
 
				+    seed7  = (deUint8)(seed7  >> sh1);
			
 
				+    seed8  = (deUint8)(seed8  >> sh2);
			
 
				+    seed9  = (deUint8)(seed9  >> sh3);
			
 
				+    seed10 = (deUint8)(seed10 >> sh3);
			
 
				+    seed11 = (deUint8)(seed11 >> sh3);
			
 
				+    seed12 = (deUint8)(seed12 >> sh3);
			
 
				+
			
 
				+    const int a =                         0x3f & (seed1*x + seed2*y + seed11*z + (rnum >> 14));
			
 
				+    const int b =                         0x3f & (seed3*x + seed4*y + seed12*z + (rnum >> 10));
			
 
				+    const int c = (numPartitions >= 3) ?  0x3f & (seed5*x + seed6*y + seed9*z  + (rnum >>  6))    : 0;
			
 
				+    const int d = (numPartitions >= 4) ?  0x3f & (seed7*x + seed8*y + seed10*z + (rnum >>  2))    : 0;
			
 
				+
			
 
				+    return (a >= b && a >= c && a >= d) ? 0
			
 
				+         : (b >= c && b >= d)           ? 1
			
 
				+         : (c >= d)                     ? 2
			
 
				+         :                                3;
			
 
				+}
			
 
				+
			
 
				+// Computes the final color of every texel of one block and stores it in dst.
				+//
				+// dst receives blockWidth*blockHeight texels in row-major order, four components
				+// per texel: deUint8 components when isSRGB is set, float components otherwise.
				+// colorEndpoints/colorEndpointModes are indexed by partition, texelWeights by texel.
				+// ccs is the channel that uses the second weight (w[1]); a value that matches no
				+// channel (the caller passes -1 for single-plane blocks) makes every channel use w[0].
				+//
				+// Returns DECOMPRESS_RESULT_ERROR if any texel hit an HDR endpoint mode while
				+// isLDRMode is set (those texels are filled with magenta), otherwise
				+// DECOMPRESS_RESULT_VALID_BLOCK.
				+DecompressResult setTexelColors (void* dst, ColorEndpointPair* colorEndpoints, TexelWeightPair* texelWeights, int ccs, deUint32 partitionIndexSeed,
				+                                 int numPartitions, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode, const deUint32* colorEndpointModes)
				+{
				+    const bool          smallBlock  = blockWidth*blockHeight < 31;
				+    DecompressResult    result      = DECOMPRESS_RESULT_VALID_BLOCK;
				+    // Only the first numPartitions entries are initialized and read.
				+    bool                isHDREndpoint[4];
				+
				+    for (int i = 0; i < numPartitions; i++)
				+    {
				+        isHDREndpoint[i] = isColorEndpointModeHDR(colorEndpointModes[i]);
				+    }
				+
				+    for (int texelY = 0; texelY < blockHeight; texelY++)
				+    {
				+        for (int texelX = 0; texelX < blockWidth; texelX++)
				+        {
				+            const int texelNdx = texelY * blockWidth + texelX;
				+            // Single-partition blocks skip the partition hash entirely.
				+            const int colorEndpointNdx = (numPartitions == 1) ? 0 : computeTexelPartition(partitionIndexSeed, texelX, texelY, 0, numPartitions, smallBlock);
				+
				+            DE_ASSERT(colorEndpointNdx < numPartitions);
				+            const UVec4& e0 = colorEndpoints[colorEndpointNdx].e0;
				+            const UVec4& e1 = colorEndpoints[colorEndpointNdx].e1;
				+            const TexelWeightPair& weight = texelWeights[texelNdx];
				+
				+            if (isLDRMode && isHDREndpoint[colorEndpointNdx])
				+            {
				+                // HDR endpoint in an LDR decode: emit opaque magenta and flag the block as an error.
				+                if (isSRGB)
				+                {
				+                    ((deUint8*)dst)[texelNdx * 4 + 0] = 0xff;
				+                    ((deUint8*)dst)[texelNdx * 4 + 1] = 0;
				+                    ((deUint8*)dst)[texelNdx * 4 + 2] = 0xff;
				+                    ((deUint8*)dst)[texelNdx * 4 + 3] = 0xff;
				+                }
				+                else
				+                {
				+                    ((float*)dst)[texelNdx * 4 + 0] = 1.0f;
				+                    ((float*)dst)[texelNdx * 4 + 1] = 0;
				+                    ((float*)dst)[texelNdx * 4 + 2] = 1.0f;
				+                    ((float*)dst)[texelNdx * 4 + 3] = 1.0f;
				+                }
				+                result = DECOMPRESS_RESULT_ERROR;
				+            }
				+            else
				+            {
				+                for (int channelNdx = 0; channelNdx < 4; channelNdx++)
				+                {
				+                    if (!isHDREndpoint[colorEndpointNdx] || (channelNdx == 3 && colorEndpointModes[colorEndpointNdx] == 14)) // \note Alpha for mode 14 is treated the same as LDR.
				+                    {
				+                        // LDR path: widen each endpoint to 16 bits (low byte is 0x80 for sRGB,
				+                        // a copy of the endpoint otherwise), then blend with a 0..64 weight.
				+                        const deUint32 c0 = (e0[channelNdx] << 8) | (isSRGB ? 0x80 : e0[channelNdx]);
				+                        const deUint32 c1 = (e1[channelNdx] << 8) | (isSRGB ? 0x80 : e1[channelNdx]);
				+                        const deUint32 w = weight.w[ccs == channelNdx ? 1 : 0];
				+                        const deUint32 c = (c0 * (64 - w) + c1 * w + 32) / 64;
				+
				+                        // sRGB output keeps the high byte; linear output maps 65535 to exactly 1.0.
				+                        if (isSRGB)
				+                            ((deUint8*)dst)[texelNdx * 4 + channelNdx] = (deUint8)((c & 0xff00) >> 8);
				+                        else
				+                            ((float*)dst)[texelNdx * 4 + channelNdx] = (c == 65535) ? 1.0f : (float)c / 65536.0f;
				+                    }
				+                    else
				+                    {
				+                        DE_ASSERT(!isSRGB);
				+                        //DE_STATIC_ASSERT((basisu_astc::meta::TypesSame<deFloat16, deUint16>::Value));
				+
				+                        // HDR path: blend the endpoints shifted left by 4, then split the result
				+                        // into an exponent field (bits 11..15) and a mantissa field (bits 0..10).
				+                        const deUint32      c0 = e0[channelNdx] << 4;
				+                        const deUint32      c1 = e1[channelNdx] << 4;
				+                        const deUint32      w = weight.w[(ccs == channelNdx) ? 1 : 0];
				+                        const deUint32      c = (c0 * (64 - w) + c1 * w + 32) / 64;
				+                        const deUint32      e = getBits(c, 11, 15);
				+                        const deUint32      m = getBits(c, 0, 10);
				+                        // Piecewise-linear remap of the mantissa field before it is reduced to 10 bits.
				+                        const deUint32      mt = (m < 512) ? (3 * m)
				+                            : (m >= 1536) ? (5 * m - 2048)
				+                            : (4 * m - 512);
				+
				+                        const deFloat16     cf = (deFloat16)((e << 10) + (mt >> 3));
				+
				+                        // Inf/NaN results are replaced by 0x7bff before conversion to float.
				+                        ((float*)dst)[texelNdx * 4 + channelNdx] = deFloat16To32(isFloat16InfOrNan(cf) ? 0x7bff : cf);
				+                    }
				+
				+                } // channelNdx
				+            }
				+        } // texelX
				+    } // texelY
				+
				+    return result;
				+}
			
 
				+
			
 
				+// Decodes one 128-bit ASTC block into dst.
				+//
				+// dst is written as deUint8 RGBA when isSRGB is set and as float RGBA otherwise
				+// (see setTexelColors / setASTCErrorColorBlock for the exact layout).
				+// isSRGB requires isLDR (asserted below).
				+// Returns DECOMPRESS_RESULT_ERROR after filling dst with the error color when the
				+// block mode, weight grid, partition/dual-plane combination or color endpoint
				+// budget is invalid; otherwise returns the result of the void-extent or regular
				+// texel decoding path.
				+DecompressResult decompressBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDR)
				+{
				+    DE_ASSERT(isLDR || !isSRGB);
				+
				+    // Decode block mode.
				+    const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10));
				+
				+    // Check for block mode errors.
				+    if (blockMode.isError)
				+    {
				+        setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
				+        return DECOMPRESS_RESULT_ERROR;
				+    }
				+
				+    // Separate path for void-extent.
				+    if (blockMode.isVoidExtent)
				+        return decodeVoidExtentBlock(dst, blockData, blockWidth, blockHeight, isSRGB, isLDR);
				+
				+    // Compute weight grid values.
				+    const int numWeights            = computeNumWeights(blockMode);
				+    const int numWeightDataBits     = computeNumRequiredBits(blockMode.weightISEParams, numWeights);
				+    // Partition count is stored minus one in bits 11..12.
				+    const int numPartitions         = (int)blockData.getBits(11, 12) + 1;
				+
				+    // Check for errors in weight grid, partition and dual-plane parameters.
				+    if ((numWeights > 64)                               ||
				+        (numWeightDataBits > 96)                        ||
				+        (numWeightDataBits < 24)                        ||
				+        (blockMode.weightGridWidth > blockWidth)        ||
				+        (blockMode.weightGridHeight > blockHeight)      ||
				+        ((numPartitions == 4) && blockMode.isDualPlane))
				+    {
				+        setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
				+        return DECOMPRESS_RESULT_ERROR;
				+    }
				+
				+    // Compute number of bits available for color endpoint data.
				+    // A single partition, or a zero in bits 23..24, means all partitions share one endpoint mode.
				+    const bool  isSingleUniqueCem           = (numPartitions == 1) || (blockData.getBits(23, 24) == 0);
				+
				+    // Config bits: 17 for one partition, 29 for a shared mode, 25 + 3 per partition
				+    // otherwise, plus 2 more when the block is dual-plane.
				+    const int   numConfigDataBits           = ((numPartitions == 1) ? 17 : isSingleUniqueCem ? 29 : 25 + 3*numPartitions) +
				+                                              (blockMode.isDualPlane ? 2 : 0);
				+
				+    const int   numBitsForColorEndpoints    = 128 - numWeightDataBits - numConfigDataBits;
				+
				+    // Bit position derived from the weight data size; passed to decodeColorEndpointModes
				+    // and used below to locate the 2-bit plane selector.
				+    const int   extraCemBitsStart           = 127 - numWeightDataBits - (isSingleUniqueCem      ? -1
				+                                                                        : (numPartitions == 4)  ? 7
				+                                                                        : (numPartitions == 3)  ? 4
				+                                                                        : (numPartitions == 2)  ? 1
				+                                                                        : 0);
				+
				+    // Decode color endpoint modes.
				+    deUint32 colorEndpointModes[4];
				+    decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart);
				+    const int numColorEndpointValues = computeNumColorEndpointValues(colorEndpointModes, numPartitions);
				+
				+    // Check for errors in color endpoint value count.
				+    // More than 18 values, or fewer than ceil(13*values/5) bits available, is rejected.
				+    if ((numColorEndpointValues > 18) || (numBitsForColorEndpoints < (int)deDivRoundUp32(13*numColorEndpointValues, 5)))
				+    {
				+        setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
				+        return DECOMPRESS_RESULT_ERROR;
				+    }
				+
				+    // Compute color endpoints.
				+    ColorEndpointPair colorEndpoints[4];
				+    computeColorEndpoints(&colorEndpoints[0], blockData, &colorEndpointModes[0], numPartitions, numColorEndpointValues,
				+                          computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues), numBitsForColorEndpoints);
				+
				+    // Compute texel weights.
				+    // NOTE(review): the buffer holds MAX_BLOCK_WIDTH*MAX_BLOCK_HEIGHT entries and
				+    // blockWidth/blockHeight are not range-checked in this function - confirm callers validate them.
				+    TexelWeightPair texelWeights[MAX_BLOCK_WIDTH*MAX_BLOCK_HEIGHT];
				+    computeTexelWeights(&texelWeights[0], blockData, blockWidth, blockHeight, blockMode);
				+
				+    // Set texel colors.
				+    // ccs is the 2-bit plane selector for dual-plane blocks, -1 (matches no channel) otherwise.
				+    const int       ccs                     = blockMode.isDualPlane ? (int)blockData.getBits(extraCemBitsStart-2, extraCemBitsStart-1) : -1;
				+    // The seed in bits 13..22 is only read for multi-partition blocks.
				+    const deUint32  partitionIndexSeed      = (numPartitions > 1) ? blockData.getBits(13, 22) : (deUint32)-1;
				+
				+    return setTexelColors(dst, &colorEndpoints[0], &texelWeights[0], ccs, partitionIndexSeed, numPartitions, blockWidth, blockHeight, isSRGB, isLDR, &colorEndpointModes[0]);
				+}
			
 
				+
			
 
				+// Classifies one ASTC block without decoding any texels.
				+// Returns 1 if the block is HDR (an HDR void-extent block, or at least one
				+// partition uses an HDR color endpoint mode), 0 if it is LDR, and -1 if the
				+// block mode or the weight grid / partition configuration is invalid.
				+int isHDR(const Block128& blockData, int blockWidth, int blockHeight)
				+{
				+    const ASTCBlockMode mode = getASTCBlockMode(blockData.getBits(0, 10));
				+
				+    if (mode.isError)
				+        return -1;
				+
				+    // Void-extent blocks carry their HDR flag directly in bit 9.
				+    if (mode.isVoidExtent)
				+        return blockData.isBitSet(9) ? 1 : 0;
				+
				+    const int weightCount    = computeNumWeights(mode);
				+    const int weightBits     = computeNumRequiredBits(mode.weightISEParams, weightCount);
				+    const int partitionCount = (int)blockData.getBits(11, 12) + 1;
				+
				+    // Same validity rules as the full decoder applies before touching endpoint data.
				+    const bool weightsOk = (weightCount <= 64) && (weightBits <= 96) && (weightBits >= 24);
				+    const bool gridOk    = (mode.weightGridWidth <= blockWidth) && (mode.weightGridHeight <= blockHeight);
				+    const bool planesOk  = !((partitionCount == 4) && mode.isDualPlane);
				+
				+    if (!(weightsOk && gridOk && planesOk))
				+        return -1;
				+
				+    // Offset subtracted (together with the weight bits) from 127 to locate the extra
				+    // CEM bits: -1 when all partitions share one mode, else it depends on the partition count.
				+    int cemOffset = -1;
				+    if ((partitionCount != 1) && (blockData.getBits(23, 24) != 0))
				+    {
				+        switch (partitionCount)
				+        {
				+        case 4:  cemOffset = 7; break;
				+        case 3:  cemOffset = 4; break;
				+        case 2:  cemOffset = 1; break;
				+        default: cemOffset = 0; break;
				+        }
				+    }
				+
				+    const int extraCemBitsStart = 127 - weightBits - cemOffset;
				+
				+    deUint32 modes[4];
				+    decodeColorEndpointModes(modes, blockData, partitionCount, extraCemBitsStart);
				+
				+    // Stop at the first partition whose endpoint mode is HDR.
				+    int partition = 0;
				+    while ((partition < partitionCount) && !isColorEndpointModeHDR(modes[partition]))
				+        ++partition;
				+
				+    return (partition < partitionCount) ? 1 : 0;
				+}
			
 
				+
			
 
				+// Raw 16-bit storage for a half-precision bit pattern (1 sign, 5 exponent, 10 mantissa bits).
				+typedef uint16_t half_float;
				+
				+// Converts a 32-bit float to a half-float bit pattern.
				+// toward_zero selects truncation of the mantissa; otherwise lrintf() rounding
				+// (i.e. the current floating-point rounding mode) is used.
				+// Inf and any magnitude with an unbiased exponent above 15 become infinity, NaN
				+// becomes exponent 31 / mantissa 1, float zeros and float denormals become a
				+// signed zero, and small normal floats become half denormals.
				+half_float float_to_half(float val, bool toward_zero)
				+{
				+    union { float f; int32_t i; uint32_t u; } bits = { val };
				+
				+    const int src_sign = (bits.i >> 31) & 0x1;
				+    const int src_exp  = (bits.i >> 23) & 0xFF;
				+    const int src_mant = bits.i & 0x7FFFFF;
				+
				+    int half_exp = 0;
				+    int half_mant = 0;
				+
				+    if (src_exp == 0xff)
				+    {
				+        // Inf keeps a zero mantissa, every NaN collapses to mantissa 1.
				+        half_exp = 31;
				+        half_mant = (src_mant != 0) ? 1 : 0;
				+    }
				+    else if (src_exp != 0)
				+    {
				+        const int unbiased_exp = src_exp - 127;
				+
				+        if (unbiased_exp > 15)
				+        {
				+            // Too large for a half: infinity.
				+            half_exp = 31;
				+        }
				+        else
				+        {
				+            float scaled_mant;
				+
				+            if (unbiased_exp < -14)
				+            {
				+                // Below the smallest normal half: express the magnitude in units of 2^-24
				+                // and leave the exponent field at zero.
				+                scaled_mant = (1 << 24) * fabsf(bits.f);
				+            }
				+            else
				+            {
				+                // Normal half: rebias the exponent and scale the 23-bit mantissa down to 10 bits.
				+                half_exp = unbiased_exp + 15;
				+                scaled_mant = (float)src_mant * (1.0f / (float)(1 << 13));
				+            }
				+
				+            half_mant = toward_zero ? (int)truncf(scaled_mant) : (int)lrintf(scaled_mant);
				+        }
				+    }
				+
				+    // Rounding may carry out of the mantissa; bump the exponent when it does.
				+    assert((0 <= half_mant) && (half_mant <= 1024));
				+    if (half_mant == 1024)
				+    {
				+        ++half_exp;
				+        half_mant = 0;
				+    }
				+
				+    assert((src_sign >= 0) && (src_sign <= 1));
				+    assert((half_exp >= 0) && (half_exp <= 31));
				+    assert((half_mant >= 0) && (half_mant <= 1023));
				+
				+    return (half_float)((src_sign << 15) | (half_exp << 10) | half_mant);
				+}
				+
				+// Expands a half-float bit pattern to a 32-bit float.
				+// Zeros, denormals, infinities and NaNs are all handled; a NaN keeps its
				+// mantissa bits (shifted into the float mantissa) and its sign.
				+float half_to_float(half_float hval)
				+{
				+    union { float f; uint32_t u; } out = { 0 };
				+
				+    const uint32_t sign_mask = (((uint32_t)hval >> 15) & 1) << 31;
				+    uint32_t exponent = ((uint32_t)hval >> 10) & 0x1F;
				+    uint32_t mantissa = (uint32_t)hval & 0x3FF;
				+
				+    if (exponent == 31)
				+    {
				+        // +/- INF when the mantissa is zero, +/- NaN otherwise.
				+        out.u = sign_mask | 0x7f800000 | (mantissa << 13);
				+        return out.f;
				+    }
				+
				+    if (exponent == 0)
				+    {
				+        if (mantissa == 0)
				+        {
				+            // +/- 0
				+            out.u = sign_mask;
				+            return out.f;
				+        }
				+
				+        // Denormal: shift the mantissa up until its implicit bit (0x400) appears.
				+        // The exponent is unsigned and deliberately wraps below zero here; the
				+        // rebias further down brings it back into range.
				+        exponent = 1;
				+        for (; (mantissa & 0x00000400) == 0; mantissa <<= 1)
				+            --exponent;
				+
				+        mantissa &= ~0x00000400;
				+    }
				+
				+    // Rebias the exponent from 15 to 127 and widen the mantissa from 10 to 23 bits.
				+    exponent += (127 - 15);
				+    mantissa <<= 13;
				+
				+    assert(sign_mask <= 0x80000000);
				+    assert(mantissa <= 0x7FFFFF);
				+    assert(exponent <= 255);
				+
				+    out.u = mantissa | (exponent << 23) | sign_mask;
				+    return out.f;
				+}
			
 
				+
			
 
				+} // anonymous
			
 
				+
			
 
				+// See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.inline.html#_hdr_endpoint_decoding
			
 
				+// Quantizes the n floats at pVals, in place, to values exactly representable as
				+// half floats by round-tripping each one through float_to_half(..., true)
				+// (round toward zero) and half_to_float().
				+static void convert_to_half_prec(uint32_t n, float* pVals)
				+{
				+#if 0
				+    // Disabled alternative that relied on the FPU rounding mode instead.
				+    const int prev_dir = fesetround(FE_TOWARDZERO);
				+
				+    for (uint32_t i = 0; i < n; i++)
				+        pVals[i] = half_to_float(float_to_half(pVals[i]));
				+
				+    fesetround(prev_dir);
				+
				+    for (uint32_t i = 0; i < n; i++)
				+    {
				+        assert(pVals[i] == half_to_float(float_to_half(pVals[i], true)));
				+    }
				+#else
				+    // Explicit truncation keeps the result independent of the FPU rounding mode.
				+    float* const pEnd = pVals + n;
				+
				+    for (float* pCur = pVals; pCur != pEnd; ++pCur)
				+        *pCur = half_to_float(float_to_half(*pCur, true));
				+#endif
				+}
			
 
				+
			
 
				+// Decodes one ASTC block as LDR into 8-bit RGBA.
				+//
				+// pDst receives blockWidth*blockHeight texels in row-major order, 4 bytes each.
				+// data points at the 16-byte block. isSRGB selects the sRGB endpoint expansion
				+// (the decoder then writes bytes straight into pDst); otherwise the block is
				+// decoded to floats and converted to 8 bits with rounding.
				+//
				+// Returns false, without decoding, when the block dimensions are outside
				+// 1..MAX_BLOCK_WIDTH x 1..MAX_BLOCK_HEIGHT, and false when the block is not a
				+// valid LDR block. In the non-sRGB case pDst is left untouched on a decode failure.
				+bool decompress_ldr(uint8_t *pDst, const uint8_t * data, bool isSRGB, int blockWidth, int blockHeight)
				+{
				+    // The scratch buffer below and the decoder's per-texel weight array are both
				+    // sized for MAX_BLOCK_WIDTH x MAX_BLOCK_HEIGHT texels; larger (or non-positive)
				+    // dimensions would index past them.
				+    if ((blockWidth < 1) || (blockHeight < 1) || (blockWidth > MAX_BLOCK_WIDTH) || (blockHeight > MAX_BLOCK_HEIGHT))
				+    {
				+        return false;
				+    }
				+
				+    float linear[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4];
				+
				+    const Block128 blockData(data);
				+
				+    // If isSRGB is true, this writes uint8_t's. Otherwise it writes floats.
				+    void* const pDecodeDst = isSRGB ? (void*)pDst : (void*)&linear[0];
				+
				+    if (decompressBlock(pDecodeDst, blockData, blockWidth, blockHeight, isSRGB, true) != DECOMPRESS_RESULT_VALID_BLOCK)
				+    {
				+        return false;
				+    }
				+
				+    if (!isSRGB)
				+    {
				+        // Convert the floats to 8-bits with rounding: scale to 16 bits, clamp, keep the high byte.
				+        const int numComps = blockWidth * blockHeight * 4;
				+
				+        for (int i = 0; i < numComps; i++)
				+        {
				+            pDst[i] = (uint8_t)(basisu_astc::clamp<int>((int)(linear[i] * 65536.0f + .5f), 0, 65535) >> 8);
				+        }
				+    }
				+
				+    return true;
				+}
			
 
				+
			
 
				+// Decodes one ASTC block (LDR or HDR endpoint modes) into float RGBA.
				+//
				+// pDstRGBA receives blockWidth*blockHeight texels in row-major order, 4 floats
				+// each; every component is then rounded toward zero to a value representable
				+// as a half float. data points at the 16-byte block.
				+//
				+// Returns false, without decoding, when the block dimensions are outside
				+// 1..MAX_BLOCK_WIDTH x 1..MAX_BLOCK_HEIGHT, and false when the block is invalid.
				+bool decompress_hdr(float* pDstRGBA, const uint8_t* data, int blockWidth, int blockHeight)
				+{
				+    // The decoder keeps its per-texel weights in an array sized for
				+    // MAX_BLOCK_WIDTH x MAX_BLOCK_HEIGHT texels, and the texel count below must
				+    // not come from negative dimensions.
				+    if ((blockWidth < 1) || (blockHeight < 1) || (blockWidth > MAX_BLOCK_WIDTH) || (blockHeight > MAX_BLOCK_HEIGHT))
				+    {
				+        return false;
				+    }
				+
				+    const Block128 blockData(data);
				+
				+    if (decompressBlock(pDstRGBA, blockData, blockWidth, blockHeight, false, false) != DECOMPRESS_RESULT_VALID_BLOCK)
				+    {
				+        return false;
				+    }
				+
				+    convert_to_half_prec(blockWidth * blockHeight * 4, pDstRGBA);
				+
				+    return true;
				+}
			
 
				+
			
 
				+// Reports whether one ASTC block is HDR without decoding its texels.
				+// Returns false (and sets is_hdr to false) when the block is invalid; otherwise
				+// returns true and sets is_hdr to whether the block was classified as HDR.
				+bool is_hdr(const uint8_t* data, int blockWidth, int blockHeight, bool &is_hdr)
				+{
				+    const Block128 blockData(data);
				+
				+    // isHDR() yields -1 on error, 0 for LDR, 1 for HDR.
				+    const int classification = isHDR(blockData, blockWidth, blockHeight);
				+
				+    is_hdr = (classification == 1);
				+
				+    return classification >= 0;
				+}
			
 
				+
			
 
				+} // astc
			
 
				+
			
 
				+} // basisu_astc
			
 
				+
			
 
				+#if defined(__GNUC__)
			
 
				+#pragma GCC diagnostic pop
			
 
				+#endif
			
--- a/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.h
+++ b/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.h
@@ -0,0 +1,45 @@
 
				+// File: android_astc_decomp.h
			
 
				+#ifndef _TCUASTCUTIL_HPP
			
 
				+#define _TCUASTCUTIL_HPP
			
 
				+/*-------------------------------------------------------------------------
			
 
				+ * drawElements Quality Program Tester Core
			
 
				+ * ----------------------------------------
			
 
				+ *
			
 
				+ * Copyright 2016 The Android Open Source Project
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *      http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ *
			
 
				+ *//*!
			
 
				+ * \file
			
 
				+ * \brief ASTC Utilities.
			
 
				+ *//*--------------------------------------------------------------------*/
			
 
				+
			
 
				+#include <vector>
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+namespace basisu_astc
				+{
				+namespace astc
				+{
				+
				+// Unpacks a single ASTC block to pDst as 8-bit RGBA (blockWidth*blockHeight texels, row-major, 4 bytes each).
				+// data points at the 16-byte compressed block.
				+// If isSRGB is true, the spec requires the decoder to scale the LDR 8-bit endpoints to 16-bit before interpolation slightly differently, 
				+// which will lead to different outputs. So be sure to set it correctly (ideally it should match whatever the encoder did).
				+// Returns false if the block is not a valid LDR block.
				+bool decompress_ldr(uint8_t* pDst, const uint8_t* data, bool isSRGB, int blockWidth, int blockHeight);
				+
				+// Unpacks a single ASTC block to pDstRGBA as float RGBA (blockWidth*blockHeight texels, row-major, 4 floats each).
				+// Each output component is rounded toward zero to a half-float representable value.
				+// Returns false if the block is invalid.
				+bool decompress_hdr(float* pDstRGBA, const uint8_t* data, int blockWidth, int blockHeight);
				+
				+// Classifies a single ASTC block without decoding it. Returns false if the block is invalid;
				+// otherwise returns true and sets is_hdr to true for HDR blocks, false for LDR blocks.
				+bool is_hdr(const uint8_t* data, int blockWidth, int blockHeight, bool& is_hdr);
				+
				+} // astc
				+} // basisu_astc
			
 
				+
			
 
				+#endif
			
--- a/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp
@@ -0,0 +1,3310 @@
 
				+// basisu_astc_hdr_enc.cpp
			
 
				+#include "basisu_astc_hdr_enc.h"
			
 
				+#include "../transcoder/basisu_transcoder.h"
			
 
				+
			
 
				+using namespace basist;
			
 
				+
			
 
				+namespace basisu
			
 
				+{
			
 
				+
			
 
				+// Default red/green error scales; astc_hdr_codec_options::init() copies them
				+// into m_r_err_scale and m_g_err_scale.
				+const float DEF_R_ERROR_SCALE = 2.0f;
				+const float DEF_G_ERROR_SCALE = 3.0f;
			
 
				+
			
 
				+// Maps a qlog bit width (7-12 or 16) to the matching MAX_QLOG<bits> constant.
				+// Any other width asserts in debug builds and yields 0.
				+static inline uint32_t get_max_qlog(uint32_t bits)
				+{
				+	if (bits == 7)
				+		return MAX_QLOG7;
				+	if (bits == 8)
				+		return MAX_QLOG8;
				+	if (bits == 9)
				+		return MAX_QLOG9;
				+	if (bits == 10)
				+		return MAX_QLOG10;
				+	if (bits == 11)
				+		return MAX_QLOG11;
				+	if (bits == 12)
				+		return MAX_QLOG12;
				+	if (bits == 16)
				+		return MAX_QLOG16;
				+
				+	// Unsupported bit width.
				+	assert(0);
				+	return 0;
				+}
			
 
				+
			
 
				+#if 0
				+// Disabled (compiled out). Float counterpart of get_max_qlog(): maps a qlog bit
				+// width (7-12 or 16) to the matching MAX_QLOG<bits>_VAL constant, asserting and
				+// returning 0 for any other width.
				+static inline float get_max_qlog_val(uint32_t bits)
				+{
				+	switch (bits)
				+	{
				+	case 7: return MAX_QLOG7_VAL;
				+	case 8: return MAX_QLOG8_VAL;
				+	case 9: return MAX_QLOG9_VAL;
				+	case 10: return MAX_QLOG10_VAL;
				+	case 11: return MAX_QLOG11_VAL;
				+	case 12: return MAX_QLOG12_VAL;
				+	case 16: return MAX_QLOG16_VAL;
				+	default: assert(0); break;
				+	}
				+	return 0;
				+}
				+#endif
			
 
				+
			
 
				+// Returns bit number src_bit (0..31) of src_val as 0 or 1.
				+static inline int get_bit(
				+	int src_val, int src_bit)
				+{
				+	assert((src_bit >= 0) && (src_bit <= 31));
				+
				+	const int shifted = src_val >> src_bit;
				+	return shifted & 1;
				+}
				+
				+// ORs bit src_bit of src_val into bit position dst_bit of dst.
				+// Bits already set in dst are never cleared.
				+static inline void pack_bit(
				+	int& dst, int dst_bit,
				+	int src_val, int src_bit = 0)
				+{
				+	assert((dst_bit >= 0) && (dst_bit <= 31));
				+
				+	if (get_bit(src_val, src_bit))
				+		dst |= (1 << dst_bit);
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+// Constructs the options with every field at its default; all defaults live in init().
				+astc_hdr_codec_options::astc_hdr_codec_options()
				+{
				+	this->init();
				+}
			
 
				+
			
 
				+// Resets the options to their defaults: fixed error weights/scales, uber mode
				+// disallowed, then the preset for cDefaultLevel applied on top of the "best"
				+// preset.
				+void astc_hdr_codec_options::init()
				+{
				+	m_bc6h_err_weight = .85f;
				+	m_r_err_scale = DEF_R_ERROR_SCALE;
				+	m_g_err_scale = DEF_G_ERROR_SCALE;
				+
				+	// Disabling by default to avoid transcoding outliers (try kodim26). The quality lost is very low. TODO: Could include the uber result in the output.
				+	m_allow_uber_mode = false;
				+
				+	// Must set best quality level first to set defaults.
				+	// (set_quality_best() is the only preset that assigns every mode/range field;
				+	// the other presets leave some of them untouched.)
				+	set_quality_best();
				+
				+	set_quality_level(cDefaultLevel);
				+}
			
 
				+
			
 
				+// Highest achievable quality: every mode is enabled, every weight ISE range and
				+// every partition mask is searched, and partition estimation is off.
				+void astc_hdr_codec_options::set_quality_best()
				+{
				+	// Feature switches.
				+	m_use_solid = true;
				+	m_use_mode11 = true;
				+	m_use_mode7_part1 = true;
				+	m_use_mode7_part2 = true;
				+	m_use_mode11_part2 = true;
				+	m_refine_weights = true;
				+
				+	// Mode 11, single partition.
				+	m_mode11_direct_only = false;
				+	m_mode11_uber_mode = true;
				+	m_first_mode11_weight_ise_range = MODE11_FIRST_ISE_RANGE;
				+	m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE;
				+	m_first_mode11_submode = -1;
				+	m_last_mode11_submode = 7;
				+
				+	// Mode 7, single partition.
				+	m_first_mode7_part1_weight_ise_range = MODE7_PART1_FIRST_ISE_RANGE;
				+	m_last_mode7_part1_weight_ise_range = MODE7_PART1_LAST_ISE_RANGE;
				+
				+	// Mode 7, two partitions.
				+	m_mode7_part2_part_masks = UINT32_MAX;
				+	m_first_mode7_part2_weight_ise_range = MODE7_PART2_FIRST_ISE_RANGE;
				+	m_last_mode7_part2_weight_ise_range = MODE7_PART2_LAST_ISE_RANGE;
				+
				+	// Mode 11, two partitions.
				+	m_mode11_part2_part_masks = UINT32_MAX;
				+	m_first_mode11_part2_weight_ise_range = MODE11_PART2_FIRST_ISE_RANGE;
				+	m_last_mode11_part2_weight_ise_range = MODE11_PART2_LAST_ISE_RANGE;
				+
				+	// No partition estimation.
				+	m_use_estimated_partitions = false;
				+	m_max_estimated_partitions = 0;
				+}
			
 
				+
			
 
				+// Normal quality: every mode stays enabled, but mode 11 starts at weight ISE
				+// range 6 and the mode 7 / two-partition searches only try their last ISE range.
				+// Fields not assigned here keep whatever value they had before the call.
				+void astc_hdr_codec_options::set_quality_normal()
				+{
				+	// Feature switches.
				+	m_use_solid = true;
				+	m_use_mode11 = true;
				+	m_use_mode7_part1 = true;
				+	m_use_mode7_part2 = true;
				+	m_use_mode11_part2 = true;
				+	m_refine_weights = true;
				+
				+	// We'll allow uber mode in normal if the user allows it.
				+	m_mode11_uber_mode = true;
				+
				+	// Mode 11, single partition.
				+	m_first_mode11_weight_ise_range = 6;
				+	m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE;
				+
				+	// Mode 7, single partition: last ISE range only.
				+	m_first_mode7_part1_weight_ise_range = MODE7_PART1_LAST_ISE_RANGE;
				+	m_last_mode7_part1_weight_ise_range = MODE7_PART1_LAST_ISE_RANGE;
				+
				+	// Mode 7, two partitions: all masks, last ISE range only.
				+	m_mode7_part2_part_masks = UINT32_MAX;
				+	m_first_mode7_part2_weight_ise_range = MODE7_PART2_LAST_ISE_RANGE;
				+	m_last_mode7_part2_weight_ise_range = MODE7_PART2_LAST_ISE_RANGE;
				+
				+	// Mode 11, two partitions: all masks, last ISE range only.
				+	m_mode11_part2_part_masks = UINT32_MAX;
				+	m_first_mode11_part2_weight_ise_range = MODE11_PART2_LAST_ISE_RANGE;
				+	m_last_mode11_part2_weight_ise_range = MODE11_PART2_LAST_ISE_RANGE;
				+}
			
 
				+
			
 
				+// Fastest preset: only solid blocks and mode 11 at its last weight ISE range;
				+// uber mode, mode 7, two-partition mode 11 and weight refinement are all off.
				+void astc_hdr_codec_options::set_quality_fastest()
				+{
				+	// What stays enabled.
				+	m_use_solid = true;
				+	m_use_mode11 = true;
				+	m_first_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE;
				+	m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE;
				+
				+	// What is switched off.
				+	m_mode11_uber_mode = false;
				+	m_use_mode7_part1 = false;
				+	m_use_mode7_part2 = false;
				+	m_use_mode11_part2 = false;
				+	m_refine_weights = false;
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+// Selects a speed/quality preset. level is clamped to [cMinLevel, cMaxLevel]
				+// and stored in m_level:
				+//   0     - fastest preset
				+//   1     - normal preset, narrowed mode 11 range, no mode 7, 1 estimated partition
				+//   2     - normal preset, 2 estimated partitions
				+//   3     - best preset, 2 estimated partitions, wider partition masks
				+//   4     - best preset, unrestricted
				+// Any other clamped value only updates m_level.
				+void astc_hdr_codec_options::set_quality_level(int level)
				+{
				+	level = clamp(level, cMinLevel, cMaxLevel);
				+	m_level = level;
				+
				+	if (level == 0)
				+	{
				+		set_quality_fastest();
				+		return;
				+	}
				+
				+	if (level == 4)
				+	{
				+		set_quality_best();
				+		return;
				+	}
				+
				+	if ((level < 1) || (level > 3))
				+		return;
				+
				+	// Levels 1-3: start from a base preset, then restrict the partition search.
				+	if (level == 3)
				+		set_quality_best();
				+	else
				+		set_quality_normal();
				+
				+	if (level == 1)
				+	{
				+		// Only the last two mode 11 weight ISE ranges, and no mode 7 at all.
				+		m_first_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE - 1;
				+		m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE;
				+
				+		m_use_mode7_part1 = false;
				+		m_use_mode7_part2 = false;
				+	}
				+
				+	m_use_estimated_partitions = true;
				+	m_max_estimated_partitions = (level == 1) ? 1 : 2;
				+
				+	m_mode11_part2_part_masks = (level == 3) ? (1 | 2 | 4 | 8) : (1 | 2);
				+	m_mode7_part2_part_masks = (level == 3) ? (1 | 2 | 4 | 8) : (1 | 2);
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+#if 0
				+// Disabled (compiled out). Convenience wrapper that forwards a 12-bit qlog
				+// value to qlog_to_half_slow() with bits = 12.
				+static inline half_float qlog12_to_half_slow(uint32_t qlog12)
				+{
				+	return qlog_to_half_slow(qlog12, 12);
				+}
				+#endif
			
 
				+
			
 
				+// max usable qlog8 value is 247, 248=inf, >=249 is nan
			
 
				+// max usable qlog7 value is 123, 124=inf, >=125 is nan
			
 
				+
			
 
				+// To go from a smaller qlog to a larger one, shift left by X bits.
			
 
				+
			
 
				+//const uint32_t TOTAL_USABLE_QLOG8 = 248; // 0-247 are usable, 0=0, 247=60416.0, 246=55296.0
			
 
				+
			
 
				+// for qlog7's shift left by 1
			
 
				+//half_float g_qlog8_to_half[256];
			
 
				+//float g_qlog8_to_float[256];
			
 
				+
			
 
				+//half_float g_qlog12_to_half[4096];
			
 
				+//float g_qlog12_to_float[4096];
			
 
				+
			
 
				+// Half float value for every 16-bit qlog code; narrower qlog widths index it
				+// after being shifted up to 16 bits (see qlog_to_half()).
				+static half_float g_qlog16_to_half[65536];
				+
				+// Converts a qlog code of the given bit width (5..16) to a half float by
				+// widening it to 16 bits and looking it up in g_qlog16_to_half.
				+inline half_float qlog_to_half(uint32_t val, uint32_t bits)
				+{
				+	assert((bits >= 5) && (bits <= 16));
				+	assert(val < (1U << bits));
				+
				+	const uint32_t widen_shift = 16 - bits;
				+	const uint32_t qlog16 = val << widen_shift;
				+
				+	return g_qlog16_to_half[qlog16];
				+}
			
 
				+
			
 
				+// nearest values given a positive half float value (only)
			
 
				// Nearest qlog value for a positive half float (only), one table per qlog bit width.
				// Each table is indexed by the half float's bit pattern (0-32767).
				static uint16_t g_half_to_qlog7[32768];
				static uint16_t g_half_to_qlog8[32768];
				static uint16_t g_half_to_qlog9[32768];
				static uint16_t g_half_to_qlog10[32768];
				static uint16_t g_half_to_qlog11[32768];
				static uint16_t g_half_to_qlog12[32768];

				// Bit width of the first table in g_pHalf_to_qlog_tabs.
				const uint32_t HALF_TO_QLOG_TABS_BASE = 7;

				// Table pointers indexed by (bits - HALF_TO_QLOG_TABS_BASE). Only the first six slots are populated.
				static uint16_t* g_pHalf_to_qlog_tabs[8] =
				{
					g_half_to_qlog7,  g_half_to_qlog8,
					g_half_to_qlog9,  g_half_to_qlog10,
					g_half_to_qlog11, g_half_to_qlog12
				};
			
 
				+
			
 
				+static inline uint32_t half_to_qlog7_12(half_float h, uint32_t bits)
			
 
				+{
			
 
				+	assert((bits >= HALF_TO_QLOG_TABS_BASE) && (bits <= 12));
			
 
				+	assert(h < 32768);
			
 
				+
			
 
				+	return g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_BASE][h];
			
 
				+}
			
 
				+
			
 
				+#if 0
			
 
				+// Input is the low 11 bits of the qlog
			
 
				+// Returns the 10-bit mantissa of the half float value
			
 
				+static int qlog11_to_half_float_mantissa(int M)
			
 
				+{
			
 
				+	assert(M <= 0x7FF);
			
 
				+	int Mt;
			
 
				+	if (M < 512)
			
 
				+		Mt = 3 * M;
			
 
				+	else if (M >= 1536)
			
 
				+		Mt = 5 * M - 2048;
			
 
				+	else
			
 
				+		Mt = 4 * M - 512;
			
 
				+	return (Mt >> 3);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				// Input is the 10-bit mantissa of the half float value.
				// Output is the 11-bit qlog value.
				// Inverse of qlog11_to_half_float_mantissa(): three linear segments, selected by the range the
				// candidate result falls into (below 512, at or above 1536, or in between).
				static inline int half_float_mantissa_to_qlog11(int hf)
				{
					const int scaled = hf * 8;

					// Lowest segment
					const int low_candidate = (scaled + 2) / 3;
					if (low_candidate < 512)
						return low_candidate;

					// Highest segment
					const int high_candidate = (scaled + 2048 + 4) / 5;
					if (high_candidate >= 1536)
						return high_candidate;

					// Middle segment
					return (scaled + 512 + 2) / 4;
				}
			
 
				+
			
 
				+static inline int half_to_qlog16(int hf)
			
 
				+{
			
 
				+	// extract 5 bits exponent, which is carried through to qlog16 unchanged
			
 
				+	const int exp = (hf >> 10) & 0x1F;
			
 
				+
			
 
				+	// extract and invert the 10 bit mantissa to nearest qlog11 (should be lossless)
			
 
				+	const int mantissa = half_float_mantissa_to_qlog11(hf & 0x3FF);
			
 
				+	assert(mantissa <= 0x7FF);
			
 
				+
			
 
				+	// Now combine to qlog16, which is what ASTC HDR interpolates using the [0-64] weights.
			
 
				+	uint32_t qlog16 = (exp << 11) | mantissa;
			
 
				+
			
 
				+	// should be a lossless operation
			
 
				+	assert(qlog16_to_half_slow(qlog16) == hf);
			
 
				+
			
 
				+	return qlog16;
			
 
				+}
			
 
				+
			
 
				// Quantizes a 16-bit qlog code down to `desired_bits` (7 to 12) bits.
				// A bias of half the dropped range minus one is added before shifting, and the result is
				// clamped to the largest value representable in `desired_bits`.
				static inline uint32_t quant_qlog16(uint32_t q16, uint32_t desired_bits)
				{
					assert((desired_bits >= 7) && (desired_bits <= 12));
					assert(q16 <= 65535);

					const uint32_t dropped_bits = 16 - desired_bits;
					const uint32_t rounding_bias = (1U << (dropped_bits - 1U)) - 1U;
					const uint32_t quantized = (q16 + rounding_bias) >> dropped_bits;

					const uint32_t limit = (1U << desired_bits) - 1U;
					return (quantized < limit) ? quantized : limit;
				}
			
 
				+
			
 
				+static void compute_half_to_qlog_table(uint32_t bits, uint16_t* pTable, const basisu::vector<float> &qlog16_to_float)
			
 
				+{
			
 
				+	assert(bits >= 5 && bits <= 12);
			
 
				+	const uint32_t max_val = (1 << bits) - 1;
			
 
				+
			
 
				+	// For all positive half-floats
			
 
				+	for (uint32_t h = 0; h < 32768; h++)
			
 
				+	{
			
 
				+		// Skip invalid values
			
 
				+		if (is_half_inf_or_nan((half_float)h))
			
 
				+			continue;
			
 
				+		const float desired_val = half_to_float((half_float)h);
			
 
				+
			
 
				+		float best_err = 1e+30f;
			
 
				+		uint32_t best_qlog = 0;
			
 
				+
			
 
				+		// For all possible qlog's
			
 
				+		for (uint32_t i = 0; i <= max_val; i++)
			
 
				+		{
			
 
				+			// Skip invalid values
			
 
				+			float v = qlog16_to_float[i << (16 - bits)];
			
 
				+			if (std::isnan(v))
			
 
				+				continue;
			
 
				+
			
 
				+			// Compute error
			
 
				+			float err = fabs(v - desired_val);
			
 
				+
			
 
				+			// Find best
			
 
				+			if (err < best_err)
			
 
				+			{
			
 
				+				best_err = err;
			
 
				+				best_qlog = i;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		pTable[h] = (uint16_t)best_qlog;
			
 
				+	}
			
 
				+
			
 
				+#if 0
			
 
				+	uint32_t t = 0;
			
 
				+
			
 
				+	const uint32_t nb = 12;
			
 
				+	int nb_shift = 16 - nb;
			
 
				+
			
 
				+	for (uint32_t q16 = 0; q16 < 65536; q16++)
			
 
				+	{
			
 
				+		half_float h = qlog16_to_half_slow(q16);
			
 
				+		if (is_half_inf_or_nan(h))
			
 
				+			continue;
			
 
				+
			
 
				+		int q7 = half_to_qlog7_12(h, nb);
			
 
				+
			
 
				+		uint32_t best_err = UINT32_MAX, best_l = 0;
			
 
				+		for (int l = 0; l < (1 << nb); l++)
			
 
				+		{
			
 
				+			int dec_q16 = l << nb_shift;
			
 
				+			int err = iabs(dec_q16 - q16);
			
 
				+			if (err < best_err)
			
 
				+			{
			
 
				+				best_err = err;
			
 
				+				best_l = l;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		//int e = (q16 + 253) >> 9; // 345
			
 
				+
			
 
				+		int e = (q16 + (1 << (nb_shift - 1)) - 1) >> nb_shift; // 285
			
 
				+		if (best_l != e)
			
 
				+			//if (q7 != best_l)
			
 
				+		{
			
 
				+			printf("q16=%u, h=%u, q7=%u, e=%u, best_l=%u\n", q16, h, q7, e, best_l);
			
 
				+			t++;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	printf("Mismatches: %u\n", t);
			
 
				+	exit(0);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+static void init_qlog_tables()
			
 
				+{
			
 
				+	basisu::vector<float> qlog16_to_float(65536);
			
 
				+
			
 
				+	// for all possible qlog16, compute the corresponding half float
			
 
				+	for (uint32_t i = 0; i <= 65535; i++)
			
 
				+	{
			
 
				+		half_float h = qlog16_to_half_slow(i);
			
 
				+		g_qlog16_to_half[i] = h;
			
 
				+
			
 
				+		qlog16_to_float[i] = half_to_float(h);
			
 
				+	}
			
 
				+
			
 
				+	// for all possible half floats, find the nearest qlog5-12 float
			
 
				+	for (uint32_t bits = HALF_TO_QLOG_TABS_BASE; bits <= 12; bits++)
			
 
				+	{
			
 
				+		compute_half_to_qlog_table(bits, g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_BASE], qlog16_to_float);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				// ISE weight dequantization table.
				//   [ise_range][0]    = number of weight levels
				//   [ise_range][1...] = lerp value in [0,64], stored in ASTC order
				// The table has 11 rows (ISE ranges 0 to 10); row 0 is a placeholder and ranges
				// MIN_SUPPORTED_ISE_WEIGHT_INDEX..MAX_SUPPORTED_ISE_WEIGHT_INDEX are the usable ones.
				const uint32_t MIN_SUPPORTED_ISE_WEIGHT_INDEX = 1; // ISE 1=3 levels
				const uint32_t MAX_SUPPORTED_ISE_WEIGHT_INDEX = 10; // ISE 10=24 levels

				static const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][32] =
				{
					/* 0  */ { 0 }, // ise range=0 is invalid for 4x4 block sizes (<24 weight bits in the block)
					/* 1  */ { 3,  0, 32, 64 },
					/* 2  */ { 4,  0, 21, 43, 64 },
					/* 3  */ { 5,  0, 16, 32, 48, 64 },
					/* 4  */ { 6,  0, 64, 12, 52, 25, 39 },
					/* 5  */ { 8,  0, 9, 18, 27, 37, 46, 55, 64 },
					/* 6  */ { 10, 0, 64, 7, 57, 14, 50, 21, 43, 28, 36 },
					/* 7  */ { 12, 0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36 },
					/* 8  */ { 16, 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 },
					/* 9  */ { 20, 0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35 },
					/* 10 */ { 24, 0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34 }
				};
			
 
				+
			
 
				+//{ 12, 0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36 }, // 7
			
 
				+//static const uint8_t g_weight_order_7[12] = { 0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1 };
			
 
				+
			
 
				+static vec3F calc_mean(uint32_t num_pixels, const vec4F* pPixels)
			
 
				+{
			
 
				+	vec3F mean(0.0f);
			
 
				+
			
 
				+	for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+	{
			
 
				+		const vec4F& p = pPixels[i];
			
 
				+
			
 
				+		mean[0] += p[0];
			
 
				+		mean[1] += p[1];
			
 
				+		mean[2] += p[2];
			
 
				+	}
			
 
				+
			
 
				+	return mean / static_cast<float>(num_pixels);
			
 
				+}
			
 
				+
			
 
				+static vec3F calc_rgb_pca(uint32_t num_pixels, const vec4F* pPixels, const vec3F& mean_color)
			
 
				+{
			
 
				+	float cov[6] = { 0, 0, 0, 0, 0, 0 };
			
 
				+
			
 
				+	for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+	{
			
 
				+		const vec4F& v = pPixels[i];
			
 
				+
			
 
				+		float r = v[0] - mean_color[0];
			
 
				+		float g = v[1] - mean_color[1];
			
 
				+		float b = v[2] - mean_color[2];
			
 
				+
			
 
				+		cov[0] += r * r;
			
 
				+		cov[1] += r * g;
			
 
				+		cov[2] += r * b;
			
 
				+		cov[3] += g * g;
			
 
				+		cov[4] += g * b;
			
 
				+		cov[5] += b * b;
			
 
				+	}
			
 
				+
			
 
				+	float xr = .9f, xg = 1.0f, xb = .7f;
			
 
				+	for (uint32_t iter = 0; iter < 3; iter++)
			
 
				+	{
			
 
				+		float r = xr * cov[0] + xg * cov[1] + xb * cov[2];
			
 
				+		float g = xr * cov[1] + xg * cov[3] + xb * cov[4];
			
 
				+		float b = xr * cov[2] + xg * cov[4] + xb * cov[5];
			
 
				+
			
 
				+		float m = maximumf(maximumf(fabsf(r), fabsf(g)), fabsf(b));
			
 
				+
			
 
				+		if (m > 1e-10f)
			
 
				+		{
			
 
				+			m = 1.0f / m;
			
 
				+
			
 
				+			r *= m;
			
 
				+			g *= m;
			
 
				+			b *= m;
			
 
				+		}
			
 
				+
			
 
				+		xr = r;
			
 
				+		xg = g;
			
 
				+		xb = b;
			
 
				+	}
			
 
				+
			
 
				+	float len = xr * xr + xg * xg + xb * xb;
			
 
				+
			
 
				+	vec3F axis;
			
 
				+	if (len < 1e-10f)
			
 
				+		axis.set(0.0f);
			
 
				+	else
			
 
				+	{
			
 
				+		len = 1.0f / sqrtf(len);
			
 
				+
			
 
				+		xr *= len;
			
 
				+		xg *= len;
			
 
				+		xb *= len;
			
 
				+
			
 
				+		axis.set(xr, xg, xb, 0);
			
 
				+	}
			
 
				+
			
 
				+	if (axis.dot(axis) < .5f)
			
 
				+	{
			
 
				+		axis.set(1.0f, 1.0f, 1.0f, 0.0f);
			
 
				+		axis.normalize_in_place();
			
 
				+	}
			
 
				+
			
 
				+	return axis;
			
 
				+}
			
 
				+
			
 
				+static vec3F interp_color(const vec3F& mean, const vec3F& dir, float df, const aabb3F& colorspace_box, const aabb3F& input_box, bool* pInside = nullptr)
			
 
				+{
			
 
				+#if 0
			
 
				+	assert(mean[0] >= input_box[0][0]);
			
 
				+	assert(mean[1] >= input_box[0][1]);
			
 
				+	assert(mean[2] >= input_box[0][2]);
			
 
				+	assert(mean[0] <= input_box[1][0]);
			
 
				+	assert(mean[1] <= input_box[1][1]);
			
 
				+	assert(mean[2] <= input_box[1][2]);
			
 
				+#endif
			
 
				+
			
 
				+	if (pInside)
			
 
				+		*pInside = false;
			
 
				+
			
 
				+	vec3F k(mean + dir * df);
			
 
				+	if (colorspace_box.contains(k))
			
 
				+	{
			
 
				+		if (pInside)
			
 
				+			*pInside = true;
			
 
				+
			
 
				+		return k;
			
 
				+	}
			
 
				+
			
 
				+	// starts inside
			
 
				+	vec3F s(mean);
			
 
				+
			
 
				+	// ends outside
			
 
				+	vec3F e(mean + dir * df);
			
 
				+
			
 
				+	// a ray guaranteed to go from the outside to inside
			
 
				+	ray3F r(e, (s - e).normalize_in_place());
			
 
				+	vec3F c;
			
 
				+	float t = 0.0f;
			
 
				+
			
 
				+	intersection::result res = intersection::ray_aabb(c, t, r, input_box);
			
 
				+	if (res != intersection::cSuccess)
			
 
				+		c = k;
			
 
				+
			
 
				+	return c;
			
 
				+}
			
 
				+
			
 
				+// all in Q16 space, 0-65535
			
 
				+static bool compute_least_squares_endpoints_rgb(
			
 
				+	uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
			
 
				+	vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box)
			
 
				+{
			
 
				+	// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf 
			
 
				+	// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
			
 
				+	// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
			
 
				+	float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
			
 
				+	float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
			
 
				+	float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
			
 
				+	float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;
			
 
				+
			
 
				+	for (uint32_t i = 0; i < N; i++)
			
 
				+	{
			
 
				+		const uint32_t sel = pSelectors[i];
			
 
				+		z00 += pSelector_weights[sel][0];
			
 
				+		z10 += pSelector_weights[sel][1];
			
 
				+		z11 += pSelector_weights[sel][2];
			
 
				+
			
 
				+		float w = pSelector_weights[sel][3];
			
 
				+		q00_r += w * pColors[i][0];
			
 
				+		t_r += pColors[i][0];
			
 
				+
			
 
				+		q00_g += w * pColors[i][1];
			
 
				+		t_g += pColors[i][1];
			
 
				+
			
 
				+		q00_b += w * pColors[i][2];
			
 
				+		t_b += pColors[i][2];
			
 
				+	}
			
 
				+
			
 
				+	q10_r = t_r - q00_r;
			
 
				+	q10_g = t_g - q00_g;
			
 
				+	q10_b = t_b - q00_b;
			
 
				+
			
 
				+	z01 = z10;
			
 
				+
			
 
				+	float det = z00 * z11 - z01 * z10;
			
 
				+	if (det == 0.0f)
			
 
				+		return false;
			
 
				+
			
 
				+	det = 1.0f / det;
			
 
				+
			
 
				+	float iz00, iz01, iz10, iz11;
			
 
				+	iz00 = z11 * det;
			
 
				+	iz01 = -z01 * det;
			
 
				+	iz10 = -z10 * det;
			
 
				+	iz11 = z00 * det;
			
 
				+
			
 
				+	(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);
			
 
				+	(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
			
 
				+
			
 
				+	(*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g);
			
 
				+	(*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g);
			
 
				+
			
 
				+	(*pXl)[2] = (float)(iz00 * q00_b + iz01 * q10_b);
			
 
				+	(*pXh)[2] = (float)(iz10 * q00_b + iz11 * q10_b);
			
 
				+
			
 
				+	for (uint32_t c = 0; c < 3; c++)
			
 
				+	{
			
 
				+		float l = (*pXl)[c], h = (*pXh)[c];
			
 
				+
			
 
				+		if (input_box.get_dim(c) < .0000125f)
			
 
				+		{
			
 
				+			l = input_box[0][c];
			
 
				+			h = input_box[1][c];
			
 
				+		}
			
 
				+
			
 
				+		(*pXl)[c] = l;
			
 
				+		(*pXh)[c] = h;
			
 
				+	}
			
 
				+
			
 
				+	vec3F mean((*pXl + *pXh) * .5f);
			
 
				+	vec3F dir(*pXh - *pXl);
			
 
				+
			
 
				+	float ln = dir.length();
			
 
				+	if (ln)
			
 
				+	{
			
 
				+		dir /= ln;
			
 
				+
			
 
				+		float ld = (*pXl - mean).dot(dir);
			
 
				+		float hd = (*pXh - mean).dot(dir);
			
 
				+
			
 
				+		aabb3F colorspace_box(vec3F(0.0f), vec3F(MAX_QLOG16_VAL));
			
 
				+
			
 
				+		bool was_inside1 = false;
			
 
				+
			
 
				+		vec3F l = interp_color(mean, dir, ld, colorspace_box, input_box, &was_inside1);
			
 
				+		if (!was_inside1)
			
 
				+			*pXl = l;
			
 
				+
			
 
				+		bool was_inside2 = false;
			
 
				+		vec3F h = interp_color(mean, dir, hd, colorspace_box, input_box, &was_inside2);
			
 
				+		if (!was_inside2)
			
 
				+			*pXh = h;
			
 
				+	}
			
 
				+
			
 
				+	pXl->clamp(0.0f, MAX_QLOG16_VAL);
			
 
				+	pXh->clamp(0.0f, MAX_QLOG16_VAL);
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static vec4F g_astc_ls_weights_ise[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][24];
			
 
				+
			
 
				+static uint8_t g_map_astc_to_linear_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][24]; // [ise_range][astc_index] -> linear index
			
 
				+static uint8_t g_map_linear_to_astc_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][24]; // [ise_range][linear_index] -> astc_index
			
 
				+
			
 
				+static void encode_astc_hdr_init()
			
 
				+{
			
 
				+	// Precomputed weight constants used during least fit determination. For each entry: w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w
			
 
				+	for (uint32_t range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; range++)
			
 
				+	{
			
 
				+		const uint32_t num_levels = g_ise_weight_lerps[range][0];
			
 
				+		assert((num_levels >= 3) && (num_levels <= 24));
			
 
				+
			
 
				+		for (uint32_t i = 0; i < num_levels; i++)
			
 
				+		{
			
 
				+			float w = g_ise_weight_lerps[range][1 + i] * (1.0f / 64.0f);
			
 
				+
			
 
				+			g_astc_ls_weights_ise[range][i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	for (uint32_t ise_range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; ise_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; ise_range++)
			
 
				+	{
			
 
				+		const uint32_t num_levels = g_ise_weight_lerps[ise_range][0];
			
 
				+		assert((num_levels >= 3) && (num_levels <= 24));
			
 
				+
			
 
				+		uint32_t s[32];
			
 
				+		for (uint32_t i = 0; i < num_levels; i++)
			
 
				+			s[i] = (g_ise_weight_lerps[ise_range][1 + i] << 8) + i;
			
 
				+
			
 
				+		std::sort(s, s + num_levels);
			
 
				+
			
 
				+		for (uint32_t i = 0; i < num_levels; i++)
			
 
				+			g_map_linear_to_astc_order[ise_range][i] = (uint8_t)(s[i] & 0xFF);
			
 
				+
			
 
				+		for (uint32_t i = 0; i < num_levels; i++)
			
 
				+			g_map_astc_to_linear_order[ise_range][g_map_linear_to_astc_order[ise_range][i]] = (uint8_t)i;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void interpolate_qlog12_colors(
			
 
				+	const int e[2][3],
			
 
				+	half_float* pDecoded_half,
			
 
				+	vec3F* pDecoded_float,
			
 
				+	uint32_t n, uint32_t ise_weight_range)
			
 
				+{
			
 
				+	assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
			
 
				+
			
 
				+	for (uint32_t i = 0; i < 2; i++)
			
 
				+	{
			
 
				+		for (uint32_t j = 0; j < 3; j++)
			
 
				+		{
			
 
				+			assert(in_range(e[i][j], 0, 0xFFF));
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	for (uint32_t i = 0; i < n; i++)
			
 
				+	{
			
 
				+		const int c = g_ise_weight_lerps[ise_weight_range][1 + i];
			
 
				+		assert(c == (int)astc_helpers::dequant_bise_weight(i, ise_weight_range));
			
 
				+
			
 
				+		half_float rf, gf, bf;
			
 
				+
			
 
				+		{
			
 
				+			uint32_t r0 = e[0][0] << 4;
			
 
				+			uint32_t r1 = e[1][0] << 4;
			
 
				+			int ri = (r0 * (64 - c) + r1 * c + 32) / 64;
			
 
				+			rf = qlog16_to_half_slow(ri);
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			uint32_t g0 = e[0][1] << 4;
			
 
				+			uint32_t g1 = e[1][1] << 4;
			
 
				+			int gi = (g0 * (64 - c) + g1 * c + 32) / 64;
			
 
				+			gf = qlog16_to_half_slow(gi);
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			uint32_t b0 = e[0][2] << 4;
			
 
				+			uint32_t b1 = e[1][2] << 4;
			
 
				+			int bi = (b0 * (64 - c) + b1 * c + 32) / 64;
			
 
				+			bf = qlog16_to_half_slow(bi);
			
 
				+		}
			
 
				+
			
 
				+		if (pDecoded_half)
			
 
				+		{
			
 
				+			pDecoded_half[i * 3 + 0] = rf;
			
 
				+			pDecoded_half[i * 3 + 1] = gf;
			
 
				+			pDecoded_half[i * 3 + 2] = bf;
			
 
				+		}
			
 
				+
			
 
				+		if (pDecoded_float)
			
 
				+		{
			
 
				+			pDecoded_float[i][0] = half_to_float(rf);
			
 
				+			pDecoded_float[i][1] = half_to_float(gf);
			
 
				+			pDecoded_float[i][2] = half_to_float(bf);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// decoded in ASTC order, not linear order
			
 
				+// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded
			
 
				+bool get_astc_hdr_mode_11_block_colors(
			
 
				+	const uint8_t* pEndpoints,
			
 
				+	half_float* pDecoded_half,
			
 
				+	vec3F* pDecoded_float,
			
 
				+	uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range)
			
 
				+{
			
 
				+	assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
			
 
				+
			
 
				+	int e[2][3];
			
 
				+	if (!decode_mode11_to_qlog12(pEndpoints, e, ise_endpoint_range))
			
 
				+		return false;
			
 
				+
			
 
				+	interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range);
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+// decoded in ASTC order, not linear order
			
 
				+// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded
			
 
				+bool get_astc_hdr_mode_7_block_colors(
			
 
				+	const uint8_t* pEndpoints,
			
 
				+	half_float* pDecoded_half,
			
 
				+	vec3F* pDecoded_float,
			
 
				+	uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range)
			
 
				+{
			
 
				+	assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
			
 
				+
			
 
				+	int e[2][3];
			
 
				+	if (!decode_mode7_to_qlog12(pEndpoints, e, nullptr, ise_endpoint_range))
			
 
				+		return false;
			
 
				+
			
 
				+	interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range);
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+// Fast high precision piecewise linear approximation of log2(bias+x).
			
 
				+// Half may be zero, positive or denormal. No NaN/Inf/negative.
			
 
				+static inline double q(half_float x)
			
 
				+{
			
 
				+	union { float f; int32_t i; uint32_t u; } fi;
			
 
				+
			
 
				+	fi.f = fast_half_to_float_pos_not_inf_or_nan(x);
			
 
				+
			
 
				+	assert(fi.f >= 0.0f);
			
 
				+
			
 
				+	fi.f += .125f;
			
 
				+
			
 
				+	return (double)fi.u; // approx log2f(fi.f), need to return double for the precision
			
 
				+}
			
 
				+
			
 
				+double eval_selectors(
			
 
				+	uint32_t num_pixels,
			
 
				+	uint8_t* pWeights,
			
 
				+	const half_float* pBlock_pixels_half,
			
 
				+	uint32_t num_weight_levels,
			
 
				+	const half_float* pDecoded_half,
			
 
				+	const astc_hdr_codec_options& coptions,
			
 
				+	uint32_t usable_selector_bitmask)
			
 
				+{
			
 
				+	assert((num_pixels >= 1) && (num_pixels <= 16));
			
 
				+	assert(usable_selector_bitmask);
			
 
				+
			
 
				+	const float R_WEIGHT = coptions.m_r_err_scale;
			
 
				+	const float G_WEIGHT = coptions.m_g_err_scale;
			
 
				+
			
 
				+	double total_error = 0;
			
 
				+
			
 
				+#ifdef _DEBUG
			
 
				+	for (uint32_t i = 0; i < num_weight_levels; i++)
			
 
				+	{
			
 
				+		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0]));
			
 
				+		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1]));
			
 
				+		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2]));
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				+	for (uint32_t p = 0; p < num_pixels; p++)
			
 
				+	{
			
 
				+		const half_float* pDesired_half = &pBlock_pixels_half[p * 3];
			
 
				+
			
 
				+		double lowest_e = 1e+30f;
			
 
				+
			
 
				+		// this is an approximation of MSLE
			
 
				+		for (uint32_t i = 0; i < num_weight_levels; i++)
			
 
				+		{
			
 
				+			if (((1 << i) & usable_selector_bitmask) == 0)
			
 
				+				continue;
			
 
				+
			
 
				+			// compute piecewise linear approximation of log2(a+eps)-log2(b+eps), for each component, then MSLE
			
 
				+			double rd = q(pDecoded_half[i * 3 + 0]) - q(pDesired_half[0]);
			
 
				+			double gd = q(pDecoded_half[i * 3 + 1]) - q(pDesired_half[1]);
			
 
				+			double bd = q(pDecoded_half[i * 3 + 2]) - q(pDesired_half[2]);
			
 
				+
			
 
				+			double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
			
 
				+
			
 
				+			if (e < lowest_e)
			
 
				+			{
			
 
				+				lowest_e = e;
			
 
				+				pWeights[p] = (uint8_t)i;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		total_error += lowest_e;
			
 
				+
			
 
				+	} // p
			
 
				+
			
 
				+	return total_error;
			
 
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+double compute_block_error(const half_float* pOrig_block, const half_float* pPacked_block, const astc_hdr_codec_options& coptions)
			
 
				+{
			
 
				+	const float R_WEIGHT = coptions.m_r_err_scale;
			
 
				+	const float G_WEIGHT = coptions.m_g_err_scale;
			
 
				+
			
 
				+	double total_error = 0;
			
 
				+		
			
 
				+	for (uint32_t p = 0; p < 16; p++)
			
 
				+	{
			
 
				+		double rd = q(pOrig_block[p * 3 + 0]) - q(pPacked_block[p * 3 + 0]);
			
 
				+		double gd = q(pOrig_block[p * 3 + 1]) - q(pPacked_block[p * 3 + 1]);
			
 
				+		double bd = q(pOrig_block[p * 3 + 2]) - q(pPacked_block[p * 3 + 2]);
			
 
				+
			
 
				+		double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
			
 
				+
			
 
				+		total_error += e;
			
 
				+	}
			
 
				+
			
 
				+	return total_error;
			
 
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				// Clamps v to [l, h] and returns the result.
				// When v is out of range, did_clamp is set to true and max_clamp_mag is raised to the distance
				// by which v exceeded the range, if that is larger than its current value. When v is in range
				// neither output is touched.
				static inline int compute_clamped_val(int v, int l, int h, bool& did_clamp, int& max_clamp_mag)
				{
					assert(l < h);

					int overshoot;
					int clamped;

					if (v < l)
					{
						overshoot = l - v;
						clamped = l;
					}
					else if (v > h)
					{
						overshoot = v - h;
						clamped = h;
					}
					else
					{
						return v;
					}

					did_clamp = true;

					if (overshoot > max_clamp_mag)
						max_clamp_mag = overshoot;

					return clamped;
				}
			
 
				+
			
 
				+static bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag)
			
 
				+{
			
 
				+	assert(submode <= 7);
			
 
				+
			
 
				+	const uint8_t s_b_bits[8] = { 7, 8, 6, 7,  8, 6, 7, 6 };
			
 
				+	const uint8_t s_c_bits[8] = { 6, 6, 7, 7,  6, 7, 7, 7 };
			
 
				+	const uint8_t s_d_bits[8] = { 7, 6, 7, 6,  5, 6, 5, 6 };
			
 
				+
			
 
				+	const uint32_t a_bits = 9 + (submode >> 1);
			
 
				+	const uint32_t b_bits = s_b_bits[submode];
			
 
				+	const uint32_t c_bits = s_c_bits[submode];
			
 
				+	const uint32_t d_bits = s_d_bits[submode];
			
 
				+
			
 
				+	const int max_a_val = (1 << a_bits) - 1;
			
 
				+	const int max_b_val = (1 << b_bits) - 1;
			
 
				+	const int max_c_val = (1 << c_bits) - 1;
			
 
				+
			
 
				+	// The maximum usable value before it turns to NaN/Inf
			
 
				+	const int max_a_qlog = get_max_qlog(a_bits);
			
 
				+
			
 
				+	const int min_d_val = -(1 << (d_bits - 1));
			
 
				+	const int max_d_val = -min_d_val - 1;
			
 
				+	assert((max_d_val - min_d_val + 1) == (1 << d_bits));
			
 
				+
			
 
				+	int val_q[2][3];
			
 
				+
			
 
				+	for (uint32_t c = 0; c < 3; c++)
			
 
				+	{
			
 
				+#if 1
			
 
				+		// this is better
			
 
				+		const half_float l = qlog16_to_half_slow((uint32_t)std::round(low_q16[c]));
			
 
				+		val_q[0][c] = half_to_qlog7_12(l, a_bits);
			
 
				+		
			
 
				+		const half_float h = qlog16_to_half_slow((uint32_t)std::round(high_q16[c]));
			
 
				+		val_q[1][c] = half_to_qlog7_12(h, a_bits);
			
 
				+#else
			
 
				+		val_q[0][c] = quant_qlog16((uint32_t)std::round(low_q16[c]), a_bits);
			
 
				+		val_q[1][c] = quant_qlog16((uint32_t)std::round(high_q16[c]), a_bits);
			
 
				+#endif
			
 
				+				
			
 
				+#if 1
			
 
				+		if (val_q[0][c] == val_q[1][c])
			
 
				+		{
			
 
				+#if 0
			
 
				+			if (l <= h)
			
 
				+#else
			
 
				+			if (low_q16[c] < high_q16[c])
			
 
				+#endif
			
 
				+			{
			
 
				+				if (val_q[0][c])
			
 
				+					val_q[0][c]--;
			
 
				+
			
 
				+				if (val_q[1][c] != max_a_val)
			
 
				+					val_q[1][c]++;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				if (val_q[0][c] != max_a_val)
			
 
				+					val_q[0][c]++;
			
 
				+
			
 
				+				if (val_q[1][c])
			
 
				+					val_q[1][c]--;
			
 
				+			}
			
 
				+		}
			
 
				+#endif
			
 
				+
			
 
				+		val_q[0][c] = minimum<uint32_t>(val_q[0][c], max_a_qlog);
			
 
				+		val_q[1][c] = minimum<uint32_t>(val_q[1][c], max_a_qlog);
			
 
				+	}
			
 
				+
			
 
				+	int highest_q = -1, highest_val = 0, highest_comp = 0;
			
 
				+
			
 
				+	for (uint32_t v = 0; v < 2; v++)
			
 
				+	{
			
 
				+		for (uint32_t c = 0; c < 3; c++)
			
 
				+		{
			
 
				+			assert(val_q[v][c] >= 0 && val_q[v][c] <= max_a_val);
			
 
				+
			
 
				+			if (val_q[v][c] > highest_q)
			
 
				+			{
			
 
				+				highest_q = val_q[v][c];
			
 
				+				highest_val = v;
			
 
				+				highest_comp = c;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	const bool had_tie = (val_q[highest_val ^ 1][highest_comp] == highest_q);
			
 
				+
			
 
				+	if (highest_val != 1)
			
 
				+	{
			
 
				+		for (uint32_t c = 0; c < 3; c++)
			
 
				+		{
			
 
				+			std::swap(val_q[0][c], val_q[1][c]);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (highest_comp)
			
 
				+	{
			
 
				+		std::swap(val_q[0][0], val_q[0][highest_comp]);
			
 
				+		std::swap(val_q[1][0], val_q[1][highest_comp]);
			
 
				+	}
			
 
				+
			
 
				+	int orig_q[2][3];
			
 
				+	memcpy(orig_q, val_q, sizeof(val_q));
			
 
				+
			
 
				+	// val[1][0] is now guaranteed to be highest
			
 
				+	int best_va = 0, best_vb0 = 0, best_vb1 = 0, best_vc = 0, best_vd0 = 0, best_vd1 = 0;
			
 
				+	int best_max_clamp_mag = 0;
			
 
				+	bool best_did_clamp = false;
			
 
				+	int best_q[2][3] = { { 0, 0, 0}, { 0, 0, 0 }  };
			
 
				+	BASISU_NOTE_UNUSED(best_q);
			
 
				+	uint32_t best_dist = UINT_MAX;
			
 
				+
			
 
				+	for (uint32_t pass = 0; pass < 2; pass++)
			
 
				+	{
			
 
				+		int trial_va = val_q[1][0];
			
 
				+
			
 
				+		assert(trial_va <= max_a_val);
			
 
				+		assert(trial_va >= val_q[1][1]);
			
 
				+		assert(trial_va >= val_q[1][2]);
			
 
				+
			
 
				+		assert(trial_va >= val_q[0][0]);
			
 
				+		assert(trial_va >= val_q[0][1]);
			
 
				+		assert(trial_va >= val_q[0][2]);
			
 
				+
			
 
				+		bool did_clamp = false;
			
 
				+		int trial_max_clamp_mag = 0;
			
 
				+
			
 
				+		int trial_vb0 = compute_clamped_val(trial_va - val_q[1][1], 0, max_b_val, did_clamp, trial_max_clamp_mag);
			
 
				+		int trial_vb1 = compute_clamped_val(trial_va - val_q[1][2], 0, max_b_val, did_clamp, trial_max_clamp_mag);
			
 
				+		int trial_vc = compute_clamped_val(trial_va - val_q[0][0], 0, max_c_val, did_clamp, trial_max_clamp_mag);
			
 
				+		int trial_vd0 = compute_clamped_val((trial_va - trial_vb0 - trial_vc) - val_q[0][1], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag);
			
 
				+		int trial_vd1 = compute_clamped_val((trial_va - trial_vb1 - trial_vc) - val_q[0][2], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag);
			
 
				+
			
 
				+		if (!did_clamp)
			
 
				+		{
			
 
				+			// Make sure decoder gets the expected values
			
 
				+			assert(trial_va == val_q[1][0]);
			
 
				+			assert(trial_va - trial_vb0 == val_q[1][1]);
			
 
				+			assert(trial_va - trial_vb1 == val_q[1][2]);
			
 
				+
			
 
				+			assert((trial_va - trial_vc) == val_q[0][0]);
			
 
				+			assert((trial_va - trial_vb0 - trial_vc - trial_vd0) == val_q[0][1]);
			
 
				+			assert((trial_va - trial_vb1 - trial_vc - trial_vd1) == val_q[0][2]);
			
 
				+		}
			
 
				+
			
 
				+		const int r_e0 = clamp<int>(trial_va, 0, max_a_val);
			
 
				+		const int r_e1 = clamp<int>(trial_va - trial_vb0, 0, max_a_val);
			
 
				+		const int r_e2 = clamp<int>(trial_va - trial_vb1, 0, max_a_val);
			
 
				+
			
 
				+		const int r_f0 = clamp<int>(trial_va - trial_vc, 0, max_a_val);
			
 
				+		const int r_f1 = clamp<int>(trial_va - trial_vb0 - trial_vc - trial_vd0, 0, max_a_val);
			
 
				+		const int r_f2 = clamp<int>(trial_va - trial_vb1 - trial_vc - trial_vd1, 0, max_a_val);
			
 
				+
			
 
				+		assert(r_e0 <= max_a_qlog);
			
 
				+		assert(r_e1 <= max_a_qlog);
			
 
				+		assert(r_e2 <= max_a_qlog);
			
 
				+
			
 
				+		assert(r_f0 <= max_a_qlog);
			
 
				+		assert(r_f1 <= max_a_qlog);
			
 
				+		assert(r_f2 <= max_a_qlog);
			
 
				+
			
 
				+		if ((!did_clamp) || (!had_tie))
			
 
				+		{
			
 
				+			best_va = trial_va;
			
 
				+			best_vb0 = trial_vb0;
			
 
				+			best_vb1 = trial_vb1;
			
 
				+			best_vc = trial_vc;
			
 
				+			best_vd0 = trial_vd0;
			
 
				+			best_vd1 = trial_vd1;
			
 
				+			best_max_clamp_mag = trial_max_clamp_mag;
			
 
				+			best_did_clamp = did_clamp;
			
 
				+
			
 
				+			best_q[1][0] = r_e0;
			
 
				+			best_q[1][1] = r_e1;
			
 
				+			best_q[1][2] = r_e2;
			
 
				+			best_q[0][0] = r_f0;
			
 
				+			best_q[0][1] = r_f1;
			
 
				+			best_q[0][2] = r_f2;
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		// we had a tie and it did clamp, try swapping L/H for a potential slight gain
			
 
				+
			
 
				+		const uint32_t r_dist1 = basisu::square<int>(r_e0 - val_q[1][0]) + basisu::square<int>(r_e1 - val_q[1][1]) + basisu::square<int>(r_e2 - val_q[1][2]);
			
 
				+		const uint32_t r_dist0 = basisu::square<int>(r_f0 - val_q[0][0]) + basisu::square<int>(r_f1 - val_q[0][1]) + basisu::square<int>(r_f2 - val_q[0][2]);
			
 
				+
			
 
				+		const uint32_t total_dist = r_dist1 + r_dist0;
			
 
				+
			
 
				+		if (total_dist < best_dist)
			
 
				+		{
			
 
				+			best_dist = total_dist;
			
 
				+
			
 
				+			best_va = trial_va;
			
 
				+			best_vb0 = trial_vb0;
			
 
				+			best_vb1 = trial_vb1;
			
 
				+			best_vc = trial_vc;
			
 
				+			best_vd0 = trial_vd0;
			
 
				+			best_vd1 = trial_vd1;
			
 
				+			best_did_clamp = did_clamp;
			
 
				+
			
 
				+			best_q[1][0] = r_e0;
			
 
				+			best_q[1][1] = r_e1;
			
 
				+			best_q[1][2] = r_e2;
			
 
				+			best_q[0][0] = r_f0;
			
 
				+			best_q[0][1] = r_f1;
			
 
				+			best_q[0][2] = r_f2;
			
 
				+		}
			
 
				+
			
 
				+		for (uint32_t c = 0; c < 3; c++)
			
 
				+			std::swap(val_q[0][c], val_q[1][c]);
			
 
				+	}
			
 
				+
			
 
				+	// pack bits now
			
 
				+	int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0;
			
 
				+
			
 
				+	int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0;
			
 
				+	switch (submode)
			
 
				+	{
			
 
				+	case 0:
			
 
				+		x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
			
 
				+		break;
			
 
				+	case 1:
			
 
				+		x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
			
 
				+		break;
			
 
				+	case 2:
			
 
				+		x0 = get_bit(best_va, 9); x1 = get_bit(best_vc, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
			
 
				+		break;
			
 
				+	case 3:
			
 
				+		x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 9); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
			
 
				+		break;
			
 
				+	case 4:
			
 
				+		x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10);
			
 
				+		break;
			
 
				+	case 5:
			
 
				+		x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_vc, 7); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
			
 
				+		break;
			
 
				+	case 6:
			
 
				+		x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10);
			
 
				+		break;
			
 
				+	case 7:
			
 
				+		x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
			
 
				+		break;
			
 
				+	default:
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	// write mode
			
 
				+	pack_bit(v1, 7, submode, 0);
			
 
				+	pack_bit(v2, 7, submode, 1);
			
 
				+	pack_bit(v3, 7, submode, 2);
			
 
				+
			
 
				+	// highest component
			
 
				+	pack_bit(v4, 7, highest_comp, 0);
			
 
				+	pack_bit(v5, 7, highest_comp, 1);
			
 
				+
			
 
				+	// write bit 8 of va
			
 
				+	pack_bit(v1, 6, best_va, 8);
			
 
				+
			
 
				+	// extra bits
			
 
				+	pack_bit(v2, 6, x0);
			
 
				+	pack_bit(v3, 6, x1);
			
 
				+	pack_bit(v4, 6, x2);
			
 
				+	pack_bit(v5, 6, x3);
			
 
				+	pack_bit(v4, 5, x4);
			
 
				+	pack_bit(v5, 5, x5);
			
 
				+
			
 
				+	v0 = best_va & 0xFF;
			
 
				+	v1 |= (best_vc & 63);
			
 
				+	v2 |= (best_vb0 & 63);
			
 
				+	v3 |= (best_vb1 & 63);
			
 
				+	v4 |= (best_vd0 & 31);
			
 
				+	v5 |= (best_vd1 & 31);
			
 
				+
			
 
				+	assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255));
			
 
				+
			
 
				+	pEndpoints[0] = (uint8_t)v0;
			
 
				+	pEndpoints[1] = (uint8_t)v1;
			
 
				+	pEndpoints[2] = (uint8_t)v2;
			
 
				+	pEndpoints[3] = (uint8_t)v3;
			
 
				+	pEndpoints[4] = (uint8_t)v4;
			
 
				+	pEndpoints[5] = (uint8_t)v5;
			
 
				+
			
 
				+#ifdef _DEBUG
			
 
				+	// Test for valid pack by unpacking
			
 
				+	{
			
 
				+		if (highest_comp)
			
 
				+		{
			
 
				+			std::swap(best_q[0][0], best_q[0][highest_comp]);
			
 
				+			std::swap(best_q[1][0], best_q[1][highest_comp]);
			
 
				+
			
 
				+			std::swap(orig_q[0][0], orig_q[0][highest_comp]);
			
 
				+			std::swap(orig_q[1][0], orig_q[1][highest_comp]);
			
 
				+		}
			
 
				+
			
 
				+		int test_e[2][3];
			
 
				+		decode_mode11_to_qlog12(pEndpoints, test_e, astc_helpers::BISE_256_LEVELS);
			
 
				+		for (uint32_t i = 0; i < 2; i++)
			
 
				+		{
			
 
				+			for (uint32_t j = 0; j < 3; j++)
			
 
				+			{
			
 
				+				assert(best_q[i][j] == test_e[i][j] >> (12 - a_bits));
			
 
				+
			
 
				+				if (!best_did_clamp)
			
 
				+				{
			
 
				+					assert((orig_q[i][j] == test_e[i][j] >> (12 - a_bits)) ||
			
 
				+						(orig_q[1 - i][j] == test_e[i][j] >> (12 - a_bits)));
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				+	max_clamp_mag = best_max_clamp_mag;
			
 
				+
			
 
				+	return best_did_clamp;
			
 
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+// Packs a low/high pair of QLOG16 RGB endpoints into the six raw bytes of the ASTC HDR mode 11 "direct" encoding.
				+// Bytes are interleaved per component: pEndpoints[2*c] receives the low value, pEndpoints[2*c+1] the high value.
				+// Components 0 and 1 are stored as 8-bit QLOG codes; component 2 is stored as a 7-bit QLOG code with bit 7 forced on.
				+static void pack_astc_mode11_direct(uint8_t* pEndpoints, const vec3F& l_q16, const vec3F& h_q16)
				+{
				+	for (uint32_t comp = 0; comp < 3; comp++)
				+	{
				+		// Only the last component is limited to 7 bits of precision.
				+		const bool seven_bit = (comp == 2);
				+		const int max_q = seven_bit ? (int)MAX_QLOG7 : (int)MAX_QLOG8;
				+
				+		// TODO: This goes from QLOG16->HALF->QLOG8/7
				+		half_float lo_half = qlog16_to_half_slow(clamp((int)std::round(l_q16[comp]), 0, 65535));
				+		half_float hi_half = qlog16_to_half_slow(clamp((int)std::round(h_q16[comp]), 0, 65535));
				+
				+		int lo_q, hi_q;
				+		if (seven_bit)
				+		{
				+			lo_q = g_half_to_qlog7[bounds_check((uint32_t)lo_half, 0U, 32768U)];
				+			hi_q = g_half_to_qlog7[bounds_check((uint32_t)hi_half, 0U, 32768U)];
				+		}
				+		else
				+		{
				+			lo_q = g_half_to_qlog8[bounds_check((uint32_t)lo_half, 0U, 32768U)];
				+			hi_q = g_half_to_qlog8[bounds_check((uint32_t)hi_half, 0U, 32768U)];
				+		}
				+
				+		lo_q = minimum<uint32_t>(lo_q, max_q);
				+		hi_q = minimum<uint32_t>(hi_q, max_q);
				+
				+		if (lo_q == hi_q)
				+		{
				+			// Both endpoints landed on the same code. Push them one step apart where the range allows it,
				+			// keeping the ordering of the unquantized inputs.
				+			const bool low_first = (l_q16[comp] <= h_q16[comp]);
				+			int& lesser = low_first ? lo_q : hi_q;
				+			int& greater = low_first ? hi_q : lo_q;
				+
				+			if (lesser)
				+				lesser--;
				+
				+			if (greater != max_q)
				+				greater++;
				+		}
				+
				+		assert(lo_q <= max_q && hi_q <= max_q);
				+
				+		if (seven_bit)
				+		{
				+			// Bit 7 of the last byte pair is always set in this encoding.
				+			lo_q |= 128;
				+			hi_q |= 128;
				+		}
				+
				+		pEndpoints[2 * comp + 0] = (uint8_t)lo_q;
				+		pEndpoints[2 * comp + 1] = (uint8_t)hi_q;
				+	}
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
				+// Packs an RGB color plus a scale value (both in QLOG16 units) into the 4 raw endpoint bytes of one ASTC HDR mode 7 submode (0-5).
				+static bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range)
				+{ // Returns true if a field had to be clamped to fit (only possible for submodes 0-4); max_clamp_mag receives the largest overshoot.
				+	assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
				+
				+	assert(submode <= 5);
				+	max_clamp_mag = 0;
				+	// Per-submode field widths in bits: slot 0 (R), slots 1/2 (G and B), and the scale (S).
				+	static const uint8_t s_r_bits[6] = { 11, 11, 10, 9, 8, 7 };
				+	static const uint8_t s_g_b_bits[6] = { 5, 6, 5, 6, 7, 7 };
				+	static const uint8_t s_s_bits[6] = { 7, 5, 8, 7, 6, 7 };
				+
				+	// The precision of the components: all four values are quantized at the width of the slot 0 field.
				+	const uint32_t prec_bits = s_r_bits[submode];
				+
				+	int qlog[4], pack_bits[4]; // index 0-2 = R,G,B; index 3 = scale
				+
				+	for (uint32_t i = 0; i < 4; i++)
				+	{
				+		const float f = (i == 3) ? s_q16 : rgb_q16[i];
				+
				+		// The # of bits the component is packed into
				+		if (i == 0)
				+			pack_bits[i] = s_r_bits[submode];
				+		else if (i == 3)
				+			pack_bits[i] = s_s_bits[submode];
				+		else
				+			pack_bits[i] = s_g_b_bits[submode];
				+
				+#if 0
				+		// this is slightly worse (disabled alternative that goes through half float)
				+		// TODO: going from qlog16 to half loses some precision. Then going from half to qlog 7-12 will have extra error.
				+		half_float h = qlog_to_half(clamp((int)std::round(f), 0, MAX_QLOG16), 16);
				+		qlog[i] = half_to_qlog7_12((half_float)bounds_check((uint32_t)h, 0U, 32768U), prec_bits);
				+#else
				+		qlog[i] = quant_qlog16(clamp<int>((int)std::round(f), 0, MAX_QLOG16), prec_bits);
				+
				+		// Only bias if there are enough texel weights (the original note reads "4=6 weights", i.e. ISE weight range 4).
				+		if (ise_weight_range >= 4)
				+		{
				+			// Explicitly bias the high color, and the scale up, to better exploit the weights.
				+			// The quantized range also then encompasses the complete input range.
				+			const uint32_t max_val = (1 << prec_bits) - 1;
				+			const uint32_t K = 3;
				+			if (i == 3)
				+			{
				+				qlog[i] = minimum<uint32_t>(qlog[i] + K * 2, max_val); // the scale gets twice the bias of a color component
				+			}
				+			else
				+			{
				+				qlog[i] = minimum<uint32_t>(qlog[i] + K, max_val);
				+			}
				+		}
				+#endif
				+
				+		if (i != 3)
				+			qlog[i] = minimum<uint32_t>(qlog[i], get_max_qlog(prec_bits)); // color components are additionally capped at get_max_qlog()
				+
				+		// If S=0, we lose freedom for the texel weights to add any value.
				+		if ((i == 3) && (qlog[i] == 0))
				+			qlog[i] = 1;
				+	}
				+
				+	uint32_t maj_index = 0; // which of R/G/B was moved into slot 0; stays 0 for submode 5
				+
				+	bool did_clamp = false;
				+
				+	if (submode != 5)
				+	{ // Submodes 0-4: largest component goes to slot 0, the other two are stored as differences from it.
				+		int largest_qlog = 0;
				+		for (uint32_t i = 0; i < 3; i++)
				+		{
				+			if (qlog[i] > largest_qlog)
				+			{
				+				largest_qlog = qlog[i];
				+				maj_index = i;
				+			}
				+		}
				+
				+		if (maj_index)
				+		{
				+			std::swap(qlog[0], qlog[maj_index]);
				+		}
				+
				+		assert(qlog[0] >= qlog[1]);
				+		assert(qlog[0] >= qlog[2]);
				+		// From here on qlog[1] and qlog[2] hold (major - component), which is never negative.
				+		qlog[1] = qlog[0] - qlog[1];
				+		qlog[2] = qlog[0] - qlog[2];
				+		// Clamp the two differences and the scale to their field widths, recording the worst overshoot.
				+		for (uint32_t i = 1; i < 4; i++)
				+		{
				+			const int max_val = (1 << pack_bits[i]) - 1;
				+
				+			if (qlog[i] > max_val)
				+			{
				+				max_clamp_mag = maximum<int>(max_clamp_mag, qlog[i] - max_val);
				+				qlog[i] = max_val;
				+				did_clamp = true;
				+			}
				+		}
				+	}
				+	// Sanity check (debug builds): every value must now fit its field.
				+	for (uint32_t i = 0; i < 4; i++)
				+	{
				+		const int max_val = (1 << pack_bits[i]) - 1; (void)max_val;
				+
				+		assert(qlog[i] <= max_val);
				+	}
				+
				+	int mode = 0; // 4-bit mode value, spread over the top bits of bytes 0-2 below
				+	// The low bits of each value go into fixed fields; the remaining high bits are routed through x0..x6 per submode.
				+	int r = qlog[0] & 63; // 6-bits
				+	int g = qlog[1] & 31; // 5-bits
				+	int b = qlog[2] & 31; // 5-bits
				+	int s = qlog[3] & 31; // 5-bits
				+
				+	int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0, x6 = 0;
				+	// In the bit labels below R is slot 0 (the major component for submodes 0-4), G/B are slots 1/2, S is the scale.
				+	switch (submode)
				+	{
				+	case 0: // R:11 G/B:5 S:7 bits
				+	{
				+		mode = (maj_index << 2) | 0; // submodes 0-3: major index in bits 2-3, submode in bits 0-1
				+		assert((mode & 0xC) != 0xC);
				+
				+		x0 = get_bit(qlog[0], 9); // R9
				+		x1 = get_bit(qlog[0], 8); // R8
				+		x2 = get_bit(qlog[0], 7); // R7
				+		x3 = get_bit(qlog[0], 10); // R10
				+		x4 = get_bit(qlog[0], 6); // R6
				+		x5 = get_bit(qlog[3], 6); // S6
				+		x6 = get_bit(qlog[3], 5); // S5
				+		break;
				+	}
				+	case 1: // R:11 G/B:6 S:5 bits
				+	{
				+		mode = (maj_index << 2) | 1;
				+		assert((mode & 0xC) != 0xC);
				+
				+		x0 = get_bit(qlog[0], 8); // R8
				+		x1 = get_bit(qlog[1], 5); // G5
				+		x2 = get_bit(qlog[0], 7); // R7
				+		x3 = get_bit(qlog[2], 5); // B5
				+		x4 = get_bit(qlog[0], 6); // R6
				+		x5 = get_bit(qlog[0], 10); // R10
				+		x6 = get_bit(qlog[0], 9); // R9
				+		break;
				+	}
				+	case 2: // R:10 G/B:5 S:8 bits
				+	{
				+		mode = (maj_index << 2) | 2;
				+		assert((mode & 0xC) != 0xC);
				+
				+		x0 = get_bit(qlog[0], 9); // R9
				+		x1 = get_bit(qlog[0], 8); // R8
				+		x2 = get_bit(qlog[0], 7); // R7
				+		x3 = get_bit(qlog[0], 6); // R6
				+		x4 = get_bit(qlog[3], 7); // S7
				+		x5 = get_bit(qlog[3], 6); // S6
				+		x6 = get_bit(qlog[3], 5); // S5
				+		break;
				+	}
				+	case 3: // R:9 G/B:6 S:7 bits
				+	{
				+		mode = (maj_index << 2) | 3;
				+		assert((mode & 0xC) != 0xC);
				+
				+		x0 = get_bit(qlog[0], 8); // R8
				+		x1 = get_bit(qlog[1], 5); // G5
				+		x2 = get_bit(qlog[0], 7); // R7
				+		x3 = get_bit(qlog[2], 5); // B5
				+		x4 = get_bit(qlog[0], 6); // R6
				+		x5 = get_bit(qlog[3], 6); // S6
				+		x6 = get_bit(qlog[3], 5); // S5
				+		break;
				+	}
				+	case 4: // R:8 G/B:7 S:6 bits
				+	{
				+		mode = maj_index | 0xC; // 0b1100 in bits 2-3, major index in bits 0-1
				+		assert((mode & 0xC) == 0xC);
				+		assert(mode != 0xF);
				+
				+		x0 = get_bit(qlog[1], 6); // G6
				+		x1 = get_bit(qlog[1], 5); // G5
				+		x2 = get_bit(qlog[2], 6); // B6
				+		x3 = get_bit(qlog[2], 5); // B5
				+		x4 = get_bit(qlog[0], 6); // R6
				+		x5 = get_bit(qlog[0], 7); // R7
				+		x6 = get_bit(qlog[3], 5); // S5
				+		break;
				+	}
				+	case 5: // R:7 G/B:7 S:7 bits; components are stored as-is (no major component, no differences)
				+	{
				+		mode = 0xF;
				+
				+		x0 = get_bit(qlog[1], 6); // G6
				+		x1 = get_bit(qlog[1], 5); // G5
				+		x2 = get_bit(qlog[2], 6); // B6
				+		x3 = get_bit(qlog[2], 5); // B5
				+		x4 = get_bit(qlog[0], 6); // R6
				+		x5 = get_bit(qlog[3], 6); // S6
				+		x6 = get_bit(qlog[3], 5); // S5
				+		break;
				+	}
				+	default:
				+	{
				+		assert(0);
				+		break;
				+	}
				+	}
				+	// Assemble the four bytes: mode bits occupy bit 7 of bytes 0-2 and bit 6 of byte 0; x0..x6 fill the remaining high bits.
				+	pEndpoints[0] = (uint8_t)((get_bit(mode, 1) << 7) | (get_bit(mode, 0) << 6) | r);
				+	pEndpoints[1] = (uint8_t)((get_bit(mode, 2) << 7) | (x0 << 6) | (x1 << 5) | g);
				+	pEndpoints[2] = (uint8_t)((get_bit(mode, 3) << 7) | (x2 << 6) | (x3 << 5) | b);
				+	pEndpoints[3] = (uint8_t)((x4 << 7) | (x5 << 6) | (x6 << 5) | s);
				+
				+#ifdef _DEBUG
				+	// Test for valid pack by unpacking: rebuild the expected 12-bit endpoints from qlog[] and compare them with the decoder's output.
				+	{
				+		const int inv_shift = 12 - prec_bits; // scales prec_bits-wide values up to 12 bits
				+
				+		int unpacked_e[2][3]; // [1] = high endpoint, [0] = high endpoint minus the scale
				+		if (submode != 5)
				+		{
				+			unpacked_e[1][0] = left_shift32(qlog[0], inv_shift);
				+			unpacked_e[1][1] = clamp(left_shift32((qlog[0] - qlog[1]), inv_shift), 0, 0xFFF);
				+			unpacked_e[1][2] = clamp(left_shift32((qlog[0] - qlog[2]), inv_shift), 0, 0xFFF);
				+
				+			unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF);
				+			unpacked_e[0][1] = clamp(left_shift32(((qlog[0] - qlog[1]) - qlog[3]), inv_shift), 0, 0xFFF);
				+			unpacked_e[0][2] = clamp(left_shift32(((qlog[0] - qlog[2]) - qlog[3]), inv_shift), 0, 0xFFF);
				+		}
				+		else
				+		{
				+			unpacked_e[1][0] = left_shift32(qlog[0], inv_shift);
				+			unpacked_e[1][1] = left_shift32(qlog[1], inv_shift);
				+			unpacked_e[1][2] = left_shift32(qlog[2], inv_shift);
				+
				+			unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF);
				+			unpacked_e[0][1] = clamp(left_shift32((qlog[1] - qlog[3]), inv_shift), 0, 0xFFF);
				+			unpacked_e[0][2] = clamp(left_shift32((qlog[2] - qlog[3]), inv_shift), 0, 0xFFF);
				+		}
				+		// Undo the major-component swap so the channels are back in R,G,B order.
				+		if (maj_index)
				+		{
				+			std::swap(unpacked_e[0][0], unpacked_e[0][maj_index]);
				+			std::swap(unpacked_e[1][0], unpacked_e[1][maj_index]);
				+		}
				+
				+		int e[2][3];
				+		decode_mode7_to_qlog12_ise20(pEndpoints, e, nullptr);
				+
				+		for (uint32_t i = 0; i < 3; i++)
				+		{
				+			assert(unpacked_e[0][i] == e[0][i]);
				+			assert(unpacked_e[1][i] == e[1][i]);
				+		}
				+	}
				+#endif
				+
				+	return did_clamp;
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+// Converts n raw 8-bit endpoint values from pSrc_endpoints into the representation used by the given ISE endpoint
				+// range, writing the results to pDst_endpoints.
				+// With the 256-level range the bytes are copied unchanged; for any other range each byte is mapped through that
				+// range's m_val_to_ise lookup table.
				+static void quantize_ise_endpoints(uint32_t ise_endpoint_range, const uint8_t* pSrc_endpoints, uint8_t *pDst_endpoints, uint32_t n)
				+{
				+	assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
				+
				+	if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
				+	{
				+		memcpy(pDst_endpoints, pSrc_endpoints, n);
				+		return;
				+	}
				+
				+	// The table depends only on the range, so look it up once instead of once per byte.
				+	const auto& endpoint_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range);
				+
				+	for (uint32_t i = 0; i < n; i++)
				+	{
				+		// A uint8_t source value is always a valid index in [0, 255], so no range check is needed here.
				+		const uint32_t v = pSrc_endpoints[i];
				+
				+		pDst_endpoints[i] = endpoint_tab.m_val_to_ise[v];
				+	}
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+// Tries ASTC HDR mode 11 encodings of the given low/high QLOG16 endpoints, walking the submodes from last_submode
				+// down to first_submode. Submode -1 is the direct encoding; direct_only restricts the search to just that one.
				+// Every candidate whose error beats cur_block_error replaces pEndpoints, pWeights and cur_block_error, and
				+// submode_used is set to the candidate's submode + 1 (so 0 means direct).
				+// Note this may not find any valid solution at all when ise_endpoint_range isn't the 256-level range (20).
				+// Returns true if at least one candidate improved on cur_block_error.
				+static bool try_mode11(uint32_t num_pixels,
				+	uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
				+	vec3F& low_color_q16, const vec3F& high_color_q16,
				+	half_float block_pixels_half[16][3],
				+	uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
				+	bool constrain_ise_weight8_selectors,
				+	int32_t first_submode, int32_t last_submode) // -1, 7
				+{
				+	assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
				+	assert((num_weight_levels >= 3) && (num_weight_levels <= 32));
				+	assert((num_pixels >= 1) && (num_pixels <= 16));
				+
				+	if (direct_only)
				+		first_submode = last_submode = -1;
				+
				+	assert(first_submode <= last_submode);
				+	assert((first_submode >= -1) && (first_submode <= 7));
				+	assert((last_submode >= -1) && (last_submode <= 7));
				+
				+	// If a submode had to clamp and the clamp was too high, it'll distort the endpoint colors too much,
				+	// which could lead to noticeable artifacts. Such submodes are skipped.
				+	const int MAX_CLAMP_MAG_ACCEPT_THRESH = 4;
				+
				+	const bool full_endpoint_range = (ise_endpoint_range == astc_helpers::BISE_256_LEVELS);
				+
				+	// Optionally restrict the 16-level weight range to a subset of eight selectors.
				+	uint32_t usable_selector_bitmask = UINT32_MAX;
				+	if ((constrain_ise_weight8_selectors) && (ise_weight_range == astc_helpers::BISE_16_LEVELS))
				+		usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 10) | (1 << 11) | (1 << 14) | (1 << 15);
				+
				+	half_float decoded_half[32][3];
				+	vec3F decoded_float[32];
				+	uint8_t raw_endpoints[NUM_MODE11_ENDPOINTS], quant_endpoints[NUM_MODE11_ENDPOINTS], cand_weights[16];
				+
				+	bool any_improvement = false;
				+
				+	// TODO: First determine if a submode doesn't clamp first. If one is found, encode to that and we're done.
				+	for (int submode = last_submode; submode >= first_submode; submode--)
				+	{
				+		bool clamped = false;
				+
				+		if (submode == -1)
				+		{
				+			// If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision.
				+			pack_astc_mode11_direct(raw_endpoints, low_color_q16, high_color_q16);
				+		}
				+		else
				+		{
				+			int clamp_mag = 0;
				+			clamped = pack_astc_mode11_submode(submode, raw_endpoints, low_color_q16, high_color_q16, clamp_mag);
				+
				+			if (clamped && (clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
				+				continue;
				+		}
				+
				+		// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
				+		// It could massively distort the endpoints, but still result in a valid encoding.
				+		quantize_ise_endpoints(ise_endpoint_range, raw_endpoints, quant_endpoints, NUM_MODE11_ENDPOINTS);
				+
				+		if (!get_astc_hdr_mode_11_block_colors(quant_endpoints, &decoded_half[0][0], decoded_float, num_weight_levels, ise_weight_range, ise_endpoint_range))
				+			continue;
				+
				+		const double cand_error = eval_selectors(num_pixels, cand_weights, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions, usable_selector_bitmask);
				+		if (cand_error < cur_block_error)
				+		{
				+			cur_block_error = cand_error;
				+			memcpy(pEndpoints, quant_endpoints, NUM_MODE11_ENDPOINTS);
				+			memcpy(pWeights, cand_weights, num_pixels);
				+			submode_used = submode + 1;
				+			any_improvement = true;
				+		}
				+
				+		// If it didn't clamp it was a lossless encode at this precision, so we can stop early as there's probably no use trying lower precision submodes.
				+		// (Although it may be, because a lower precision pack could try nearby voxel coords.)
				+		// However, at lower levels quantization may cause the decoded endpoints to be very distorted, so we need to evaluate up to direct.
				+		if (full_endpoint_range && !clamped)
				+			break;
				+	}
				+
				+	return any_improvement;
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+// Tries the six ASTC HDR mode 7 submodes (0..5) for a color and scale given in QLOG16 units.
				+// Each time a submode's weight fit beats cur_block_error, that candidate is written to pEndpoints/pWeights and
				+// cur_block_error and submode_used are updated to match.
				+// Submodes 0-4 are skipped when packing had to clamp a field by more than MAX_CLAMP_MAG_ACCEPT_THRESH.
				+// With the 256-level endpoint range, the search stops after the first evaluated submode that packed without clamping.
				+// Returns true if any submode improved on cur_block_error.
				+static bool try_mode7(
				+	uint32_t num_pixels,
				+	uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
				+	vec3F& high_color_q16, const float s_q16,
				+	half_float block_pixels_half[16][3],
				+	uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_options& coptions,
				+	uint32_t ise_endpoint_range)
				+{
				+	// The scratch arrays below are handed to the block-color helper together with num_weight_levels (presumably one
				+	// decoded color per weight level). They are sized for 32 levels, the same bound try_mode11 uses, because
				+	// ise_weight_range is accepted all the way up to MAX_SUPPORTED_ISE_WEIGHT_INDEX; 24 entries would not be enough
				+	// for a 32-level weight range.
				+	const uint32_t MAX_WEIGHT_LEVELS = 32;
				+
				+	assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
				+	assert(num_weight_levels <= MAX_WEIGHT_LEVELS);
				+	assert((num_pixels >= 1) && (num_pixels <= 16));
				+
				+	bool improved_flag = false;
				+
				+	half_float decoded_half[MAX_WEIGHT_LEVELS][3];
				+	vec3F decoded_float[MAX_WEIGHT_LEVELS];
				+
				+	uint8_t orig_trial_endpoints[NUM_MODE7_ENDPOINTS], trial_endpoints[NUM_MODE7_ENDPOINTS], trial_weights[16];
				+
				+	// TODO: First determine if a submode doesn't clamp first. If one is found, encode to that and we're done.
				+	for (int submode = 0; submode <= 5; submode++)
				+	{
				+		int max_clamp_mag = 0;
				+		const bool did_clamp = pack_astc_mode7_submode(submode, orig_trial_endpoints, high_color_q16, s_q16, max_clamp_mag, ise_weight_range);
				+
				+		if (submode < 5)
				+		{
				+			// Reject submodes whose packing distorted a field by more than this many quantization steps.
				+			const int MAX_CLAMP_MAG_ACCEPT_THRESH = 4;
				+			if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
				+				continue;
				+		}
				+
				+		// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
				+		// It could massively distort the endpoints, but still result in a valid encoding.
				+		quantize_ise_endpoints(ise_endpoint_range, orig_trial_endpoints, trial_endpoints, NUM_MODE7_ENDPOINTS);
				+
				+		if (!get_astc_hdr_mode_7_block_colors(trial_endpoints, &decoded_half[0][0], decoded_float, num_weight_levels, ise_weight_range, ise_endpoint_range))
				+			continue;
				+
				+		double trial_blk_error = eval_selectors(num_pixels, trial_weights, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions);
				+		if (trial_blk_error < cur_block_error)
				+		{
				+			cur_block_error = trial_blk_error;
				+			memcpy(pEndpoints, trial_endpoints, NUM_MODE7_ENDPOINTS);
				+			memcpy(pWeights, trial_weights, num_pixels);
				+			submode_used = submode;
				+			improved_flag = true;
				+		}
				+
				+		// With unquantized (256-level) endpoints, a pack that didn't clamp is exact at this precision, so stop here.
				+		if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
				+		{
				+			if (!did_clamp)
				+				break;
				+		}
				+	}
				+
				+	return improved_flag;
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+static double encode_astc_hdr_block_mode_11(
			
 
				+	uint32_t num_pixels,
			
 
				+	const vec4F* pBlock_pixels,
			
 
				+	uint32_t ise_weight_range,
			
 
				+	uint32_t& best_submode,
			
 
				+	double cur_block_error,
			
 
				+	uint8_t* blk_endpoints, uint8_t* blk_weights,
			
 
				+	const astc_hdr_codec_options& coptions,
			
 
				+	bool direct_only,
			
 
				+	uint32_t ise_endpoint_range,
			
 
				+	bool uber_mode,
			
 
				+	bool constrain_ise_weight8_selectors,
			
 
				+	int32_t first_submode, int32_t last_submode)
			
 
				+{
			
 
				+	assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
			
 
				+	assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
			
 
				+	assert((num_pixels >= 1) && (num_pixels <= 16));
			
 
				+
			
 
				+	best_submode = 0;
			
 
				+
			
 
				+	half_float block_pixels_half[16][3];
			
 
				+	vec4F block_pixels_q16[16];
			
 
				+		
			
 
				+	// TODO: This is done redundantly.
			
 
				+	for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+	{
			
 
				+		block_pixels_half[i][0] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][0]);
			
 
				+		block_pixels_q16[i][0] = (float)half_to_qlog16(block_pixels_half[i][0]);
			
 
				+
			
 
				+		block_pixels_half[i][1] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][1]);
			
 
				+		block_pixels_q16[i][1] = (float)half_to_qlog16(block_pixels_half[i][1]);
			
 
				+
			
 
				+		block_pixels_half[i][2] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][2]);
			
 
				+		block_pixels_q16[i][2] = (float)half_to_qlog16(block_pixels_half[i][2]);
			
 
				+
			
 
				+		block_pixels_q16[i][3] = 0.0f;
			
 
				+	}
			
 
				+
			
 
				+	const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);
			
 
				+	
			
 
				+	// TODO: should match MAX_SUPPORTED_ISE_WEIGHT_INDEX
			
 
				+	const uint32_t MAX_WEIGHT_LEVELS = 32;
			
 
				+	(void)MAX_WEIGHT_LEVELS;
			
 
				+	assert(num_weight_levels <= MAX_WEIGHT_LEVELS);
			
 
				+
			
 
				+	vec3F block_mean_color_q16(calc_mean(num_pixels, block_pixels_q16));
			
 
				+	vec3F block_axis_q16(calc_rgb_pca(num_pixels, block_pixels_q16, block_mean_color_q16));
			
 
				+
			
 
				+	aabb3F color_box_q16(cInitExpand);
			
 
				+
			
 
				+	float l = 1e+30f, h = -1e+30f;
			
 
				+	vec3F low_color_q16, high_color_q16;
			
 
				+
			
 
				+	for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+	{
			
 
				+		color_box_q16.expand(block_pixels_q16[i]);
			
 
				+
			
 
				+		vec3F k(vec3F(block_pixels_q16[i]) - block_mean_color_q16);
			
 
				+		float kd = k.dot(block_axis_q16);
			
 
				+
			
 
				+		if (kd < l)
			
 
				+		{
			
 
				+			l = kd;
			
 
				+			low_color_q16 = block_pixels_q16[i];
			
 
				+		}
			
 
				+
			
 
				+		if (kd > h)
			
 
				+		{
			
 
				+			h = kd;
			
 
				+			high_color_q16 = block_pixels_q16[i];
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16);
			
 
				+	for (uint32_t i = 0; i < 3; i++)
			
 
				+	{
			
 
				+		low_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f);
			
 
				+		high_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f);
			
 
				+	}
			
 
				+		
			
 
				+	uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS];
			
 
				+	uint8_t trial_blk_weights[16];
			
 
				+	uint32_t trial_best_submode = 0;
			
 
				+	
			
 
				+	clear_obj(trial_blk_endpoints);
			
 
				+	clear_obj(trial_blk_weights);
			
 
				+	
			
 
				+	double trial_blk_error = 1e+30f;
			
 
				+
			
 
				+	bool did_improve = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,
			
 
				+		low_color_q16, high_color_q16,
			
 
				+		block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors,
			
 
				+		first_submode, last_submode);
			
 
				+	
			
 
				+	// If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do.
			
 
				+	if (!did_improve)
			
 
				+		return cur_block_error;
			
 
				+
			
 
				+	// Did the solution improve?
			
 
				+	if (trial_blk_error < cur_block_error)
			
 
				+	{
			
 
				+		cur_block_error = trial_blk_error;
			
 
				+		memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);
			
 
				+		memcpy(blk_weights, trial_blk_weights, num_pixels);
			
 
				+		best_submode = trial_best_submode;
			
 
				+	}
			
 
				+		
			
 
				+#define USE_LEAST_SQUARES (1)
			
 
				+#if USE_LEAST_SQUARES
			
 
				+	// least squares on the most promising trial weight indices found
			
 
				+	const uint32_t NUM_LS_PASSES = 3;
			
 
				+
			
 
				+	for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++)
			
 
				+	{
			
 
				+		vec3F l_q16, h_q16;
			
 
				+		if (!compute_least_squares_endpoints_rgb(num_pixels, trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, block_pixels_q16, color_box_q16))
			
 
				+			break;
			
 
				+
			
 
				+		bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
			
 
				+			l_q16, h_q16,
			
 
				+			block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors,
			
 
				+			first_submode, last_submode);
			
 
				+
			
 
				+		if (!was_improved)
			
 
				+			break;
			
 
				+
			
 
				+		// It's improved, so let's take the new weight indices.
			
 
				+		memcpy(trial_blk_weights, blk_weights, num_pixels);
			
 
				+
			
 
				+	} // pass
			
 
				+#endif
			
 
				+		
			
 
				+	if (uber_mode)
			
 
				+	{
			
 
				+		// Try varying the current best weight indices. This can be expanded/improved, but at potentially great cost.
			
 
				+
			
 
				+		uint8_t temp_astc_weights[16];
			
 
				+		memcpy(temp_astc_weights, trial_blk_weights, num_pixels);
			
 
				+
			
 
				+		uint32_t min_lin_sel = 256, max_lin_sel = 0;
			
 
				+		for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+		{
			
 
				+			const uint32_t astc_sel = temp_astc_weights[i];
			
 
				+
			
 
				+			const uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
			
 
				+			assert(lin_sel < num_weight_levels);
			
 
				+
			
 
				+			min_lin_sel = minimumu(min_lin_sel, lin_sel);
			
 
				+			max_lin_sel = maximumu(max_lin_sel, lin_sel);
			
 
				+		}
			
 
				+
			
 
				+		bool was_improved = false;
			
 
				+		(void)was_improved;
			
 
				+
			
 
				+		{
			
 
				+			bool weights_changed = false;
			
 
				+			uint8_t trial_weights[16];
			
 
				+			for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+			{
			
 
				+				uint32_t astc_sel = temp_astc_weights[i];
			
 
				+				uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
			
 
				+
			
 
				+				if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1)))
			
 
				+				{
			
 
				+					lin_sel++;
			
 
				+					weights_changed = true;
			
 
				+				}
			
 
				+
			
 
				+				trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];
			
 
				+			}
			
 
				+
			
 
				+			if (weights_changed)
			
 
				+			{
			
 
				+				vec3F l_q16, h_q16;
			
 
				+				if (compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, block_pixels_q16, color_box_q16))
			
 
				+				{
			
 
				+					if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
			
 
				+						l_q16, h_q16,
			
 
				+						block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors, 
			
 
				+						first_submode, last_submode))
			
 
				+					{
			
 
				+						was_improved = true;
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			bool weights_changed = false;
			
 
				+			uint8_t trial_weights[16];
			
 
				+			for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+			{
			
 
				+				uint32_t astc_sel = temp_astc_weights[i];
			
 
				+				uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
			
 
				+
			
 
				+				if ((lin_sel == max_lin_sel) && (lin_sel > 0))
			
 
				+				{
			
 
				+					lin_sel--;
			
 
				+					weights_changed = true;
			
 
				+				}
			
 
				+
			
 
				+				trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];
			
 
				+			}
			
 
				+
			
 
				+			if (weights_changed)
			
 
				+			{
			
 
				+				vec3F l_q16, h_q16;
			
 
				+				if (compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, block_pixels_q16, color_box_q16))
			
 
				+				{
			
 
				+					if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
			
 
				+						l_q16, h_q16,
			
 
				+						block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors,
			
 
				+						first_submode, last_submode))
			
 
				+					{
			
 
				+						was_improved = true;
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			bool weights_changed = false;
			
 
				+			uint8_t trial_weights[16];
			
 
				+			for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+			{
			
 
				+				uint32_t astc_sel = temp_astc_weights[i];
			
 
				+				uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
			
 
				+
			
 
				+				if ((lin_sel == max_lin_sel) && (lin_sel > 0))
			
 
				+				{
			
 
				+					lin_sel--;
			
 
				+					weights_changed = true;
			
 
				+				}
			
 
				+				else if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1)))
			
 
				+				{
			
 
				+					lin_sel++;
			
 
				+					weights_changed = true;
			
 
				+				}
			
 
				+
			
 
				+				trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];
			
 
				+			}
			
 
				+
			
 
				+			if (weights_changed)
			
 
				+			{
			
 
				+				vec3F l_q16, h_q16;
			
 
				+				if (compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, block_pixels_q16, color_box_q16))
			
 
				+				{
			
 
				+					if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
			
 
				+						l_q16, h_q16,
			
 
				+						block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors,
			
 
				+						first_submode, last_submode))
			
 
				+					{
			
 
				+						was_improved = true;
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	} // uber_mode
			
 
				+
			
 
				+	return cur_block_error;
			
 
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+static double encode_astc_hdr_block_mode_7(
			
 
				+	uint32_t num_pixels, const vec4F* pBlock_pixels,
			
 
				+	uint32_t ise_weight_range,
			
 
				+	uint32_t& best_submode,
			
 
				+	double cur_block_error,
			
 
				+	uint8_t* blk_endpoints,  //[4]
			
 
				+	uint8_t* blk_weights, // [num_pixels]
			
 
				+	const astc_hdr_codec_options& coptions,
			
 
				+	uint32_t ise_endpoint_range)
			
 
				+{
			
 
				+	assert((num_pixels >= 1) && (num_pixels <= 16));
			
 
				+	assert((ise_weight_range >= 1) && (ise_weight_range <= 10));
			
 
				+	assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
			
 
				+	const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);
			
 
				+
			
 
				+	const uint32_t MAX_WEIGHT_LEVELS = 24;
			
 
				+	assert(num_weight_levels <= MAX_WEIGHT_LEVELS);
			
 
				+	BASISU_NOTE_UNUSED(MAX_WEIGHT_LEVELS);
			
 
				+
			
 
				+	best_submode = 0;
			
 
				+
			
 
				+	half_float block_pixels_half[16][3];
			
 
				+
			
 
				+	vec4F block_pixels_q16[16];
			
 
				+	for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+	{
			
 
				+		block_pixels_half[i][0] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][0]);
			
 
				+		block_pixels_q16[i][0] = (float)half_to_qlog16(block_pixels_half[i][0]);
			
 
				+
			
 
				+		block_pixels_half[i][1] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][1]);
			
 
				+		block_pixels_q16[i][1] = (float)half_to_qlog16(block_pixels_half[i][1]);
			
 
				+
			
 
				+		block_pixels_half[i][2] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][2]);
			
 
				+		block_pixels_q16[i][2] = (float)half_to_qlog16(block_pixels_half[i][2]);
			
 
				+
			
 
				+		block_pixels_q16[i][3] = 0.0f;
			
 
				+	}
			
 
				+
			
 
				+	vec3F block_mean_color_q16(calc_mean(num_pixels, block_pixels_q16));
			
 
				+
			
 
				+	vec3F block_axis_q16(0.577350259f);
			
 
				+
			
 
				+	aabb3F color_box_q16(cInitExpand);
			
 
				+
			
 
				+	float l = 1e+30f, h = -1e+30f;
			
 
				+	for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+	{
			
 
				+		color_box_q16.expand(block_pixels_q16[i]);
			
 
				+
			
 
				+		vec3F k(vec3F(block_pixels_q16[i]) - block_mean_color_q16);
			
 
				+		float kd = k.dot(block_axis_q16);
			
 
				+
			
 
				+		l = basisu::minimum<float>(l, kd);
			
 
				+		h = basisu::maximum<float>(h, kd);
			
 
				+	}
			
 
				+
			
 
				+	vec3F low_color_q16(interp_color(block_mean_color_q16, block_axis_q16, l, color_box_q16, color_box_q16));
			
 
				+	vec3F high_color_q16(interp_color(block_mean_color_q16, block_axis_q16, h, color_box_q16, color_box_q16));
			
 
				+
			
 
				+	low_color_q16.clamp(0.0f, MAX_QLOG16_VAL);
			
 
				+	high_color_q16.clamp(0.0f, MAX_QLOG16_VAL);
			
 
				+
			
 
				+	vec3F diff(high_color_q16 - low_color_q16);
			
 
				+	float s_q16 = diff.dot(block_axis_q16) * block_axis_q16[0];
			
 
				+
			
 
				+	uint8_t trial_blk_endpoints[NUM_MODE7_ENDPOINTS];
			
 
				+	uint8_t trial_blk_weights[16];
			
 
				+	uint32_t trial_best_submode = 0;
			
 
				+
			
 
				+	clear_obj(trial_blk_endpoints);
			
 
				+	clear_obj(trial_blk_weights);
			
 
				+
			
 
				+	double trial_blk_error = 1e+30f;
			
 
				+
			
 
				+	bool did_improve = try_mode7(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,
			
 
				+		high_color_q16, ceilf(s_q16),
			
 
				+		block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range);
			
 
				+
			
 
				+	// If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do.
			
 
				+	if (!did_improve)
			
 
				+	{
			
 
				+		return cur_block_error;
			
 
				+	}
			
 
				+
			
 
				+	// Did the solution improve?
			
 
				+	if (trial_blk_error < cur_block_error)
			
 
				+	{
			
 
				+		cur_block_error = trial_blk_error;
			
 
				+		memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE7_ENDPOINTS);
			
 
				+		memcpy(blk_weights, trial_blk_weights, num_pixels);
			
 
				+		best_submode = trial_best_submode;
			
 
				+	}
			
 
				+
			
 
				+	const float one_over_num_pixels = 1.0f / (float)num_pixels;
			
 
				+
			
 
				+	const uint32_t NUM_TRIALS = 2;
			
 
				+	for (uint32_t trial = 0; trial < NUM_TRIALS; trial++)
			
 
				+	{
			
 
				+		// Given a set of selectors and S, try to compute a better high color
			
 
				+		vec3F new_high_color_q16(block_mean_color_q16);
			
 
				+
			
 
				+		int e[2][3];
			
 
				+		int cur_s = 0;
			
 
				+		if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, &cur_s, ise_endpoint_range))
			
 
				+			break;
			
 
				+
			
 
				+		cur_s <<= 4;
			
 
				+
			
 
				+		for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+		{
			
 
				+			uint32_t astc_sel = trial_blk_weights[i];
			
 
				+			float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f);
			
 
				+
			
 
				+			float k = (float)cur_s * (1.0f - lerp) * one_over_num_pixels;
			
 
				+			new_high_color_q16[0] += k;
			
 
				+			new_high_color_q16[1] += k;
			
 
				+			new_high_color_q16[2] += k;
			
 
				+		}
			
 
				+
			
 
				+		bool improved = try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
			
 
				+			new_high_color_q16, (float)cur_s,
			
 
				+			block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range);
			
 
				+
			
 
				+		if (improved)
			
 
				+		{
			
 
				+			memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS);
			
 
				+			memcpy(trial_blk_weights, blk_weights, num_pixels);
			
 
				+		}
			
 
				+
			
 
				+		// Given a set of selectors and a high color, try to compute a better S.
			
 
				+		float t = 0.0f;
			
 
				+
			
 
				+		for (uint32_t i = 0; i < num_pixels; i++)
			
 
				+		{
			
 
				+			uint32_t astc_sel = trial_blk_weights[i];
			
 
				+			float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f);
			
 
				+
			
 
				+			t += (1.0f) - lerp;
			
 
				+		}
			
 
				+
			
 
				+		t *= one_over_num_pixels;
			
 
				+
			
 
				+		//int e[2][3];
			
 
				+		if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, nullptr, ise_endpoint_range))
			
 
				+			break;
			
 
				+
			
 
				+		vec3F cur_h_q16((float)(e[1][0] << 4), (float)(e[1][1] << 4), (float)(e[1][2] << 4));
			
 
				+
			
 
				+		if (fabs(t) > .0000125f)
			
 
				+		{
			
 
				+			float s_r = (cur_h_q16[0] - block_mean_color_q16[0]) / t;
			
 
				+			float s_g = (cur_h_q16[1] - block_mean_color_q16[1]) / t;
			
 
				+			float s_b = (cur_h_q16[2] - block_mean_color_q16[2]) / t;
			
 
				+
			
 
				+			// TODO: gather statistics on these
			
 
				+			if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
			
 
				+				cur_h_q16, ceilf(s_r),
			
 
				+				block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range))
			
 
				+			{
			
 
				+				improved = true;
			
 
				+			}
			
 
				+
			
 
				+			if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
			
 
				+				cur_h_q16, ceilf(s_g),
			
 
				+				block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range))
			
 
				+			{
			
 
				+				improved = true;
			
 
				+			}
			
 
				+
			
 
				+			if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
			
 
				+				cur_h_q16, ceilf(s_b),
			
 
				+				block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range))
			
 
				+			{
			
 
				+				improved = true;
			
 
				+			}
			
 
				+
			
 
				+			if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
			
 
				+				cur_h_q16, ceilf((s_r + s_g + s_b) / 3.0f),
			
 
				+				block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range))
			
 
				+			{
			
 
				+				improved = true;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		if (!improved)
			
 
				+			break;
			
 
				+
			
 
				+		memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS);
			
 
				+		memcpy(trial_blk_weights, blk_weights, num_pixels);
			
 
				+
			
 
				+	} // trial
			
 
				+
			
 
				+	return cur_block_error;
			
 
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+static bool pack_solid(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions)
			
 
				+{
			
 
				+	float r = 0.0f, g = 0.0f, b = 0.0f;
			
 
				+
			
 
				+	const float LOG_BIAS = .125f;
			
 
				+
			
 
				+	bool solid_block = true;
			
 
				+	for (uint32_t i = 0; i < 16; i++)
			
 
				+	{
			
 
				+		if ((pBlock_linear_colors[0][0] != pBlock_linear_colors[i][0]) ||
			
 
				+			(pBlock_linear_colors[0][1] != pBlock_linear_colors[i][1]) ||
			
 
				+			(pBlock_linear_colors[0][2] != pBlock_linear_colors[i][2]))
			
 
				+		{
			
 
				+			solid_block = false;
			
 
				+		}
			
 
				+
			
 
				+		r += log2f(pBlock_linear_colors[i][0] + LOG_BIAS);
			
 
				+		g += log2f(pBlock_linear_colors[i][1] + LOG_BIAS);
			
 
				+		b += log2f(pBlock_linear_colors[i][2] + LOG_BIAS);
			
 
				+	}
			
 
				+
			
 
				+	if (solid_block)
			
 
				+	{
			
 
				+		r = pBlock_linear_colors[0][0];
			
 
				+		g = pBlock_linear_colors[0][1];
			
 
				+		b = pBlock_linear_colors[0][2];
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		r = maximum<float>(0.0f, powf(2.0f, r * (1.0f / 16.0f)) - LOG_BIAS);
			
 
				+		g = maximum<float>(0.0f, powf(2.0f, g * (1.0f / 16.0f)) - LOG_BIAS);
			
 
				+		b = maximum<float>(0.0f, powf(2.0f, b * (1.0f / 16.0f)) - LOG_BIAS);
			
 
				+
			
 
				+		// for safety
			
 
				+		r = minimum<float>(r, MAX_HALF_FLOAT);
			
 
				+		g = minimum<float>(g, MAX_HALF_FLOAT);
			
 
				+		b = minimum<float>(b, MAX_HALF_FLOAT);
			
 
				+	}
			
 
				+
			
 
				+	half_float rh = float_to_half_non_neg_no_nan_inf(r), gh = float_to_half_non_neg_no_nan_inf(g), bh = float_to_half_non_neg_no_nan_inf(b), ah = float_to_half_non_neg_no_nan_inf(1.0f);
			
 
				+
			
 
				+	astc_hdr_pack_results results;
			
 
				+	results.clear();
			
 
				+
			
 
				+	uint8_t* packed_blk = (uint8_t*)&results.m_solid_blk;
			
 
				+	results.m_is_solid = true;
			
 
				+
			
 
				+	packed_blk[0] = 0b11111100;
			
 
				+	packed_blk[1] = 255;
			
 
				+	packed_blk[2] = 255;
			
 
				+	packed_blk[3] = 255;
			
 
				+	packed_blk[4] = 255;
			
 
				+	packed_blk[5] = 255;
			
 
				+	packed_blk[6] = 255;
			
 
				+	packed_blk[7] = 255;
			
 
				+
			
 
				+	packed_blk[8] = (uint8_t)rh;
			
 
				+	packed_blk[9] = (uint8_t)(rh >> 8);
			
 
				+	packed_blk[10] = (uint8_t)gh;
			
 
				+	packed_blk[11] = (uint8_t)(gh >> 8);
			
 
				+	packed_blk[12] = (uint8_t)bh;
			
 
				+	packed_blk[13] = (uint8_t)(bh >> 8);
			
 
				+	packed_blk[14] = (uint8_t)ah;
			
 
				+	packed_blk[15] = (uint8_t)(ah >> 8);
			
 
				+
			
 
				+	results.m_best_block_error = 0;
			
 
				+
			
 
				+	if (!solid_block)
			
 
				+	{
			
 
				+		const float R_WEIGHT = coptions.m_r_err_scale;
			
 
				+		const float G_WEIGHT = coptions.m_g_err_scale;
			
 
				+
			
 
				+		// This MUST match how errors are computed in eval_selectors().
			
 
				+		for (uint32_t i = 0; i < 16; i++)
			
 
				+		{
			
 
				+			half_float dr = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][0]), dg = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][1]), db = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][2]);
			
 
				+			double rd = q(rh) - q(dr);
			
 
				+			double gd = q(gh) - q(dg);
			
 
				+			double bd = q(bh) - q(db);
			
 
				+
			
 
				+			double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
			
 
				+
			
 
				+			results.m_best_block_error += e;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	const half_float hc[3] = { rh, gh, bh };
			
 
				+
			
 
				+	bc6h_enc_block_solid_color(&results.m_bc6h_block, hc);
			
 
				+
			
 
				+	all_results.push_back(results);
			
 
				+
			
 
				+	return solid_block;
			
 
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+static void pack_mode11(
			
 
				+	const vec4F* pBlock_linear_colors, 
			
 
				+	basisu::vector<astc_hdr_pack_results>& all_results, 
			
 
				+	const astc_hdr_codec_options& coptions, 
			
 
				+	uint32_t first_weight_ise_range, uint32_t last_weight_ise_range, bool constrain_ise_weight8_selectors)
			
 
				+{
			
 
				+	uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights[16];
			
 
				+	uint32_t trial_submode11 = 0;
			
 
				+
			
 
				+	clear_obj(trial_endpoints);
			
 
				+	clear_obj(trial_weights);
			
 
				+		
			
 
				+	for (uint32_t weight_ise_range = first_weight_ise_range; weight_ise_range <= last_weight_ise_range; weight_ise_range++)
			
 
				+	{
			
 
				+		const bool direct_only = coptions.m_mode11_direct_only;
			
 
				+		
			
 
				+		uint32_t endpoint_ise_range = astc_helpers::BISE_256_LEVELS;
			
 
				+		if (weight_ise_range == astc_helpers::BISE_16_LEVELS)
			
 
				+			endpoint_ise_range = astc_helpers::BISE_192_LEVELS;
			
 
				+		else
			
 
				+		{
			
 
				+			assert(weight_ise_range < astc_helpers::BISE_16_LEVELS);
			
 
				+		}
			
 
				+				
			
 
				+		double trial_error = encode_astc_hdr_block_mode_11(16, pBlock_linear_colors, weight_ise_range, trial_submode11, 1e+30f, trial_endpoints, trial_weights, coptions, direct_only, 
			
 
				+			endpoint_ise_range, coptions.m_mode11_uber_mode && (weight_ise_range >= astc_helpers::BISE_4_LEVELS) && coptions.m_allow_uber_mode, constrain_ise_weight8_selectors, coptions.m_first_mode11_submode, coptions.m_last_mode11_submode);
			
 
				+
			
 
				+		if (trial_error < 1e+30f)
			
 
				+		{
			
 
				+			astc_hdr_pack_results results;
			
 
				+			results.clear();
			
 
				+
			
 
				+			results.m_best_block_error = trial_error;
			
 
				+
			
 
				+			results.m_best_submodes[0] = trial_submode11;
			
 
				+			results.m_constrained_weights = constrain_ise_weight8_selectors;
			
 
				+						
			
 
				+			results.m_best_blk.m_num_partitions = 1;
			
 
				+			results.m_best_blk.m_color_endpoint_modes[0] = 11;
			
 
				+			results.m_best_blk.m_weight_ise_range = weight_ise_range;
			
 
				+			results.m_best_blk.m_endpoint_ise_range = endpoint_ise_range;
			
 
				+			
			
 
				+			memcpy(results.m_best_blk.m_endpoints, trial_endpoints, NUM_MODE11_ENDPOINTS);
			
 
				+			memcpy(results.m_best_blk.m_weights, trial_weights, 16);
			
 
				+
			
 
				+#ifdef _DEBUG
			
 
				+			{
			
 
				+				half_float block_pixels_half[16][3];
			
 
				+
			
 
				+				vec4F block_pixels_q16[16];
			
 
				+				for (uint32_t i = 0; i < 16; i++)
			
 
				+				{
			
 
				+					block_pixels_half[i][0] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][0]);
			
 
				+					block_pixels_half[i][1] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][1]);
			
 
				+					block_pixels_half[i][2] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][2]);
			
 
				+				}
			
 
				+				
			
 
				+				half_float unpacked_astc_blk_rgba[4][4][4];
			
 
				+				bool res = astc_helpers::decode_block(results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16);
			
 
				+				assert(res);
			
 
				+
			
 
				+				half_float unpacked_astc_blk_rgb[4][4][3];
			
 
				+				for (uint32_t y = 0; y < 4; y++)
			
 
				+					for (uint32_t x = 0; x < 4; x++)
			
 
				+						for (uint32_t c = 0; c < 3; c++)
			
 
				+							unpacked_astc_blk_rgb[y][x][c] = unpacked_astc_blk_rgba[y][x][c];
			
 
				+
			
 
				+				double cmp_err = compute_block_error(&block_pixels_half[0][0], &unpacked_astc_blk_rgb[0][0][0], coptions);
			
 
				+				assert(results.m_best_block_error == cmp_err);
			
 
				+			}
			
 
				+#endif
			
 
				+
			
 
				+			// transcode to BC6H
			
 
				+			assert(results.m_best_blk.m_color_endpoint_modes[0] == 11);
			
 
				+			
			
 
				+			// Get qlog12 endpoints
			
 
				+			int e[2][3];
			
 
				+			bool success = decode_mode11_to_qlog12(results.m_best_blk.m_endpoints, e, results.m_best_blk.m_endpoint_ise_range);
			
 
				+			assert(success);
			
 
				+			BASISU_NOTE_UNUSED(success);
			
 
				+
			
 
				+			// Transform endpoints to half float
			
 
				+			half_float h_e[3][2] =
			
 
				+			{
			
 
				+				{ qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) },
			
 
				+				{ qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) },
			
 
				+				{ qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) }
			
 
				+			};
			
 
				+
			
 
				+			// Transcode to bc6h
			
 
				+			success = transcode_bc6h_1subset(h_e, results.m_best_blk, results.m_bc6h_block);
			
 
				+			assert(success);
			
 
				+
			
 
				+			all_results.push_back(results);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+static void pack_mode7_single_part(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions)
			
 
				+{
			
 
				+	uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS], trial_weights[16];
			
 
				+	uint32_t trial_submode7 = 0;
			
 
				+
			
 
				+	clear_obj(trial_endpoints);
			
 
				+	clear_obj(trial_weights);
			
 
				+
			
 
				+	for (uint32_t weight_ise_range = coptions.m_first_mode7_part1_weight_ise_range; weight_ise_range <= coptions.m_last_mode7_part1_weight_ise_range; weight_ise_range++)
			
 
				+	{
			
 
				+		const uint32_t ise_endpoint_range = astc_helpers::BISE_256_LEVELS;
			
 
				+
			
 
				+		double trial_error = encode_astc_hdr_block_mode_7(16, pBlock_linear_colors, weight_ise_range, trial_submode7, 1e+30f, trial_endpoints, trial_weights, coptions, ise_endpoint_range);
			
 
				+
			
 
				+		if (trial_error < 1e+30f)
			
 
				+		{
			
 
				+			astc_hdr_pack_results results;
			
 
				+			results.clear();
			
 
				+
			
 
				+			results.m_best_block_error = trial_error;
			
 
				+
			
 
				+			results.m_best_submodes[0] = trial_submode7;
			
 
				+			
			
 
				+			results.m_best_blk.m_num_partitions = 1;
			
 
				+			results.m_best_blk.m_color_endpoint_modes[0] = 7;
			
 
				+			results.m_best_blk.m_weight_ise_range = weight_ise_range;
			
 
				+			results.m_best_blk.m_endpoint_ise_range = ise_endpoint_range;
			
 
				+			
			
 
				+			memcpy(results.m_best_blk.m_endpoints, trial_endpoints, NUM_MODE7_ENDPOINTS);
			
 
				+			memcpy(results.m_best_blk.m_weights, trial_weights, 16);
			
 
				+
			
 
				+			// transcode to BC6H
			
 
				+			assert(results.m_best_blk.m_color_endpoint_modes[0] == 7);
			
 
				+			
			
 
				+			// Get qlog12 endpoints
			
 
				+			int e[2][3];
			
 
				+			if (!decode_mode7_to_qlog12(results.m_best_blk.m_endpoints, e, nullptr, results.m_best_blk.m_endpoint_ise_range))
			
 
				+				continue;
			
 
				+
			
 
				+			// Transform endpoints to half float
			
 
				+			half_float h_e[3][2] =
			
 
				+			{
			
 
				+				{ qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) },
			
 
				+				{ qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) },
			
 
				+				{ qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) }
			
 
				+			};
			
 
				+
			
 
				+			// Transcode to bc6h
			
 
				+			bool status = transcode_bc6h_1subset(h_e, results.m_best_blk, results.m_bc6h_block);
			
 
				+			assert(status);
			
 
				+			(void)status;
			
 
				+
			
 
				+			all_results.push_back(results);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+static bool estimate_partition2(const vec4F* pBlock_pixels, int* pBest_parts, uint32_t num_best_parts)
			
 
				+{
			
 
				+	assert(num_best_parts <= basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
			
 
				+
			
 
				+	vec3F training_vecs[16], mean(0.0f);
			
 
				+
			
 
				+	for (uint32_t i = 0; i < 16; i++)
			
 
				+	{
			
 
				+		vec3F& v = training_vecs[i];
			
 
				+
			
 
				+		v[0] = (float)float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][0]);
			
 
				+		v[1] = (float)float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][1]);
			
 
				+		v[2] = (float)float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][2]);
			
 
				+
			
 
				+		mean += v;
			
 
				+	}
			
 
				+	mean *= (1.0f / 16.0f);
			
 
				+
			
 
				+	vec3F cluster_centroids[2] = { mean - vec3F(.1f), mean + vec3F(.1f) };
			
 
				+
			
 
				+	uint32_t cluster_pixels[2][16];
			
 
				+	uint32_t num_cluster_pixels[2];
			
 
				+	vec3F new_cluster_means[2];
			
 
				+
			
 
				+	for (uint32_t s = 0; s < 4; s++)
			
 
				+	{
			
 
				+		num_cluster_pixels[0] = 0;
			
 
				+		num_cluster_pixels[1] = 0;
			
 
				+
			
 
				+		new_cluster_means[0].clear();
			
 
				+		new_cluster_means[1].clear();
			
 
				+
			
 
				+		for (uint32_t i = 0; i < 16; i++)
			
 
				+		{
			
 
				+			float d0 = training_vecs[i].squared_distance(cluster_centroids[0]);
			
 
				+			float d1 = training_vecs[i].squared_distance(cluster_centroids[1]);
			
 
				+
			
 
				+			if (d0 < d1)
			
 
				+			{
			
 
				+				cluster_pixels[0][num_cluster_pixels[0]] = i;
			
 
				+				new_cluster_means[0] += training_vecs[i];
			
 
				+				num_cluster_pixels[0]++;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				cluster_pixels[1][num_cluster_pixels[1]] = i;
			
 
				+				new_cluster_means[1] += training_vecs[i];
			
 
				+				num_cluster_pixels[1]++;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		if (!num_cluster_pixels[0] || !num_cluster_pixels[1])
			
 
				+			return false;
			
 
				+
			
 
				+		cluster_centroids[0] = new_cluster_means[0] / (float)num_cluster_pixels[0];
			
 
				+		cluster_centroids[1] = new_cluster_means[1] / (float)num_cluster_pixels[1];
			
 
				+	}
			
 
				+
			
 
				+	int desired_parts[4][4]; // [y][x]
			
 
				+	for (uint32_t p = 0; p < 2; p++)
			
 
				+	{
			
 
				+		for (uint32_t i = 0; i < num_cluster_pixels[p]; i++)
			
 
				+		{
			
 
				+			const uint32_t pix_index = cluster_pixels[p][i];
			
 
				+
			
 
				+			desired_parts[pix_index >> 2][pix_index & 3] = p;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	uint32_t part_similarity[basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2];
			
 
				+
			
 
				+	for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; part_index++)
			
 
				+	{
			
 
				+		const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
			
 
				+
			
 
				+		int total_sim_non_inv = 0;
			
 
				+		int total_sim_inv = 0;
			
 
				+
			
 
				+		for (uint32_t y = 0; y < 4; y++)
			
 
				+		{
			
 
				+			for (uint32_t x = 0; x < 4; x++)
			
 
				+			{
			
 
				+				int part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4];
			
 
				+
			
 
				+				if (part == desired_parts[y][x])
			
 
				+					total_sim_non_inv++;
			
 
				+
			
 
				+				if ((part ^ 1) == desired_parts[y][x])
			
 
				+					total_sim_inv++;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		int total_sim = maximum(total_sim_non_inv, total_sim_inv);
			
 
				+
			
 
				+		part_similarity[part_index] = (total_sim << 8) | part_index;
			
 
				+
			
 
				+	} // part_index;
			
 
				+
			
 
				+	std::sort(part_similarity, part_similarity + basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
			
 
				+
			
 
				+	for (uint32_t i = 0; i < num_best_parts; i++)
			
 
				+		pBest_parts[i] = part_similarity[(basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 - 1) - i] & 0xFF;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+/* Tries two-partition ASTC HDR encodings that use color endpoint mode 7 for both subsets, and appends
			
 
				+ * every usable trial to all_results (existing entries are left in place).
			
 
				+ *
			
 
				+ * pBlock_linear_colors: the block's 16 texels, read as [x + y * 4].
			
 
				+ * num_estimated_partitions / pEstimated_partitions: when the count is non-zero, only the first
			
 
				+ *   num_estimated_partitions entries of pEstimated_partitions are tried, and the part mask is not consulted.
			
 
				+ *   When it is zero, every index below basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 whose bit is set in
			
 
				+ *   coptions.m_mode7_part2_part_masks is tried.
			
 
				+ * first_weight_ise_range..last_weight_ise_range: inclusive span of weight ISE ranges to try; each value is
			
 
				+ *   asserted to be <= astc_helpers::BISE_8_LEVELS.
			
 
				+ */
			
 
				+static void pack_mode7_2part(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions,
			
 
				+	int num_estimated_partitions, const int *pEstimated_partitions,
			
 
				+	uint32_t first_weight_ise_range, uint32_t last_weight_ise_range)
			
 
				+{
			
 
				+	assert(coptions.m_mode7_part2_part_masks);
			
 
				+	// Template block reused by every trial: 4x4 weight grid, two partitions, endpoint mode 7 in both.
			
 
				+	astc_helpers::log_astc_block trial_blk;
			
 
				+	clear_obj(trial_blk);
			
 
				+	trial_blk.m_grid_width = 4;
			
 
				+	trial_blk.m_grid_height = 4;
			
 
				+
			
 
				+	trial_blk.m_num_partitions = 2;
			
 
				+	trial_blk.m_color_endpoint_modes[0] = 7;
			
 
				+	trial_blk.m_color_endpoint_modes[1] = 7;
			
 
				+
			
 
				+	uint32_t first_part_index = 0, last_part_index = basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2;
			
 
				+	// With an estimate list, the loop below walks the list entries instead of the whole table.
			
 
				+	if (num_estimated_partitions)
			
 
				+	{
			
 
				+		first_part_index = 0;
			
 
				+		last_part_index = num_estimated_partitions;
			
 
				+	}
			
 
				+	
			
 
				+	for (uint32_t part_index_iter = first_part_index; part_index_iter < last_part_index; ++part_index_iter)
			
 
				+	{
			
 
				+		uint32_t part_index;
			
 
				+		if (num_estimated_partitions)
			
 
				+		{
			
 
				+			part_index = pEstimated_partitions[part_index_iter];
			
 
				+			assert(part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			part_index = part_index_iter;
			
 
				+			if (((1U << part_index) & coptions.m_mode7_part2_part_masks) == 0)
			
 
				+				continue;
			
 
				+		}
			
 
				+		// Pattern ids for this common partition, plus whether the subset assignment must be flipped.
			
 
				+		const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
			
 
				+		const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
			
 
				+		const bool invert_flag = basist::g_astc_bc7_common_partitions2[part_index].m_invert;
			
 
				+
			
 
				+		vec4F part_pixels[2][16];
			
 
				+		uint32_t pixel_part_index[4][4]; // [y][x]
			
 
				+		uint32_t num_part_pixels[2] = { 0, 0 };
			
 
				+
			
 
				+		// Extract each subset's texels for this partition pattern
			
 
				+		for (uint32_t y = 0; y < 4; y++)
			
 
				+		{
			
 
				+			for (uint32_t x = 0; x < 4; x++)
			
 
				+			{
			
 
				+				uint32_t part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4];
			
 
				+				if (invert_flag)
			
 
				+					part = 1 - part;
			
 
				+
			
 
				+				pixel_part_index[y][x] = part;
			
 
				+				part_pixels[part][num_part_pixels[part]] = pBlock_linear_colors[x + y * 4];
			
 
				+
			
 
				+				num_part_pixels[part]++;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		trial_blk.m_partition_id = astc_pattern;
			
 
				+				
			
 
				+		for (uint32_t weight_ise_range = first_weight_ise_range; weight_ise_range <= last_weight_ise_range; weight_ise_range++)
			
 
				+		{
			
 
				+			assert(weight_ise_range <= astc_helpers::BISE_8_LEVELS);
			
 
				+			// Endpoint ISE range defaults to 256 levels; the three weight ranges tested below use fewer endpoint levels.
			
 
				+			uint32_t ise_endpoint_range = astc_helpers::BISE_256_LEVELS;
			
 
				+			if (weight_ise_range == astc_helpers::BISE_5_LEVELS)
			
 
				+				ise_endpoint_range = astc_helpers::BISE_192_LEVELS;
			
 
				+			else if (weight_ise_range == astc_helpers::BISE_6_LEVELS)
			
 
				+				ise_endpoint_range = astc_helpers::BISE_128_LEVELS;
			
 
				+			else if (weight_ise_range == astc_helpers::BISE_8_LEVELS)
			
 
				+				ise_endpoint_range = astc_helpers::BISE_80_LEVELS;
			
 
				+
			
 
				+			uint8_t trial_endpoints[2][NUM_MODE7_ENDPOINTS], trial_weights[2][16];
			
 
				+			uint32_t trial_submode7[2];
			
 
				+
			
 
				+			clear_obj(trial_endpoints);
			
 
				+			clear_obj(trial_weights);
			
 
				+			clear_obj(trial_submode7);
			
 
				+			// Encode each subset on its own and sum the returned errors.
			
 
				+			double total_trial_err = 0;
			
 
				+			for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++)
			
 
				+			{
			
 
				+				total_trial_err += encode_astc_hdr_block_mode_7(
			
 
				+					num_part_pixels[pack_part_index], &part_pixels[pack_part_index][0],
			
 
				+					weight_ise_range, trial_submode7[pack_part_index], 1e+30f,
			
 
				+					&trial_endpoints[pack_part_index][0], &trial_weights[pack_part_index][0], coptions, ise_endpoint_range);
			
 
				+
			
 
				+			} // pack_part_index
			
 
				+			// Keep the trial only if the summed error is below 1e+30f (the same value handed to the encoder above).
			
 
				+			if (total_trial_err < 1e+30f)
			
 
				+			{
			
 
				+				trial_blk.m_weight_ise_range = weight_ise_range;
			
 
				+				trial_blk.m_endpoint_ise_range = ise_endpoint_range;
			
 
				+
			
 
				+				for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++)
			
 
				+					memcpy(&trial_blk.m_endpoints[pack_part_index * NUM_MODE7_ENDPOINTS], &trial_endpoints[pack_part_index][0], NUM_MODE7_ENDPOINTS);
			
 
				+				// Scatter per-subset weights back into block order; texels are consumed in the same raster order they were gathered in.
			
 
				+				uint32_t src_pixel_index[2] = { 0, 0 };
			
 
				+				for (uint32_t y = 0; y < 4; y++)
			
 
				+				{
			
 
				+					for (uint32_t x = 0; x < 4; x++)
			
 
				+					{
			
 
				+						uint32_t p = pixel_part_index[y][x];
			
 
				+						trial_blk.m_weights[x + y * 4] = trial_weights[p][src_pixel_index[p]++];
			
 
				+					}
			
 
				+				}
			
 
				+								
			
 
				+				astc_hdr_pack_results results;
			
 
				+				results.clear();
			
 
				+
			
 
				+				results.m_best_block_error = total_trial_err;
			
 
				+				results.m_best_submodes[0] = trial_submode7[0];
			
 
				+				results.m_best_submodes[1] = trial_submode7[1];
			
 
				+				results.m_best_pat_index = part_index;
			
 
				+
			
 
				+				results.m_best_blk = trial_blk;
			
 
				+				// results.m_bc6h_block is handed to the 2-subset BC6H transcode as its output; failure is only caught by the assert.
			
 
				+				bool status = transcode_bc6h_2subsets(part_index, results.m_best_blk, results.m_bc6h_block);
			
 
				+				assert(status);
			
 
				+				BASISU_NOTE_UNUSED(status);
			
 
				+
			
 
				+				all_results.push_back(results);
			
 
				+			}
			
 
				+
			
 
				+		} // weight_ise_range
			
 
				+
			
 
				+	} // part_index
			
 
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+/* Tries two-partition ASTC HDR encodings that use color endpoint mode 11 for both subsets, and appends
			
 
				+ * every usable trial to all_results (existing entries are left in place).
			
 
				+ *
			
 
				+ * pBlock_linear_colors: the block's 16 texels, read as [x + y * 4].
			
 
				+ * num_estimated_partitions / pEstimated_partitions: when the count is non-zero, only the first
			
 
				+ *   num_estimated_partitions entries of pEstimated_partitions are tried, and the part mask is not consulted.
			
 
				+ *   When it is zero, every index below basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 whose bit is set in
			
 
				+ *   coptions.m_mode11_part2_part_masks is tried.
			
 
				+ * The weight ISE ranges tried come from coptions.m_first_mode11_part2_weight_ise_range through
			
 
				+ *   coptions.m_last_mode11_part2_weight_ise_range (inclusive).
			
 
				+ */
			
 
				+static void pack_mode11_2part(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions,
			
 
				+	int num_estimated_partitions, const int* pEstimated_partitions)
			
 
				+{
			
 
				+	assert(coptions.m_mode11_part2_part_masks);
			
 
				+	// Template block reused by every trial: 4x4 weight grid, two partitions, endpoint mode 11 in both.
			
 
				+	astc_helpers::log_astc_block trial_blk;
			
 
				+	clear_obj(trial_blk);
			
 
				+	trial_blk.m_grid_width = 4;
			
 
				+	trial_blk.m_grid_height = 4;
			
 
				+
			
 
				+	trial_blk.m_num_partitions = 2;
			
 
				+	trial_blk.m_color_endpoint_modes[0] = 11;
			
 
				+	trial_blk.m_color_endpoint_modes[1] = 11;
			
 
				+			
			
 
				+	uint32_t first_part_index = 0, last_part_index = basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2;
			
 
				+	// With an estimate list, the loop below walks the list entries instead of the whole table.
			
 
				+	if (num_estimated_partitions)
			
 
				+	{
			
 
				+		first_part_index = 0;
			
 
				+		last_part_index = num_estimated_partitions;
			
 
				+	}
			
 
				+
			
 
				+	for (uint32_t part_index_iter = first_part_index; part_index_iter < last_part_index; ++part_index_iter)
			
 
				+	{
			
 
				+		uint32_t part_index;
			
 
				+		if (num_estimated_partitions)
			
 
				+		{
			
 
				+			part_index = pEstimated_partitions[part_index_iter];
			
 
				+			assert(part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			part_index = part_index_iter;
			
 
				+			if (((1U << part_index) & coptions.m_mode11_part2_part_masks) == 0)
			
 
				+				continue;
			
 
				+		}
			
 
				+		// Pattern ids for this common partition, plus whether the subset assignment must be flipped.
			
 
				+		const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
			
 
				+		const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
			
 
				+		const bool invert_flag = basist::g_astc_bc7_common_partitions2[part_index].m_invert;
			
 
				+
			
 
				+		vec4F part_pixels[2][16];
			
 
				+		uint32_t pixel_part_index[4][4]; // [y][x]
			
 
				+		uint32_t num_part_pixels[2] = { 0, 0 };
			
 
				+
			
 
				+		// Extract each subset's texels for this partition pattern
			
 
				+		for (uint32_t y = 0; y < 4; y++)
			
 
				+		{
			
 
				+			for (uint32_t x = 0; x < 4; x++)
			
 
				+			{
			
 
				+				uint32_t part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4];
			
 
				+				if (invert_flag)
			
 
				+					part = 1 - part;
			
 
				+
			
 
				+				pixel_part_index[y][x] = part;
			
 
				+				part_pixels[part][num_part_pixels[part]] = pBlock_linear_colors[x + y * 4];
			
 
				+
			
 
				+				num_part_pixels[part]++;
			
 
				+			}
			
 
				+		}
			
 
				+				
			
 
				+		trial_blk.m_partition_id = astc_pattern;
			
 
				+						
			
 
				+		for (uint32_t weight_ise_range = coptions.m_first_mode11_part2_weight_ise_range; weight_ise_range <= coptions.m_last_mode11_part2_weight_ise_range; weight_ise_range++)
			
 
				+		{
			
 
				+			bool direct_only = false;
			
 
				+			uint32_t ise_endpoint_range = astc_helpers::BISE_64_LEVELS; // 64 endpoint levels unless the weight range is BISE_4_LEVELS (then 40)
			
 
				+			if (weight_ise_range == astc_helpers::BISE_4_LEVELS)
			
 
				+				ise_endpoint_range = astc_helpers::BISE_40_LEVELS;
			
 
				+
			
 
				+			uint8_t trial_endpoints[2][NUM_MODE11_ENDPOINTS], trial_weights[2][16];
			
 
				+			uint32_t trial_submode11[2];
			
 
				+
			
 
				+			clear_obj(trial_endpoints); 
			
 
				+			clear_obj(trial_weights);
			
 
				+			clear_obj(trial_submode11);
			
 
				+			// Encode each subset on its own and sum the returned errors.
			
 
				+			double total_trial_err = 0;
			
 
				+			for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++)
			
 
				+			{
			
 
				+				total_trial_err += encode_astc_hdr_block_mode_11(
			
 
				+					num_part_pixels[pack_part_index], &part_pixels[pack_part_index][0],
			
 
				+					weight_ise_range, trial_submode11[pack_part_index], 1e+30f,
			
 
				+					&trial_endpoints[pack_part_index][0], &trial_weights[pack_part_index][0], coptions,
			
 
				+					direct_only, ise_endpoint_range, coptions.m_mode11_uber_mode && (weight_ise_range >= astc_helpers::BISE_4_LEVELS) && coptions.m_allow_uber_mode, false,
			
 
				+					coptions.m_first_mode11_submode, coptions.m_last_mode11_submode);
			
 
				+
			
 
				+			} // pack_part_index
			
 
				+			// Keep the trial only if the summed error is below 1e+30f (the same value handed to the encoder above).
			
 
				+			if (total_trial_err < 1e+30f)
			
 
				+			{
			
 
				+				trial_blk.m_weight_ise_range = weight_ise_range;
			
 
				+				trial_blk.m_endpoint_ise_range = ise_endpoint_range;
			
 
				+
			
 
				+				for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++)
			
 
				+					memcpy(&trial_blk.m_endpoints[pack_part_index * NUM_MODE11_ENDPOINTS], &trial_endpoints[pack_part_index][0], NUM_MODE11_ENDPOINTS);
			
 
				+				// Scatter per-subset weights back into block order; texels are consumed in the same raster order they were gathered in.
			
 
				+				uint32_t src_pixel_index[2] = { 0, 0 };
			
 
				+				for (uint32_t y = 0; y < 4; y++)
			
 
				+				{
			
 
				+					for (uint32_t x = 0; x < 4; x++)
			
 
				+					{
			
 
				+						uint32_t p = pixel_part_index[y][x];
			
 
				+						trial_blk.m_weights[x + y * 4] = trial_weights[p][src_pixel_index[p]++];
			
 
				+					}
			
 
				+				}
			
 
				+								
			
 
				+				astc_hdr_pack_results results;
			
 
				+				results.clear();
			
 
				+
			
 
				+				results.m_best_block_error = total_trial_err;
			
 
				+				results.m_best_submodes[0] = trial_submode11[0];
			
 
				+				results.m_best_submodes[1] = trial_submode11[1];
			
 
				+				results.m_best_pat_index = part_index;
			
 
				+
			
 
				+				results.m_best_blk = trial_blk;
			
 
				+				// results.m_bc6h_block is handed to the 2-subset BC6H transcode as its output; failure is only caught by the assert.
			
 
				+				bool status = transcode_bc6h_2subsets(part_index, results.m_best_blk, results.m_bc6h_block);
			
 
				+				assert(status);
			
 
				+				BASISU_NOTE_UNUSED(status);
			
 
				+
			
 
				+				all_results.push_back(results);
			
 
				+			}
			
 
				+
			
 
				+		} // weight_ise_range
			
 
				+
			
 
				+	} // part_index
			
 
				+}
			
 
				+
			
 
				+//--------------------------------------------------------------------------------------------------------------------------
			
 
				+// Set to true at the end of astc_hdr_enc_init(); astc_hdr_enc_block() and astc_hdr_pack_results_to_block() refuse to run while it is false.
			
 
				+bool g_astc_hdr_enc_initialized;
			
 
				+/* One-time setup for the ASTC HDR encoder: runs the four init helpers below, then sets
			
 
				+ * g_astc_hdr_enc_initialized. Later calls return immediately.
			
 
				+ * NOTE(review): the flag is a plain bool and no locking is visible here - confirm that callers
			
 
				+ * perform initialization from a single thread.
			
 
				+ */
			
 
				+void astc_hdr_enc_init()
			
 
				+{
			
 
				+	if (g_astc_hdr_enc_initialized)
			
 
				+		return;
			
 
				+
			
 
				+	astc_hdr_core_init();
			
 
				+
			
 
				+	astc_helpers::init_tables(true);
			
 
				+			
			
 
				+	init_qlog_tables();
			
 
				+
			
 
				+	encode_astc_hdr_init();
			
 
				+								
			
 
				+	g_astc_hdr_enc_initialized = true;
			
 
				+}
			
 
				+/* Encodes one 4x4 block and collects every candidate encoding in all_results.
			
 
				+ *
			
 
				+ * pRGBPixels: 16 texels * 3 floats, read as [i * 3 + channel]. Every value must be finite, non-negative and
			
 
				+ *   <= MAX_HALF_FLOAT.
			
 
				+ * coptions: selects which packers run (solid, mode 11, mode 7 1-subset, mode 7/11 2-subset) and whether the
			
 
				+ *   results get a weight refinement pass.
			
 
				+ * all_results: resized to 0 first, then filled by the enabled packers.
			
 
				+ *
			
 
				+ * Returns false (after asserting) if astc_hdr_enc_init() has not run or an input value is rejected; true otherwise.
			
 
				+ */
			
 
				+bool astc_hdr_enc_block(
			
 
				+	const float* pRGBPixels, 
			
 
				+	const astc_hdr_codec_options& coptions,
			
 
				+	basisu::vector<astc_hdr_pack_results>& all_results)
			
 
				+{
			
 
				+	assert(g_astc_hdr_enc_initialized);
			
 
				+	if (!g_astc_hdr_enc_initialized)
			
 
				+	{
			
 
				+		// astc_hdr_enc_init() MUST be called first.
			
 
				+		assert(0);
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	all_results.resize(0);
			
 
				+				
			
 
				+	vec4F block_linear_colors[16];
			
 
				+
			
 
				+	// Sanity check the input block.
			
 
				+	for (uint32_t i = 0; i < 16; i++)
			
 
				+	{
			
 
				+		for (uint32_t j = 0; j < 3; j++)
			
 
				+		{
			
 
				+			float v = pRGBPixels[i * 3 + j];
			
 
				+
			
 
				+			if (std::isinf(v) || std::isnan(v))
			
 
				+			{
			
 
				+				// Input pixels cannot be NaN or +-Inf.
			
 
				+				assert(0);
			
 
				+				return false;
			
 
				+			}
			
 
				+
			
 
				+			if (v < 0.0f)
			
 
				+			{
			
 
				+				// Input pixels cannot be signed.
			
 
				+				assert(0);
			
 
				+				return false;
			
 
				+			}
			
 
				+
			
 
				+			if (v > MAX_HALF_FLOAT)
			
 
				+			{
			
 
				+				// Too large for half float.
			
 
				+				assert(0);
			
 
				+				return false;
			
 
				+			}
			
 
				+			
			
 
				+			block_linear_colors[i][j] = v;
			
 
				+		}
			
 
				+		
			
 
				+		block_linear_colors[i][3] = 1.0f;
			
 
				+	}
			
 
				+
			
 
				+	assert(coptions.m_use_solid || coptions.m_use_mode11 || coptions.m_use_mode7_part2 || coptions.m_use_mode7_part1 || coptions.m_use_mode11_part2);
			
 
				+	// When pack_solid() reports success, every other packer below is skipped.
			
 
				+	bool is_solid = false;
			
 
				+	if (coptions.m_use_solid)
			
 
				+		is_solid = pack_solid(block_linear_colors, all_results, coptions);
			
 
				+
			
 
				+	if (!is_solid)
			
 
				+	{
			
 
				+		if (coptions.m_use_mode11)
			
 
				+		{
			
 
				+			const size_t cur_num_results = all_results.size();
			
 
				+
			
 
				+			pack_mode11(block_linear_colors, all_results, coptions, coptions.m_first_mode11_weight_ise_range, coptions.m_last_mode11_weight_ise_range, false);
			
 
				+			// Extra pass at BISE_16_LEVELS only, with pack_mode11's last argument true instead of false (its meaning is defined by pack_mode11).
			
 
				+			if (coptions.m_last_mode11_weight_ise_range == astc_helpers::BISE_16_LEVELS)
			
 
				+			{
			
 
				+				pack_mode11(block_linear_colors, all_results, coptions, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_16_LEVELS, true);
			
 
				+			}
			
 
				+
			
 
				+			// If we couldn't get any mode 11 results at all, and we were restricted to just trying weight ISE range 8 (which required endpoint quantization) then 
			
 
				+			// fall back to weight ISE range 7 (which doesn't need any endpoint quantization).
			
 
				+			// This is to guarantee we always get at least 1 non-solid result.
			
 
				+			if (all_results.size() == cur_num_results)
			
 
				+			{
			
 
				+				if (coptions.m_first_mode11_weight_ise_range == astc_helpers::BISE_16_LEVELS)
			
 
				+				{
			
 
				+					pack_mode11(block_linear_colors, all_results, coptions, astc_helpers::BISE_12_LEVELS, astc_helpers::BISE_12_LEVELS, false);
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+				
			
 
				+		if (coptions.m_use_mode7_part1)
			
 
				+		{
			
 
				+			// Mode 7 1-subset never requires endpoint quantization, so it cannot fail to find at least one usable solution.
			
 
				+			pack_mode7_single_part(block_linear_colors, all_results, coptions);
			
 
				+		}
			
 
				+		// Partition estimates are computed once and shared by both 2-subset packers; best_parts is only read by them when have_est is true (a count of 0 is passed otherwise).
			
 
				+		bool have_est = false;
			
 
				+		int best_parts[basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2];
			
 
				+
			
 
				+		if ((coptions.m_use_mode7_part2) || (coptions.m_use_mode11_part2))
			
 
				+		{
			
 
				+			if (coptions.m_use_estimated_partitions)
			
 
				+				have_est = estimate_partition2(block_linear_colors, best_parts, coptions.m_max_estimated_partitions);
			
 
				+		}
			
 
				+
			
 
				+		if (coptions.m_use_mode7_part2)
			
 
				+		{
			
 
				+			const size_t cur_num_results = all_results.size();
			
 
				+
			
 
				+			pack_mode7_2part(block_linear_colors, all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts, 
			
 
				+				coptions.m_first_mode7_part2_weight_ise_range, coptions.m_last_mode7_part2_weight_ise_range);
			
 
				+
			
 
				+			// If we couldn't find any packable 2-subset mode 7 results at weight levels >= 5 levels (which always requires endpoint quant), then try falling back to 
			
 
				+			// 4 levels (BISE_4_LEVELS), which pack_mode7_2part pairs with the full 256-level endpoint range, i.e. no endpoint quantization.
			
 
				+			if (all_results.size() == cur_num_results)
			
 
				+			{
			
 
				+				if (coptions.m_first_mode7_part2_weight_ise_range >= astc_helpers::BISE_5_LEVELS)
			
 
				+				{
			
 
				+					pack_mode7_2part(block_linear_colors, all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts, 
			
 
				+						astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_4_LEVELS);
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+		
			
 
				+		if (coptions.m_use_mode11_part2)
			
 
				+		{
			
 
				+			// This always requires endpoint quant, so it could fail to find any usable solutions.
			
 
				+			pack_mode11_2part(block_linear_colors, all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (coptions.m_refine_weights)
			
 
				+	{
			
 
				+		// TODO: Move this above, do it once only.
			
 
				+		basist::half_float rgb_pixels_half[16 * 3];
			
 
				+		for (uint32_t i = 0; i < 16; i++)
			
 
				+		{
			
 
				+			rgb_pixels_half[i * 3 + 0] = float_to_half_non_neg_no_nan_inf(pRGBPixels[i * 3 + 0]);
			
 
				+			rgb_pixels_half[i * 3 + 1] = float_to_half_non_neg_no_nan_inf(pRGBPixels[i * 3 + 1]);
			
 
				+			rgb_pixels_half[i * 3 + 2] = float_to_half_non_neg_no_nan_inf(pRGBPixels[i * 3 + 2]);
			
 
				+		}
			
 
				+		// Refine every collected candidate; each result records in m_improved_via_refinement_flag whether its weights were changed.
			
 
				+		for (uint32_t i = 0; i < all_results.size(); i++)
			
 
				+		{
			
 
				+			bool status = astc_hdr_refine_weights(rgb_pixels_half, all_results[i], coptions, coptions.m_bc6h_err_weight, &all_results[i].m_improved_via_refinement_flag);
			
 
				+			assert(status);
			
 
				+			BASISU_NOTE_UNUSED(status);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+/* Writes the ASTC block described by results into dst_blk.
			
 
				+ * Solid results are copied byte-for-byte from results.m_solid_blk; all others are produced by
			
 
				+ * astc_helpers::pack_astc_block() from results.m_best_blk.
			
 
				+ * Returns false if astc_hdr_enc_init() has not run or if packing fails (both also assert), true otherwise.
			
 
				+ */
			
 
				+bool astc_hdr_pack_results_to_block(astc_blk& dst_blk, const astc_hdr_pack_results& results)
			
 
				+{
			
 
				+	assert(g_astc_hdr_enc_initialized);
			
 
				+	if (!g_astc_hdr_enc_initialized)
			
 
				+		return false;
			
 
				+
			
 
				+	if (results.m_is_solid)
			
 
				+	{
			
 
				+		memcpy(&dst_blk, &results.m_solid_blk, sizeof(results.m_solid_blk));
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		bool status = astc_helpers::pack_astc_block((astc_helpers::astc_block&)dst_blk, results.m_best_blk);
			
 
				+		if (!status)
			
 
				+		{
			
 
				+			assert(0);
			
 
				+			return false;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+/* Refines a block's chosen weight indices, balancing BC6H and ASTC HDR error.
			
 
				+ *
			
 
				+ * For every weight index the current weight ISE range allows, all 16 weights of a copy of the block are set to
			
 
				+ * that index; the copy is decoded as ASTC and as its BC6H transcode, and each texel keeps the index with the
			
 
				+ * lowest blended error: astc_err * (1 - bc6h_weight) + bc6h_err * bc6h_weight.
			
 
				+ *
			
 
				+ * pSource_block: 16 texels * 3 half floats, read as [(x + y * 4) * 3 + c].
			
 
				+ * cur_results: updated in place when any weight changes (weights, m_bc6h_block, m_best_block_error). Solid
			
 
				+ *   results are left untouched.
			
 
				+ * coptions: supplies the R/G error scales (blue uses 1.0) and is forwarded to compute_block_error().
			
 
				+ * pImproved_flag: optional; set to true when at least one weight index was changed, false otherwise. The new
			
 
				+ *   block error is not compared against the previous one here.
			
 
				+ *
			
 
				+ * Always returns true; helper failures are only checked with assert().
			
 
				+ */
			
 
				+bool astc_hdr_refine_weights(const half_float *pSource_block, astc_hdr_pack_results& cur_results, const astc_hdr_codec_options& coptions, float bc6h_weight, bool *pImproved_flag)
			
 
				+{
			
 
				+	if (pImproved_flag)
			
 
				+		*pImproved_flag = false;
			
 
				+
			
 
				+	if (cur_results.m_is_solid)
			
 
				+		return true;
			
 
				+
			
 
				+	const uint32_t total_weights = astc_helpers::get_ise_levels(cur_results.m_best_blk.m_weight_ise_range);
			
 
				+
			
 
				+	assert((total_weights >= 3) && (total_weights <= 16));
			
 
				+	// Per-texel best error and best weight index found so far, indexed [y][x].
			
 
				+	double best_err[4][4];
			
 
				+	uint8_t best_weight[4][4];
			
 
				+	for (uint32_t y = 0; y < 4; y++)
			
 
				+	{
			
 
				+		for (uint32_t x = 0; x < 4; x++)
			
 
				+		{
			
 
				+			best_err[y][x] = 1e+30f;
			
 
				+			best_weight[y][x] = 0;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	astc_hdr_pack_results temp_results;
			
 
				+
			
 
				+	const float c_weights[3] = { coptions.m_r_err_scale, coptions.m_g_err_scale, 1.0f };
			
 
				+
			
 
				+	for (uint32_t weight_index = 0; weight_index < total_weights; weight_index++)
			
 
				+	{
			
 
				+		temp_results = cur_results;
			
 
				+		for (uint32_t i = 0; i < 16; i++)
			
 
				+			temp_results.m_best_blk.m_weights[i] = (uint8_t)weight_index;
			
 
				+		// Decode the uniform-weight trial both as ASTC and as its BC6H transcode.
			
 
				+		half_float unpacked_astc_blk_rgba[4][4][4];
			
 
				+		bool res = astc_helpers::decode_block(temp_results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16);
			
 
				+		assert(res);
			
 
				+
			
 
				+		basist::bc6h_block trial_bc6h_blk;
			
 
				+		res = basist::astc_hdr_transcode_to_bc6h(temp_results.m_best_blk, trial_bc6h_blk);
			
 
				+		assert(res);
			
 
				+				
			
 
				+		half_float unpacked_bc6h_blk[4][4][3];
			
 
				+		res = unpack_bc6h(&trial_bc6h_blk, unpacked_bc6h_blk, false);
			
 
				+		assert(res);
			
 
				+		BASISU_NOTE_UNUSED(res);
			
 
				+
			
 
				+		for (uint32_t y = 0; y < 4; y++)
			
 
				+		{
			
 
				+			for (uint32_t x = 0; x < 4; x++)
			
 
				+			{
			
 
				+				double total_err = 0.0f;
			
 
				+				// Errors are squared differences of q() values (q() is defined elsewhere in this file), scaled per channel.
			
 
				+				for (uint32_t c = 0; c < 3; c++)
			
 
				+				{
			
 
				+					const half_float orig_c = pSource_block[(x + y * 4) * 3 + c];
			
 
				+					const double orig_c_q = q(orig_c);
			
 
				+					
			
 
				+					const half_float astc_c = unpacked_astc_blk_rgba[y][x][c];
			
 
				+					const double astc_c_q = q(astc_c);
			
 
				+					const double astc_e = square(astc_c_q - orig_c_q) * c_weights[c];
			
 
				+					
			
 
				+					const half_float bc6h_c = unpacked_bc6h_blk[y][x][c];
			
 
				+					const double bc6h_c_q = q(bc6h_c);
			
 
				+					const double bc6h_e = square(bc6h_c_q - orig_c_q) * c_weights[c];
			
 
				+
			
 
				+					const double overall_err = astc_e * (1.0f - bc6h_weight) + bc6h_e * bc6h_weight;
			
 
				+
			
 
				+					total_err += overall_err;
			
 
				+
			
 
				+				} //  c
			
 
				+
			
 
				+				if (total_err < best_err[y][x])
			
 
				+				{
			
 
				+					best_err[y][x] = total_err;
			
 
				+					best_weight[y][x] = (uint8_t)weight_index;
			
 
				+				}
			
 
				+
			
 
				+			} // x
			
 
				+		} // y
			
 
				+
			
 
				+	} // weight_index
			
 
				+	// Compare against the current weights; i >> 2 is y and i & 3 is x, matching the [x + y * 4] weight layout.
			
 
				+	bool any_changed = false;
			
 
				+	for (uint32_t i = 0; i < 16; i++)
			
 
				+	{
			
 
				+		if (cur_results.m_best_blk.m_weights[i] != best_weight[i >> 2][i & 3])
			
 
				+		{
			
 
				+			any_changed = true;
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (any_changed)
			
 
				+	{
			
 
				+		memcpy(cur_results.m_best_blk.m_weights, best_weight, 16);
			
 
				+		// Rebuild the BC6H block and recompute the stored block error from the re-decoded ASTC texels.
			
 
				+		{
			
 
				+			bool res = basist::astc_hdr_transcode_to_bc6h(cur_results.m_best_blk, cur_results.m_bc6h_block);
			
 
				+			assert(res);
			
 
				+			BASISU_NOTE_UNUSED(res);
			
 
				+
			
 
				+			half_float unpacked_astc_blk_rgba[4][4][4];
			
 
				+			res = astc_helpers::decode_block(cur_results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16);
			
 
				+			assert(res);
			
 
				+
			
 
				+			half_float unpacked_astc_blk_rgb[4][4][3];
			
 
				+			for (uint32_t y = 0; y < 4; y++)
			
 
				+				for (uint32_t x = 0; x < 4; x++)
			
 
				+					for (uint32_t c = 0; c < 3; c++)
			
 
				+						unpacked_astc_blk_rgb[y][x][c] = unpacked_astc_blk_rgba[y][x][c];
			
 
				+
			
 
				+			cur_results.m_best_block_error = compute_block_error(pSource_block, &unpacked_astc_blk_rgb[0][0][0], coptions);
			
 
				+		}
			
 
				+
			
 
				+		if (pImproved_flag)
			
 
				+			*pImproved_flag = true;
			
 
				+	}
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+/* Folds one packed block into the running statistics while holding m_mutex.
			
 
				+ * Counts total/refined/solid blocks; for non-solid blocks it updates the per-mode totals and the submode,
			
 
				+ * weight range and partition histograms. Any block whose first endpoint mode is not 7 is counted as mode 11.
			
 
				+ * Histogram indices go through bounds_check() before use.
			
 
				+ */
			
 
				+void astc_hdr_block_stats::update(const astc_hdr_pack_results& log_blk)
			
 
				+{
			
 
				+	std::lock_guard<std::mutex> lck(m_mutex);
			
 
				+
			
 
				+	m_total_blocks++;
			
 
				+
			
 
				+	if (log_blk.m_improved_via_refinement_flag)
			
 
				+		m_total_refined++;
			
 
				+
			
 
				+	if (log_blk.m_is_solid)
			
 
				+	{
			
 
				+		m_total_solid++;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		int best_weight_range = log_blk.m_best_blk.m_weight_ise_range;
			
 
				+
			
 
				+		if (log_blk.m_best_blk.m_color_endpoint_modes[0] == 7)
			
 
				+		{
			
 
				+			m_mode7_submode_hist[bounds_check(log_blk.m_best_submodes[0], 0U, 6U)]++;
			
 
				+
			
 
				+			if (log_blk.m_best_blk.m_num_partitions == 2)
			
 
				+			{
			
 
				+				m_total_mode7_2part++;
			
 
				+				// The second subset's submode goes into the same histogram as the first.
			
 
				+				m_mode7_submode_hist[bounds_check(log_blk.m_best_submodes[1], 0U, 6U)]++;
			
 
				+				m_total_2part++;
			
 
				+
			
 
				+				m_weight_range_hist_7_2part[bounds_check(best_weight_range, 0, 11)]++;
			
 
				+
			
 
				+				m_part_hist[bounds_check(log_blk.m_best_pat_index, 0U, 32U)]++;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				m_total_mode7_1part++;
			
 
				+
			
 
				+				m_weight_range_hist_7[bounds_check(best_weight_range, 0, 11)]++;
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			m_mode11_submode_hist[bounds_check(log_blk.m_best_submodes[0], 0U, 9U)]++;
			
 
				+			if (log_blk.m_constrained_weights)
			
 
				+				m_total_mode11_1part_constrained_weights++;
			
 
				+			// NOTE(review): the constrained-weights counter above is bumped before the partition count is checked, so it is not limited to 1-partition blocks here - confirm that only 1-partition results ever set the flag.
			
 
				+			if (log_blk.m_best_blk.m_num_partitions == 2)
			
 
				+			{
			
 
				+				m_total_mode11_2part++;
			
 
				+				// The second subset's submode goes into the same histogram as the first.
			
 
				+				m_mode11_submode_hist[bounds_check(log_blk.m_best_submodes[1], 0U, 9U)]++;
			
 
				+				m_total_2part++;
			
 
				+
			
 
				+				m_weight_range_hist_11_2part[bounds_check(best_weight_range, 0, 11)]++;
			
 
				+
			
 
				+				m_part_hist[bounds_check(log_blk.m_best_pat_index, 0U, 32U)]++;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				m_total_mode11_1part++;
			
 
				+
			
 
				+				m_weight_range_hist_11[bounds_check(best_weight_range, 0, 11)]++;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+/* Prints the accumulated encoder statistics to stdout while holding m_mutex.
			
 
				+ * Asserts that at least one block was recorded and returns without printing when none was, which also
			
 
				+ * keeps the percentage divisions below from dividing by zero.
			
 
				+ */
			
 
				+void astc_hdr_block_stats::print()
			
 
				+{
			
 
				+	std::lock_guard<std::mutex> lck(m_mutex);
			
 
				+
			
 
				+	assert(m_total_blocks);
			
 
				+	if (!m_total_blocks)
			
 
				+		return;
			
 
				+
			
 
				+	printf("\nLow-level ASTC Encoder Statistics:\n");
			
 
				+	printf("Total blocks: %u\n", m_total_blocks);
			
 
				+	printf("Total solid: %u %3.2f%%\n", m_total_solid, (m_total_solid * 100.0f) / m_total_blocks);
			
 
				+	printf("Total refined: %u %3.2f%%\n", m_total_refined, (m_total_refined * 100.0f) / m_total_blocks);
			
 
				+
			
 
				+	printf("Total mode 11, 1 partition: %u %3.2f%%\n", m_total_mode11_1part, (m_total_mode11_1part * 100.0f) / m_total_blocks);
			
 
				+	printf("Total mode 11, 1 partition, constrained weights: %u %3.2f%%\n", m_total_mode11_1part_constrained_weights, (m_total_mode11_1part_constrained_weights * 100.0f) / m_total_blocks);
			
 
				+	printf("Total mode 11, 2 partition: %u %3.2f%%\n", m_total_mode11_2part, (m_total_mode11_2part * 100.0f) / m_total_blocks);
			
 
				+
			
 
				+	printf("Total mode 7, 1 partition: %u %3.2f%%\n", m_total_mode7_1part, (m_total_mode7_1part * 100.0f) / m_total_blocks);
			
 
				+	printf("Total mode 7, 2 partition: %u %3.2f%%\n", m_total_mode7_2part, (m_total_mode7_2part * 100.0f) / m_total_blocks);
			
 
				+
			
 
				+	printf("Total 2 partitions: %u %3.2f%%\n", m_total_2part, (m_total_2part * 100.0f) / m_total_blocks);
			
 
				+	printf("\n");
			
 
				+	// The four weight range histograms below are printed from ISE range index 1 up to each mode's *_LAST_ISE_RANGE constant.
			
 
				+	printf("ISE texel weight range histogram mode 11:\n");
			
 
				+	for (uint32_t i = 1; i <= MODE11_LAST_ISE_RANGE; i++)
			
 
				+		printf("%u %u\n", i, m_weight_range_hist_11[i]);
			
 
				+	printf("\n");
			
 
				+
			
 
				+	printf("ISE texel weight range histogram mode 11, 2 partition:\n");
			
 
				+	for (uint32_t i = 1; i <= MODE11_PART2_LAST_ISE_RANGE; i++)
			
 
				+		printf("%u %u\n", i, m_weight_range_hist_11_2part[i]);
			
 
				+	printf("\n");
			
 
				+
			
 
				+	printf("ISE texel weight range histogram mode 7:\n");
			
 
				+	for (uint32_t i = 1; i <= MODE7_PART1_LAST_ISE_RANGE; i++)
			
 
				+		printf("%u %u\n", i, m_weight_range_hist_7[i]);
			
 
				+	printf("\n");
			
 
				+
			
 
				+	printf("ISE texel weight range histogram mode 7, 2 partition:\n");
			
 
				+	for (uint32_t i = 1; i <= MODE7_PART2_LAST_ISE_RANGE; i++)
			
 
				+		printf("%u %u\n", i, m_weight_range_hist_7_2part[i]);
			
 
				+	printf("\n");
			
 
				+
			
 
				+	printf("Mode 11 submode histogram:\n");
			
 
				+	for (uint32_t i = 0; i <= MODE11_TOTAL_SUBMODES; i++) // +1 because of the extra direct encoding
			
 
				+		printf("%u %u\n", i, m_mode11_submode_hist[i]);
			
 
				+	printf("\n");
			
 
				+
			
 
				+	printf("Mode 7 submode histogram:\n");
			
 
				+	for (uint32_t i = 0; i < MODE7_TOTAL_SUBMODES; i++)
			
 
				+		printf("%u %u\n", i, m_mode7_submode_hist[i]);
			
 
				+	printf("\n");
			
 
				+	// NOTE(review): this loop is bounded by TOTAL_ASTC_BC7_COMMON_PARTITIONS2, while the 2-subset packers in this file index partitions below TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 - confirm the intended bound.
			
 
				+	printf("Partition pattern table usage histogram:\n");
			
 
				+	for (uint32_t i = 0; i < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2; i++)
			
 
				+		printf("%u:%u ", i, m_part_hist[i]);
			
 
				+	printf("\n\n");
			
 
				+}
			
 
				+
			
 
				+} // namespace basisu
			
 
				+
			
--- a/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.h
@@ -0,0 +1,224 @@
 
				+// basisu_astc_hdr_enc.h
			
 
				+#pragma once
			
 
				+#include "basisu_enc.h"
			
 
				+#include "basisu_gpu_texture.h"
			
 
				+#include "../transcoder/basisu_astc_helpers.h"
			
 
				+#include "../transcoder/basisu_astc_hdr_core.h"
			
 
				+
			
 
				+namespace basisu
			
 
				+{
			
 
				+	// This MUST be called before encoding any blocks.
			
 
				+	void astc_hdr_enc_init();
			
 
				+
			
 
				+	const uint32_t MODE11_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE11_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS; // Mode 11 weight ISE range bounds. print() walks m_weight_range_hist_11 with i <= MODE11_LAST_ISE_RANGE, so LAST is inclusive.
			
 
				+	const uint32_t MODE7_PART1_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE7_PART1_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS; // Mode 7, 1 partition (histogram m_weight_range_hist_7); LAST is inclusive.
			
 
				+	const uint32_t MODE7_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE7_PART2_LAST_ISE_RANGE = astc_helpers::BISE_8_LEVELS; // Mode 7, 2 partitions (histogram m_weight_range_hist_7_2part); LAST is inclusive.
			
 
				+	const uint32_t MODE11_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE11_PART2_LAST_ISE_RANGE = astc_helpers::BISE_4_LEVELS; // Mode 11, 2 partitions (histogram m_weight_range_hist_11_2part); LAST is inclusive.
			
 
				+	const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus one extra hidden, directly encoded submode, so really 9 (see tables 99/100 of the ASTC spec); astc_hdr_block_stats::m_mode11_submode_hist has 9 entries for this reason
			
 
				+	const uint32_t MODE7_TOTAL_SUBMODES = 6; // matches the 6 entries of astc_hdr_block_stats::m_mode7_submode_hist
			
 
				+		
			
 
				+	struct astc_hdr_codec_options // Encoder settings. Passed by const reference to eval_selectors(), compute_block_error(), astc_hdr_enc_block() and astc_hdr_refine_weights().
			
 
				+	{
			
 
				+		float m_bc6h_err_weight; // presumably how strongly BC6H error counts against ASTC error - TODO confirm against the .cpp
			
 
				+
			
 
				+		bool m_use_solid; // presumably enables solid color/void extent blocks (see astc_hdr_pack_results::m_is_solid) - TODO confirm
			
 
				+		// Mode 11 settings.
			
 
				+		bool m_use_mode11;
			
 
				+		bool m_mode11_uber_mode;
			
 
				+		uint32_t m_first_mode11_weight_ise_range;
			
 
				+		uint32_t m_last_mode11_weight_ise_range;
			
 
				+		bool m_mode11_direct_only;
			
 
				+		int32_t m_first_mode11_submode; // note: signed (int32_t), unlike the uint32_t ISE range fields
			
 
				+		int32_t m_last_mode11_submode;
			
 
				+		// Mode 7, 1 partition (part1) settings.
			
 
				+		bool m_use_mode7_part1;
			
 
				+		uint32_t m_first_mode7_part1_weight_ise_range;
			
 
				+		uint32_t m_last_mode7_part1_weight_ise_range;
			
 
				+		// Mode 7, 2 partitions (part2) settings.
			
 
				+		bool m_use_mode7_part2;
			
 
				+		uint32_t m_mode7_part2_part_masks;
			
 
				+		uint32_t m_first_mode7_part2_weight_ise_range;
			
 
				+		uint32_t m_last_mode7_part2_weight_ise_range;
			
 
				+		// Mode 11, 2 partitions (part2) settings.
			
 
				+		bool m_use_mode11_part2;
			
 
				+		uint32_t m_mode11_part2_part_masks;
			
 
				+		uint32_t m_first_mode11_part2_weight_ise_range;
			
 
				+		uint32_t m_last_mode11_part2_weight_ise_range;
			
 
				+
			
 
				+		float m_r_err_scale, m_g_err_scale; // per-channel error scales for R and G; no B scale is declared here
			
 
				+
			
 
				+		bool m_refine_weights; // presumably enables the astc_hdr_refine_weights() pass - TODO confirm
			
 
				+
			
 
				+		uint32_t m_level; // presumably the level last given to set_quality_level() - TODO confirm
			
 
				+
			
 
				+		bool m_use_estimated_partitions;
			
 
				+		uint32_t m_max_estimated_partitions;
			
 
				+
			
 
				+		// If true, the ASTC HDR compressor is allowed to more aggressively vary weight indices for slightly higher compression in non-fastest mode. This will hurt BC6H quality, however.
			
 
				+		bool m_allow_uber_mode;
			
 
				+
			
 
				+		astc_hdr_codec_options(); // defined out of line (not in this header)
			
 
				+
			
 
				+		void init(); // defined out of line (not in this header)
			
 
				+				
			
 
				+		// TODO: set_quality_level() is preferred to configure the codec for transcoding purposes.
			
 
				+		static const int cMinLevel = 0; // lowest quality level constant
			
 
				+		static const int cMaxLevel = 4; // highest quality level constant
			
 
				+		static const int cDefaultLevel = 1;
			
 
				+		void set_quality_level(int level); // presumably expects cMinLevel <= level <= cMaxLevel - TODO confirm how out-of-range values are handled
			
 
				+
			
 
				+	private:
			
 
				+		void set_quality_best(); // preset helpers, declared only; bodies are not in this header
			
 
				+		void set_quality_normal();
			
 
				+		void set_quality_fastest();
			
 
				+	};
			
 
				+
			
 
				+	struct astc_hdr_pack_results // One packing result for a block: the logical ASTC block, its error values, and bookkeeping used for statistics.
			
 
				+	{
			
 
				+		double m_best_block_error;
			
 
				+		double m_bc6h_block_error; // note this is not used/set by the encoder, here for convenience (clear() still resets it)
			
 
				+
			
 
				+		// Encoder results (logical ASTC block)
			
 
				+		astc_helpers::log_astc_block m_best_blk;
			
 
				+		
			
 
				+		// For statistical use
			
 
				+		uint32_t m_best_submodes[2]; // presumably one entry per partition (at most 2) - TODO confirm
			
 
				+		uint32_t m_best_pat_index;
			
 
				+		bool m_constrained_weights;
			
 
				+
			
 
				+		bool m_improved_via_refinement_flag;
			
 
				+				
			
 
				+		// Only valid if the block is solid
			
 
				+		basist::astc_blk m_solid_blk;
			
 
				+		
			
 
				+		// The BC6H transcoded block
			
 
				+		basist::bc6h_block m_bc6h_block;
			
 
				+
			
 
				+		// Solid color/void extent flag
			
 
				+		bool m_is_solid;
			
 
				+		// Resets this result to its initial state. m_solid_blk is not touched here (it is only valid when m_is_solid is set).
			
 
				+		void clear()
			
 
				+		{
			
 
				+			m_best_block_error = 1e+30f; // large sentinel error value
			
 
				+			m_bc6h_block_error = 1e+30f;
			
 
				+
			
 
				+			m_best_blk.clear();
			
 
				+			m_best_blk.m_grid_width = 4; // 4x4 weight grid
			
 
				+			m_best_blk.m_grid_height = 4;
			
 
				+			m_best_blk.m_endpoint_ise_range = 20; // 0-255
			
 
				+
			
 
				+			clear_obj(m_best_submodes);
			
 
				+
			
 
				+			m_best_pat_index = 0;
			
 
				+			m_constrained_weights = false;
			
 
				+									
			
 
				+			clear_obj(m_bc6h_block);
			
 
				+			
			
 
				+			m_is_solid = false;
			
 
				+			m_improved_via_refinement_flag = false;
			
 
				+		}
			
 
				+	};
			
 
				+			
			
 
				+	void interpolate_qlog12_colors(
			
 
				+		const int e[2][3],
			
 
				+		basist::half_float* pDecoded_half,
			
 
				+		vec3F* pDecoded_float,
			
 
				+		uint32_t n, uint32_t ise_weight_range);
			
 
				+		
			
 
				+	bool get_astc_hdr_mode_11_block_colors(
			
 
				+		const uint8_t* pEndpoints,
			
 
				+		basist::half_float* pDecoded_half,
			
 
				+		vec3F* pDecoded_float,
			
 
				+		uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
			
 
				+		
			
 
				+	bool get_astc_hdr_mode_7_block_colors(
			
 
				+		const uint8_t* pEndpoints,
			
 
				+		basist::half_float* pDecoded_half,
			
 
				+		vec3F* pDecoded_float,
			
 
				+		uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
			
 
				+
			
 
				+	double eval_selectors(
			
 
				+		uint32_t num_pixels,
			
 
				+		uint8_t* pWeights,
			
 
				+		const basist::half_float* pBlock_pixels_half,
			
 
				+		uint32_t num_weight_levels,
			
 
				+		const basist::half_float* pDecoded_half,
			
 
				+		const astc_hdr_codec_options& coptions,
			
 
				+		uint32_t usable_selector_bitmask = UINT32_MAX);
			
 
				+
			
 
				+	double compute_block_error(const basist::half_float* pOrig_block, const basist::half_float* pPacked_block, const astc_hdr_codec_options& coptions);
			
 
				+
			
 
				+	// Encodes a 4x4 ASTC HDR block given a 4x4 array of source block pixels/texels.
			
 
				+	// Supports solid color blocks, mode 11 (all submodes), mode 7/1 partition (all submodes), 
			
 
				+	// and mode 7/2 partitions (all submodes) - 30 patterns, only the ones also in common with the BC6H format.
			
 
				+	// The packed ASTC weight grid dimensions are currently always 4x4 texels, but may be also 3x3 in the future.
			
 
				+	// This function is thread safe, i.e. it may be called from multiple encoding threads simultaneously with different blocks.
			
 
				+	// 
			
 
				+	// Parameters:
			
 
				+	// pRGBPixels - An array of 48 (16 RGB) floats: the 4x4 block to pack
			
 
				+	// coptions - Codec options
			
 
				+	// all_results - Vector of astc_hdr_pack_results for this block (presumably filled in by the encoder - TODO confirm). Use astc_hdr_pack_results_to_block() to turn an entry into a basist::astc_blk.
			
 
				+	// NOTE(review): this comment previously described pPacked_block and pInternal_results parameters, which do not exist in the signature below.
			
 
				+	// 
			
 
				+	// Requirements: 
			
 
				+	// astc_hdr_enc_init() MUST have been called first to initialize the codec.
			
 
				+	// Input pixels are checked and cannot be NaN's, Inf's, signed, or too large (greater than MAX_HALF_FLOAT, or 65504). 
			
 
				+	// Normal values and denormals are okay.
			
 
				+	bool astc_hdr_enc_block(
			
 
				+		const float* pRGBPixels,
			
 
				+		const astc_hdr_codec_options& coptions,
			
 
				+		basisu::vector<astc_hdr_pack_results> &all_results);
			
 
				+
			
 
				+	bool astc_hdr_pack_results_to_block(basist::astc_blk& dst_blk, const astc_hdr_pack_results& results);
			
 
				+		
			
 
				+	bool astc_hdr_refine_weights(const basist::half_float* pSource_block, astc_hdr_pack_results& cur_results, const astc_hdr_codec_options& coptions, float bc6h_weight, bool* pImproved_flag);
			
 
				+
			
 
				+	struct astc_hdr_block_stats // Counters and histograms. update() takes one astc_hdr_pack_results; print() writes the totals/histograms out with printf.
			
 
				+	{
			
 
				+		std::mutex m_mutex; // taken by clear(); a std::mutex member also makes this struct non-copyable
			
 
				+
			
 
				+		uint32_t m_total_blocks;
			
 
				+		uint32_t m_total_2part, m_total_solid;
			
 
				+		uint32_t m_total_mode7_1part, m_total_mode7_2part;
			
 
				+		uint32_t m_total_mode11_1part, m_total_mode11_2part;
			
 
				+		uint32_t m_total_mode11_1part_constrained_weights;
			
 
				+
			
 
				+		uint32_t m_weight_range_hist_7[11]; // print() reads indices 1..MODE7_PART1_LAST_ISE_RANGE
			
 
				+		uint32_t m_weight_range_hist_7_2part[11]; // print() reads indices 1..MODE7_PART2_LAST_ISE_RANGE
			
 
				+		uint32_t m_mode7_submode_hist[6]; // 6 entries == MODE7_TOTAL_SUBMODES
			
 
				+
			
 
				+		uint32_t m_weight_range_hist_11[11]; // print() reads indices 1..MODE11_LAST_ISE_RANGE
			
 
				+		uint32_t m_weight_range_hist_11_2part[11]; // print() reads indices 1..MODE11_PART2_LAST_ISE_RANGE
			
 
				+		uint32_t m_mode11_submode_hist[9]; // 9 entries == MODE11_TOTAL_SUBMODES + 1 (the extra direct-encoded submode)
			
 
				+								
			
 
				+		uint32_t m_part_hist[32]; // print() reads the first basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2 entries
			
 
				+
			
 
				+		uint32_t m_total_refined;
			
 
				+								
			
 
				+		astc_hdr_block_stats() { clear(); } // all counters start at zero
			
 
				+		// Zeroes every counter and histogram while holding m_mutex.
			
 
				+		void clear()
			
 
				+		{
			
 
				+			std::lock_guard<std::mutex> lck(m_mutex);
			
 
				+
			
 
				+			m_total_blocks = 0;
			
 
				+			m_total_mode7_1part = 0, m_total_mode7_2part = 0, m_total_mode11_1part = 0, m_total_2part = 0, m_total_solid = 0, m_total_mode11_2part = 0;
			
 
				+			m_total_mode11_1part_constrained_weights = 0;
			
 
				+			m_total_refined = 0;
			
 
				+
			
 
				+			clear_obj(m_weight_range_hist_11);
			
 
				+			clear_obj(m_weight_range_hist_11_2part);
			
 
				+			clear_obj(m_weight_range_hist_7);
			
 
				+			clear_obj(m_weight_range_hist_7_2part);
			
 
				+			clear_obj(m_mode7_submode_hist);
			
 
				+			clear_obj(m_mode11_submode_hist);
			
 
				+			clear_obj(m_part_hist);
			
 
				+		}
			
 
				+
			
 
				+		void update(const astc_hdr_pack_results& log_blk); // defined out of line; presumably locks m_mutex like clear() - TODO confirm
			
 
				+		
			
 
				+		void print(); // defined out of line
			
 
				+	};
			
 
				+		
			
 
				+} // namespace basisu
			
 
				+
			
--- a/thirdparty/basis_universal/encoder/basisu_backend.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_backend.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_backend.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_backend.h
+++ b/thirdparty/basis_universal/encoder/basisu_backend.h
@@ -1,5 +1,5 @@
 
				 // basisu_backend.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_basis_file.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_basis_file.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_basis_file.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_basis_file.h
+++ b/thirdparty/basis_universal/encoder/basisu_basis_file.h
@@ -1,5 +1,5 @@
 
				 // basisu_basis_file.h
			
 
				-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp
@@ -1,5 +1,5 @@
 
				 // File: basisu_bc7enc.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -394,6 +394,7 @@ void bc7enc_compress_block_init()
 
				 static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSelectors, const bc7enc_vec4F* pSelector_weights, bc7enc_vec4F* pXl, bc7enc_vec4F* pXh, const color_quad_u8 *pColors)
			
 
				 {
			
 
				 	// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf 
			
 
				+	// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
			
 
				 	// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
			
 
				 	double z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
			
 
				 	double q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
			
@@ -1301,6 +1302,7 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color
 
				 		for (uint32_t c = 0; c < 4; c++)
			
 
				 			colors[i].m_c[c] = (uint8_t)astc_interpolate_linear(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]);
			
 
				 
			
 
				+#ifdef _DEBUG
			
 
				 	uint64_t total_err = 0;
			
 
				 	for (uint32_t p = 0; p < pParams->m_num_pixels; p++)
			
 
				 	{
			
@@ -1313,6 +1315,7 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color
 
				 			total_err += compute_color_distance_rgb(&orig, &packed, pParams->m_perceptual, pParams->m_weights);
			
 
				 	}
			
 
				 	assert(total_err == pResults->m_best_overall_err);
			
 
				+#endif
			
 
				 	
			
 
				 	// HACK HACK
			
 
				 	//if (total_err != pResults->m_best_overall_err)
			
--- a/thirdparty/basis_universal/encoder/basisu_bc7enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_bc7enc.h
@@ -1,5 +1,5 @@
 
				 // File: basisu_bc7enc.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_comp.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_comp.cpp
--- a/thirdparty/basis_universal/encoder/basisu_comp.h
+++ b/thirdparty/basis_universal/encoder/basisu_comp.h
@@ -1,5 +1,5 @@
 
				 // basisu_comp.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -18,9 +18,10 @@
 
				 #include "basisu_basis_file.h"
			
 
				 #include "../transcoder/basisu_transcoder.h"
			
 
				 #include "basisu_uastc_enc.h"
			
 
				+#include "basisu_astc_hdr_enc.h"
			
 
				 
			
 
				-#define BASISU_LIB_VERSION 116
			
 
				-#define BASISU_LIB_VERSION_STRING "1.16"
			
 
				+#define BASISU_LIB_VERSION 150
			
 
				+#define BASISU_LIB_VERSION_STRING "1.50"
			
 
				 
			
 
				 #ifndef BASISD_SUPPORT_KTX2
			
 
				 	#error BASISD_SUPPORT_KTX2 is undefined
			
@@ -81,6 +82,8 @@ namespace basisu
 
				 			m_basis_luma_601_psnr = 0.0f;
			
 
				 			m_basis_luma_709_ssim = 0.0f;
			
 
				 
			
 
				+			m_basis_rgb_avg_bc6h_psnr = 0.0f;
			
 
				+
			
 
				 			m_bc7_rgb_avg_psnr = 0.0f;
			
 
				 			m_bc7_rgba_avg_psnr = 0.0f;
			
 
				 			m_bc7_a_avg_psnr = 0.0f;
			
@@ -100,7 +103,7 @@ namespace basisu
 
				 		uint32_t m_width;
			
 
				 		uint32_t m_height;
			
 
				 
			
 
				-		// .basis compressed (ETC1S or UASTC statistics)
			
 
				+		// .basis/.ktx2 compressed (LDR: ETC1S or UASTC statistics, HDR: transcoded BC6H statistics)
			
 
				 		float m_basis_rgb_avg_psnr;
			
 
				 		float m_basis_rgba_avg_psnr;
			
 
				 		float m_basis_a_avg_psnr;
			
@@ -108,7 +111,10 @@ namespace basisu
 
				 		float m_basis_luma_601_psnr;
			
 
				 		float m_basis_luma_709_ssim;
			
 
				 
			
 
				-		// BC7 statistics
			
 
				+		// UASTC HDR only.
			
 
				+		float m_basis_rgb_avg_bc6h_psnr;
			
 
				+
			
 
				+		// LDR: BC7 statistics
			
 
				 		float m_bc7_rgb_avg_psnr;
			
 
				 		float m_bc7_rgba_avg_psnr;
			
 
				 		float m_bc7_a_avg_psnr;
			
@@ -116,7 +122,7 @@ namespace basisu
 
				 		float m_bc7_luma_601_psnr;
			
 
				 		float m_bc7_luma_709_ssim;
			
 
				 		
			
 
				-		// Highest achievable quality ETC1S statistics
			
 
				+		// LDR: Highest achievable quality ETC1S statistics
			
 
				 		float m_best_etc1s_rgb_avg_psnr;
			
 
				 		float m_best_etc1s_luma_709_psnr;
			
 
				 		float m_best_etc1s_luma_601_psnr;
			
@@ -256,7 +262,7 @@ namespace basisu
 
				 			m_no_selector_rdo.clear();
			
 
				 			m_selector_rdo_thresh.clear();
			
 
				 			m_read_source_images.clear();
			
 
				-			m_write_output_basis_files.clear();
			
 
				+			m_write_output_basis_or_ktx2_files.clear();
			
 
				 			m_compression_level.clear();
			
 
				 			m_compute_stats.clear();
			
 
				 			m_print_stats.clear();
			
@@ -317,27 +323,38 @@ namespace basisu
 
				 
			
 
				 			m_validate_output_data.clear();
			
 
				 
			
 
				+			m_hdr_ldr_srgb_to_linear_conversion.clear();
			
 
				+
			
 
				+			m_hdr_favor_astc.clear();
			
 
				+			
			
 
				 			m_pJob_pool = nullptr;
			
 
				 		}
			
 
				 						
			
 
				-		// True to generate UASTC .basis file data, otherwise ETC1S.
			
 
				+		// True to generate UASTC .basis/.KTX2 file data, otherwise ETC1S.
			
 
				 		bool_param<false> m_uastc;
			
 
				 
			
 
				+		// Set m_hdr to true to switch to UASTC HDR mode.
			
 
				+		bool_param<false> m_hdr;
			
 
				+
			
 
				 		bool_param<false> m_use_opencl;
			
 
				 
			
 
				-		// If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG images to read. 
			
 
				-		// Otherwise, the compressor processes the images in m_source_images.
			
 
				+		// If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG etc. images to read. 
			
 
				+		// Otherwise, the compressor processes the images in m_source_images or m_source_images_hdr.
			
 
				 		basisu::vector<std::string> m_source_filenames;
			
 
				 		basisu::vector<std::string> m_source_alpha_filenames;
			
 
				 		
			
 
				 		basisu::vector<image> m_source_images;
			
 
				 		
			
 
				+		basisu::vector<imagef> m_source_images_hdr;
			
 
				+				
			
 
				 		// Stores mipmaps starting from level 1. Level 0 is still stored in m_source_images, as usual.
			
 
				 		// If m_source_mipmaps isn't empty, automatic mipmap generation isn't done. m_source_mipmaps.size() MUST equal m_source_images.size() or the compressor returns an error.
			
 
				 		// The compressor applies the user-provided swizzling (in m_swizzle) to these images.
			
 
				 		basisu::vector< basisu::vector<image> > m_source_mipmap_images;
			
 
				+
			
 
				+		basisu::vector< basisu::vector<imagef> > m_source_mipmap_images_hdr;
			
 
				 						
			
 
				-		// Filename of the output basis file
			
 
				+		// Filename of the output basis/ktx2 file
			
 
				 		std::string m_out_filename;
			
 
				 
			
 
				 		// The params are done this way so we can detect when the user has explictly changed them.
			
@@ -373,8 +390,8 @@ namespace basisu
 
				 		// Read source images from m_source_filenames/m_source_alpha_filenames
			
 
				 		bool_param<false> m_read_source_images;
			
 
				 
			
 
				-		// Write the output basis file to disk using m_out_filename
			
 
				-		bool_param<false> m_write_output_basis_files;
			
 
				+		// Write the output basis/ktx2 file to disk using m_out_filename
			
 
				+		bool_param<false> m_write_output_basis_or_ktx2_files;
			
 
				 								
			
 
				 		// Compute and display image metrics 
			
 
				 		bool_param<false> m_compute_stats;
			
@@ -382,15 +399,15 @@ namespace basisu
 
				 		// Print stats to stdout, if m_compute_stats is true.
			
 
				 		bool_param<true> m_print_stats;
			
 
				 		
			
 
				-		// Check to see if any input image has an alpha channel, if so then the output basis file will have alpha channels
			
 
				+		// Check to see if any input image has an alpha channel, if so then the output basis/ktx2 file will have alpha channels
			
 
				 		bool_param<true> m_check_for_alpha;
			
 
				 		
			
 
				-		// Always put alpha slices in the output basis file, even when the input doesn't have alpha
			
 
				+		// Always put alpha slices in the output basis/ktx2 file, even when the input doesn't have alpha
			
 
				 		bool_param<false> m_force_alpha; 
			
 
				 		bool_param<true> m_multithreading;
			
 
				 		
			
 
				-		// Split the R channel to RGB and the G channel to alpha, then write a basis file with alpha channels
			
 
				-		char m_swizzle[4];
			
 
				+		// Split the R channel to RGB and the G channel to alpha, then write a basis/ktx2 file with alpha channels
			
 
				+		uint8_t m_swizzle[4];
			
 
				 
			
 
				 		bool_param<false> m_renormalize;
			
 
				 
			
@@ -448,8 +465,17 @@ namespace basisu
 
				 		param<int> m_ktx2_zstd_supercompression_level;
			
 
				 		bool_param<false> m_ktx2_srgb_transfer_func;
			
 
				 
			
 
				+		astc_hdr_codec_options m_uastc_hdr_options;
			
 
				+
			
 
				 		bool_param<false> m_validate_output_data;
			
 
				 
			
 
				+		// If true, LDR images (such as PNG) will be converted to normalized [0,1] linear light (via a sRGB->Linear conversion) and then processed as HDR. 
			
 
				+		// Otherwise, LDR images will be processed as HDR as-is.
			
 
				+		bool_param<true> m_hdr_ldr_srgb_to_linear_conversion;
			
 
				+
			
 
				+		// If true, ASTC HDR quality is favored more than BC6H quality. Otherwise it's a rough balance.
			
 
				+		bool_param<false> m_hdr_favor_astc;
			
 
				+						
			
 
				 		job_pool *m_pJob_pool;
			
 
				 	};
			
 
				 
			
@@ -504,6 +530,7 @@ namespace basisu
 
				 		opencl_context_ptr m_pOpenCL_context;
			
 
				 		
			
 
				 		basisu::vector<image> m_slice_images;
			
 
				+		basisu::vector<imagef> m_slice_images_hdr;
			
 
				 
			
 
				 		basisu::vector<image_stats> m_stats;
			
 
				 
			
@@ -515,7 +542,9 @@ namespace basisu
 
				 		uint32_t m_total_blocks;
			
 
				 		
			
 
				 		basisu_frontend m_frontend;
			
 
				+
			
 
				 		pixel_block_vec m_source_blocks;
			
 
				+		pixel_block_hdr_vec m_source_blocks_hdr;
			
 
				 
			
 
				 		basisu::vector<gpu_image> m_frontend_output_textures;
			
 
				 
			
@@ -526,11 +555,17 @@ namespace basisu
 
				 
			
 
				 		basisu_file m_basis_file;
			
 
				 
			
 
				-		basisu::vector<gpu_image> m_decoded_output_textures;
			
 
				+		basisu::vector<gpu_image> m_decoded_output_textures;			// BC6H in HDR mode
			
 
				 		basisu::vector<image> m_decoded_output_textures_unpacked;
			
 
				+		
			
 
				 		basisu::vector<gpu_image> m_decoded_output_textures_bc7;
			
 
				 		basisu::vector<image> m_decoded_output_textures_unpacked_bc7;
			
 
				 
			
 
				+		basisu::vector<imagef> m_decoded_output_textures_bc6h_hdr_unpacked;	// BC6H in HDR mode
			
 
				+
			
 
				+		basisu::vector<gpu_image> m_decoded_output_textures_astc_hdr;
			
 
				+		basisu::vector<imagef> m_decoded_output_textures_astc_hdr_unpacked;
			
 
				+
			
 
				 		uint8_vec m_output_basis_file;
			
 
				 		uint8_vec m_output_ktx2_file;
			
 
				 		
			
@@ -541,14 +576,21 @@ namespace basisu
 
				 
			
 
				 		bool m_opencl_failed;
			
 
				 
			
 
				+		void check_for_hdr_inputs();
			
 
				+		bool sanity_check_input_params();
			
 
				+		void clean_hdr_image(imagef& src_img);
			
 
				+		bool read_dds_source_images();
			
 
				 		bool read_source_images();
			
 
				 		bool extract_source_blocks();
			
 
				 		bool process_frontend();
			
 
				 		bool extract_frontend_texture_data();
			
 
				 		bool process_backend();
			
 
				 		bool create_basis_file_and_transcode();
			
 
				+		bool write_hdr_debug_images(const char* pBasename, const imagef& img, uint32_t width, uint32_t height);
			
 
				 		bool write_output_files_and_compute_stats();
			
 
				+		error_code encode_slices_to_uastc_hdr();
			
 
				 		error_code encode_slices_to_uastc();
			
 
				+		bool generate_mipmaps(const imagef& img, basisu::vector<imagef>& mips, bool has_alpha);
			
 
				 		bool generate_mipmaps(const image &img, basisu::vector<image> &mips, bool has_alpha);
			
 
				 		bool validate_texture_type_constraints();
			
 
				 		bool validate_ktx2_constraints();
			
@@ -568,7 +610,8 @@ namespace basisu
 
				 	//   
			
 
				 	// flags_and_quality: Combination of the above flags logically OR'd with the ETC1S or UASTC level, i.e. "cFlagSRGB | cFlagGenMipsClamp | cFlagThreaded | 128" or "cFlagSRGB | cFlagGenMipsClamp | cFlagUASTC | cFlagThreaded | cPackUASTCLevelDefault".
			
 
				 	//	  In ETC1S mode, the lower 8-bits are the ETC1S quality level which ranges from [1,255] (higher=better quality/larger files)
			
 
				-	//	  In UASTC mode, the lower 8-bits are the UASTC pack level (see cPackUASTCLevelFastest, etc.). Fastest/lowest quality is 0, so be sure to set it correctly. 
			
 
				+	//	  In UASTC mode, the lower 8-bits are the UASTC LDR/HDR pack level (see cPackUASTCLevelFastest, etc.). Fastest/lowest quality is 0, so be sure to set it correctly. Valid values are [0,4] for both LDR/HDR.
			
 
				+	//	  In UASTC mode, be sure to set this, otherwise it defaults to 0 (fastest/lowest quality).
			
 
				 	// 
			
 
				 	// uastc_rdo_quality: Float UASTC RDO quality level (0=no change, higher values lower quality but increase compressibility, initially try .5-1.5)
			
 
				 	// 
			
@@ -594,20 +637,36 @@ namespace basisu
 
				 		cFlagUASTCRDO = 1 << 18,		// use RDO postprocessing when generating UASTC files (must set uastc_rdo_quality to the quality scalar)
			
 
				 		
			
 
				 		cFlagPrintStats = 1 << 19,	// print image stats to stdout
			
 
				-		cFlagPrintStatus = 1 << 20	// print status to stdout
			
 
				+		cFlagPrintStatus = 1 << 20,	// print status to stdout
			
 
				+		
			
 
				+		cFlagHDR = 1 << 21,			// Force encoder into HDR mode, even if source image is LDR.
			
 
				+		cFlagHDRLDRImageSRGBToLinearConversion = 1 << 22, // In HDR mode, convert LDR source images to linear before encoding.
			
 
				+		
			
 
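				+		cFlagDebugImages = 1 << 23	// enable debug image output (NOTE(review): this comment previously read "enable status output", duplicating cFlagPrintStatus - confirm)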
			
 
				 	};
			
 
				 
			
 
				 	// This function accepts an array of source images. 
			
 
				 	// If more than one image is provided, it's assumed the images form a mipmap pyramid and automatic mipmap generation is disabled.
			
 
				-	// Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data. The returned block must be freed using basis_free_data().
			
 
				+	// Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data. 
			
 
				+	// Important: The returned block MUST be manually freed using basis_free_data().
			
 
				 	// basisu_encoder_init() MUST be called first!
			
 
				+	// LDR version. To compress the LDR source image as HDR: Use the cFlagHDR flag.
			
 
				 	void* basis_compress(
			
 
				 		const basisu::vector<image> &source_images,
			
 
				 		uint32_t flags_and_quality, float uastc_rdo_quality,
			
 
				 		size_t* pSize,
			
 
				 		image_stats* pStats = nullptr);
			
 
				 
			
 
				-	// This function only accepts a single source image.
			
 
				+	// HDR-only version.
			
 
				+	// Important: The returned block MUST be manually freed using basis_free_data().
			
 
				+	void* basis_compress(
			
 
				+		const basisu::vector<imagef>& source_images_hdr,
			
 
				+		uint32_t flags_and_quality, 
			
 
				+		size_t* pSize,
			
 
				+		image_stats* pStats = nullptr);
			
 
				+
			
 
				+	// This function only accepts a single LDR source image. It's just a wrapper for basis_compress() above.
			
 
				+	// Important: The returned block MUST be manually freed using basis_free_data().
			
 
				 	void* basis_compress(
			
 
				 		const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels,
			
 
				 		uint32_t flags_and_quality, float uastc_rdo_quality,
			
@@ -615,6 +674,7 @@ namespace basisu
 
				 		image_stats* pStats = nullptr);
			
 
				 
			
 
				 	// Frees the dynamically allocated file data returned by basis_compress().
			
 
				+	// This MUST be called on the pointer returned by basis_compress() when you're done with it.
			
 
				 	void basis_free_data(void* p);
			
 
				 
			
 
				 	// Runs a short benchmark using synthetic image data to time OpenCL encoding vs. CPU encoding, with multithreading enabled.
			
--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
--- a/thirdparty/basis_universal/encoder/basisu_enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_enc.h
@@ -1,5 +1,5 @@
 
				 // basisu_enc.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -48,7 +48,8 @@ namespace basisu
 
				 
			
 
				 	// Encoder library initialization.
			
 
				 	// This function MUST be called before encoding anything!
			
 
				-	void basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false);
			
 
				+	// Returns false if library initialization fails.
			
 
				+	bool basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false);
			
 
				 	void basisu_encoder_deinit();
			
 
				 
			
 
				 	// basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1
			
@@ -70,6 +71,18 @@ namespace basisu
 
				 		return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i);
			
 
				 	}
			
 
				 
			
 
				+	inline int left_shift32(int val, int shift) // Left shift of a signed value, performed on its uint32_t bit pattern and converted back to int.
			
 
				+	{
			
 
				+		assert((shift >= 0) && (shift < 32)); // shift must be in [0, 31]; only checked when asserts are enabled
			
 
				+		return static_cast<int>(static_cast<uint32_t>(val) << shift); // shifting as unsigned sidesteps the undefined cases of signed << (negative operand / overflow into the sign bit)
			
 
				+	}
			
 
				+
			
 
				+	inline uint32_t left_shift32(uint32_t val, int shift) // Unsigned overload: plain val << shift, with the shift count asserted to be in range.
			
 
				+	{
			
 
				+		assert((shift >= 0) && (shift < 32)); // shift must be in [0, 31]; only checked when asserts are enabled
			
 
				+		return val << shift;
			
 
				+	}
			
 
				+
			
 
				 	inline int32_t clampi(int32_t value, int32_t low, int32_t high) 
			
 
				 	{ 
			
 
				 		if (value < low) 
			
@@ -130,6 +143,31 @@ namespace basisu
 
				 
			
 
				 		return bits;
			
 
				 	}
			
 
				+		
			
 
				+	// Half-open interval [l, h): asserts l <= v < h and returns v unchanged. The (void) casts keep the parameters "used" when assert() compiles away.
			
 
				+	inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
			
 
				+	inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
			
 
				+
			
 
				+	// Closed interval [l, h]: asserts l <= v <= h and returns v unchanged. The (void) casts keep the parameters "used" when assert() compiles away.
			
 
				+	inline int bounds_check_incl(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }
			
 
				+	inline uint32_t bounds_check_incl(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }
			
 
				+
			
 
				+	inline uint32_t clz(uint32_t x) // Counts the leading zero bits of x; returns 32 when x == 0.
			
 
				+	{
			
 
				+		if (!x)
			
 
				+			return 32;
			
 
				+
			
 
				+		uint32_t n = 0;
			
 
				+		while ((x & 0x80000000) == 0) // shift left until the top bit is set, counting the shifts; terminates because x != 0 here
			
 
				+		{
			
 
				+			x <<= 1u;
			
 
				+			n++;
			
 
				+		}
			
 
				+
			
 
				+		return n;
			
 
				+	}
			
 
				+
			
 
				+	bool string_begins_with(const std::string& str, const char* pPhrase);
			
 
				 				
			
 
				 	// Hashing
			
 
				 	
			
@@ -268,6 +306,7 @@ namespace basisu
 
				 
			
 
				 	public:
			
 
				 		enum { num_elements = N };
			
 
				+		typedef T scalar_type;
			
 
				 
			
 
				 		inline vec() { }
			
 
				 		inline vec(eZero) { set_zero();  }
			
@@ -291,6 +330,7 @@ namespace basisu
 
				 		inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; }
			
 
				 
			
 
				 		inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; }
			
 
				+		inline void clear() { set_zero(); }
			
 
				 
			
 
				 		template <uint32_t OtherN, typename OtherT>
			
 
				 		inline vec &set(const vec<OtherN, OtherT> &other)
			
@@ -391,7 +431,7 @@ namespace basisu
 
				 		inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); }
			
 
				 		inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); }
			
 
				 
			
 
				-		inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len);	return *this; }
			
 
				+		inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }
			
 
				 
			
 
				 		inline vec &clamp(T l, T h)
			
 
				 		{
			
@@ -722,7 +762,7 @@ namespace basisu
 
				 		void job_thread(uint32_t index);
			
 
				 	};
			
 
				 
			
 
				-	// Simple 32-bit color class
			
 
				+	// Simple 64-bit color class
			
 
				 
			
 
				 	class color_rgba_i16
			
 
				 	{
			
@@ -1116,7 +1156,9 @@ namespace basisu
 
				 	{
			
 
				 		std::string result(s);
			
 
				 		for (size_t i = 0; i < result.size(); i++)
			
 
				-			result[i] = (char)tolower((int)result[i]);
			
 
				+		{
			
 
				+			result[i] = (char)tolower((uint8_t)(result[i]));
			
 
				+		}
			
 
				 		return result;
			
 
				 	}
			
 
				 
			
@@ -1408,7 +1450,7 @@ namespace basisu
 
				 
			
 
				 		size_t get_total_training_vecs() const { return m_training_vecs.size(); }
			
 
				 		const array_of_weighted_training_vecs &get_training_vecs() const	{ return m_training_vecs; }
			
 
				-				array_of_weighted_training_vecs &get_training_vecs()			{ return m_training_vecs; }
			
 
				+			  array_of_weighted_training_vecs &get_training_vecs()			{ return m_training_vecs; }
			
 
				 
			
 
				 		void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const
			
 
				 		{
			
@@ -1437,36 +1479,36 @@ namespace basisu
 
				 		}
			
 
				 
			
 
				 		void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const
			
 
				-      {
			
 
				+		{
			
 
				 			uint_vec node_stack;
			
 
				-         node_stack.reserve(512);
			
 
				+			node_stack.reserve(512);
			
 
				 
			
 
				-         codebook.resize(0);
			
 
				-         codebook.reserve(max_clusters);
			
 
				+			codebook.resize(0);
			
 
				+			codebook.reserve(max_clusters);
			
 
				 			         
			
 
				-         uint32_t node_index = 0;
			
 
				+			uint32_t node_index = 0;
			
 
				 
			
 
				-         while (true)
			
 
				-         {
			
 
				-            const tsvq_node& cur = m_nodes[node_index];
			
 
				+			while (true)
			
 
				+			{
			
 
				+				const tsvq_node& cur = m_nodes[node_index];
			
 
				 
			
 
				-            if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
			
 
				-            {
			
 
				-               codebook.resize(codebook.size() + 1);
			
 
				-               codebook.back() = cur.m_training_vecs;
			
 
				+				if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
			
 
				+				{
			
 
				+					codebook.resize(codebook.size() + 1);
			
 
				+					codebook.back() = cur.m_training_vecs;
			
 
				 										
			
 
				-               if (node_stack.empty())
			
 
				-                  break;
			
 
				+					if (node_stack.empty())
			
 
				+						break;
			
 
				 
			
 
				-               node_index = node_stack.back();
			
 
				-               node_stack.pop_back();
			
 
				-               continue;
			
 
				-            }
			
 
				+					node_index = node_stack.back();
			
 
				+					node_stack.pop_back();
			
 
				+					continue;
			
 
				+				}
			
 
				 				            
			
 
				-            node_stack.push_back(cur.m_right_index);
			
 
				-				node_index = cur.m_left_index;
			
 
				-         }
			
 
				-      }
			
 
				+				node_stack.push_back(cur.m_right_index);
			
 
				+					node_index = cur.m_left_index;
			
 
				+			}
			
 
				+		}
			
 
				 
			
 
				 		bool generate(uint32_t max_size)
			
 
				 		{
			
@@ -2319,6 +2361,14 @@ namespace basisu
 
				 			m_total_bits = 0;
			
 
				 		}
			
 
				 
			
 
				+		inline void restart()
			
 
				+		{
			
 
				+			m_bytes.resize(0);
			
 
				+			m_bit_buffer = 0;
			
 
				+			m_bit_buffer_size = 0;
			
 
				+			m_total_bits = 0;
			
 
				+		}
			
 
				+
			
 
				 		inline const uint8_vec &get_bytes() const { return m_bytes; }
			
 
				 
			
 
				 		inline uint64_t get_total_bits() const { return m_total_bits; }
			
@@ -2920,11 +2970,11 @@ namespace basisu
 
				 		inline const color_rgba *get_ptr() const { return &m_pixels[0]; }
			
 
				 		inline color_rgba *get_ptr() { return &m_pixels[0]; }
			
 
				 
			
 
				-		bool has_alpha() const
			
 
				+		bool has_alpha(uint32_t channel = 3) const
			
 
				 		{
			
 
				 			for (uint32_t y = 0; y < m_height; ++y)
			
 
				 				for (uint32_t x = 0; x < m_width; ++x)
			
 
				-					if ((*this)(x, y).a < 255)
			
 
				+					if ((*this)(x, y)[channel] < 255)
			
 
				 						return true;
			
 
				 
			
 
				 			return false;
			
@@ -3130,6 +3180,31 @@ namespace basisu
 
				 			return *this;
			
 
				 		}
			
 
				 
			
 
				+		imagef& crop_dup_borders(uint32_t w, uint32_t h)
			
 
				+		{
			
 
				+			const uint32_t orig_w = m_width, orig_h = m_height;
			
 
				+
			
 
				+			crop(w, h);
			
 
				+
			
 
				+			if (orig_w && orig_h)
			
 
				+			{
			
 
				+				if (m_width > orig_w)
			
 
				+				{
			
 
				+					for (uint32_t x = orig_w; x < m_width; x++)
			
 
				+						for (uint32_t y = 0; y < m_height; y++)
			
 
				+							set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
			
 
				+				}
			
 
				+
			
 
				+				if (m_height > orig_h)
			
 
				+				{
			
 
				+					for (uint32_t y = orig_h; y < m_height; y++)
			
 
				+						for (uint32_t x = 0; x < m_width; x++)
			
 
				+							set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
			
 
				+				}
			
 
				+			}
			
 
				+			return *this;
			
 
				+		}
			
 
				+
			
 
				 		inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
			
 
				 		inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
			
 
				 
			
@@ -3213,19 +3288,128 @@ namespace basisu
 
				 
			
 
				 		inline const vec4F *get_ptr() const { return &m_pixels[0]; }
			
 
				 		inline vec4F *get_ptr() { return &m_pixels[0]; }
			
 
				+
			
 
				+		bool clean_astc_hdr_pixels(float highest_mag)
			
 
				+		{
			
 
				+			bool status = true;
			
 
				+			bool nan_msg = false;
			
 
				+			bool inf_msg = false;
			
 
				+			bool neg_zero_msg = false;
			
 
				+			bool neg_msg = false;
			
 
				+			bool clamp_msg = false;
			
 
				+
			
 
				+			for (uint32_t iy = 0; iy < m_height; iy++)
			
 
				+			{
			
 
				+				for (uint32_t ix = 0; ix < m_width; ix++)
			
 
				+				{
			
 
				+					vec4F& c = (*this)(ix, iy);
			
 
				+
			
 
				+					for (uint32_t s = 0; s < 4; s++)
			
 
				+					{
			
 
				+						float &p = c[s];
			
 
				+						union { float f; uint32_t u; } x; x.f = p;
			
 
				+						
			
 
				+						if ((std::isnan(p)) || (std::isinf(p)) || (x.u == 0x80000000))
			
 
				+						{
			
 
				+							if (std::isnan(p))
			
 
				+							{
			
 
				+								if (!nan_msg)
			
 
				+								{
			
 
				+									fprintf(stderr, "One or more pixels was NaN, setting to 0.\n");
			
 
				+									nan_msg = true;
			
 
				+								}
			
 
				+							}
			
 
				+
			
 
				+							if (std::isinf(p))
			
 
				+							{
			
 
				+								if (!inf_msg)
			
 
				+								{
			
 
				+									fprintf(stderr, "One or more pixels was INF, setting to 0.\n");
			
 
				+									inf_msg = true;
			
 
				+								}
			
 
				+							}
			
 
				+
			
 
				+							if (x.u == 0x80000000)
			
 
				+							{
			
 
				+								if (!neg_zero_msg)
			
 
				+								{
			
 
				+									fprintf(stderr, "One or more pixels was -0, setting them to 0.\n");
			
 
				+									neg_zero_msg = true;
			
 
				+								}
			
 
				+							}
			
 
				+
			
 
				+							p = 0.0f;
			
 
				+							status = false;
			
 
				+						}
			
 
				+						else
			
 
				+						{
			
 
				+							//const float o = p;
			
 
				+							if (p < 0.0f)
			
 
				+							{
			
 
				+								p = 0.0f;
			
 
				+
			
 
				+								if (!neg_msg)
			
 
				+								{
			
 
				+									fprintf(stderr, "One or more pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n");
			
 
				+									neg_msg = true;
			
 
				+								}
			
 
				+								
			
 
				+								status = false;
			
 
				+							}
			
 
				+
			
 
				+							if (p > highest_mag)
			
 
				+							{
			
 
				+								p = highest_mag;
			
 
				+								
			
 
				+								if (!clamp_msg)
			
 
				+								{
			
 
				+									fprintf(stderr, "One or more pixels had to be clamped to %f.\n", highest_mag);
			
 
				+									clamp_msg = true;
			
 
				+								}
			
 
				+
			
 
				+								status = false;
			
 
				+							}
			
 
				+						}
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			return status;
			
 
				+		}
			
 
				+
			
 
				+		imagef& flip_y()
			
 
				+		{
			
 
				+			for (uint32_t y = 0; y < m_height / 2; ++y)
			
 
				+				for (uint32_t x = 0; x < m_width; ++x)
			
 
				+					std::swap((*this)(x, y), (*this)(x, m_height - 1 - y));
			
 
				+
			
 
				+			return *this;
			
 
				+		}
			
 
				 						
			
 
				 	private:
			
 
				 		uint32_t m_width, m_height, m_pitch;  // all in pixels
			
 
				 		vec4F_vec m_pixels;
			
 
				 	};
			
 
				 
			
 
				+	// REC 709 coefficients
			
 
				+	const float REC_709_R = 0.212656f, REC_709_G = 0.715158f, REC_709_B = 0.072186f;
			
 
				+
			
 
				+	inline float get_luminance(const vec4F &c)
			
 
				+	{
			
 
				+		return c[0] * REC_709_R + c[1] * REC_709_G + c[2] * REC_709_B;
			
 
				+	}
			
 
				+
			
 
				+	float linear_to_srgb(float l);
			
 
				+	float srgb_to_linear(float s);
			
 
				+
			
 
				 	// Image metrics
			
 
				 		
			
 
				 	class image_metrics
			
 
				 	{
			
 
				 	public:
			
 
				 		// TODO: Add ssim
			
 
				-		float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;
			
 
				+		double m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;
			
 
				+		bool m_has_neg, m_hf_mag_overflow, m_any_abnormal;
			
 
				 
			
 
				 		image_metrics()
			
 
				 		{
			
@@ -3240,10 +3424,17 @@ namespace basisu
 
				 			m_rms = 0;
			
 
				 			m_psnr = 0;
			
 
				 			m_ssim = 0;
			
 
				+			m_has_neg = false;
			
 
				+			m_hf_mag_overflow = false;
			
 
				+			m_any_abnormal = false;
			
 
				 		}
			
 
				 
			
 
				-		void print(const char *pPrefix = nullptr)	{ printf("%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr);	}
			
 
				+		void print(const char *pPrefix = nullptr)	{ printf("%sMax: %3.3f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr);	}
			
 
				+		void print_hp(const char* pPrefix = nullptr) { printf("%sMax: %3.6f Mean: %3.6f RMS: %3.6f PSNR: %2.6f dB, Any Neg: %u, Half float overflow: %u, Any NaN/Inf: %u\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); }
			
 
				 
			
 
				+		void calc(const imagef& a, const imagef& b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool log = false);
			
 
				+		void calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);
			
 
				+		void calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);
			
 
				 		void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false);
			
 
				 	};
			
 
				 
			
@@ -3256,6 +3447,8 @@ namespace basisu
 
				 	bool load_tga(const char* pFilename, image& img);
			
 
				 	inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); }
			
 
				 
			
 
				+	bool load_qoi(const char* pFilename, image& img);
			
 
				+
			
 
				 	bool load_jpg(const char *pFilename, image& img);
			
 
				 	inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); }
			
 
				 	
			
@@ -3263,9 +3456,64 @@ namespace basisu
 
				 	bool load_image(const char* pFilename, image& img);
			
 
				 	inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); }
			
 
				 
			
 
				+	// Supports .HDR and most (but not all) .EXR's (see TinyEXR).
			
 
				+	bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true);
			
 
				+	inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true) { return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear); }
			
 
				+
			
 
				+	enum class hdr_image_type
			
 
				+	{
			
 
				+		cHITRGBAHalfFloat = 0,
			
 
				+		cHITRGBAFloat = 1,
			
 
				+		cHITPNGImage = 2,
			
 
				+		cHITEXRImage = 3,
			
 
				+		cHITHDRImage = 4
			
 
				+	};
			
 
				+
			
 
				+	bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear);
			
 
				+
			
 
				 	uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans);
			
 
				 	uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans);
			
 
				 		
			
 
				+	struct rgbe_header_info
			
 
				+	{
			
 
				+		std::string m_program;
			
 
				+
			
 
				+		// Note no validation is done, either gamma or exposure may be 0.
			
 
				+		double m_gamma;
			
 
				+		bool m_has_gamma;
			
 
				+
			
 
				+		double m_exposure; // watts/steradian/m^2.
			
 
				+		bool m_has_exposure;
			
 
				+
			
 
				+		void clear() 
			
 
				+		{ 
			
 
				+			m_program.clear(); 
			
 
				+			m_gamma = 1.0f; 
			
 
				+			m_has_gamma = false; 
			
 
				+			m_exposure = 1.0f; 
			
 
				+			m_has_exposure = false; 
			
 
				+		}
			
 
				+	};
			
 
				+
			
 
				+	bool read_rgbe(const uint8_vec& filedata, imagef& img, rgbe_header_info& hdr_info);
			
 
				+	bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info &hdr_info);
			
 
				+
			
 
				+	bool write_rgbe(uint8_vec& file_data, imagef& img, rgbe_header_info& hdr_info);
			
 
				+	bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info);
			
 
				+
			
 
				+	bool read_exr(const char* pFilename, imagef& img, int& n_chans);
			
 
				+	bool read_exr(const void* pMem, size_t mem_size, imagef& img);
			
 
				+	
			
 
				+	enum
			
 
				+	{
			
 
				+		WRITE_EXR_LINEAR_HINT = 1, // hint for lossy comp. methods: exr_perceptual_treatment_t, logarithmic or linear, defaults to logarithmic
			
 
				+		WRITE_EXR_STORE_FLOATS = 2, // use 32-bit floats, otherwise it uses half floats
			
 
				+		WRITE_EXR_NO_COMPRESSION = 4 // no compression, otherwise it uses ZIP compression (16 scanlines per block)
			
 
				+	};
			
 
				+
			
 
				+	// Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images.
			
 
				+	bool write_exr(const char* pFilename, imagef& img, uint32_t n_chans, uint32_t flags);
			
 
				+			
			
 
				 	enum
			
 
				 	{
			
 
				 		cImageSaveGrayscale = 1,
			
@@ -3276,19 +3524,22 @@ namespace basisu
 
				 	inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); }
			
 
				 	
			
 
				 	bool read_file_to_vec(const char* pFilename, uint8_vec& data);
			
 
				-	
			
 
				+	bool read_file_to_data(const char* pFilename, void *pData, size_t len);	
			
 
				+
			
 
				 	bool write_data_to_file(const char* pFilename, const void* pData, size_t len);
			
 
				 	
			
 
				 	inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) {	return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); }
			
 
				-
			
 
				-	float linear_to_srgb(float l);
			
 
				-	float srgb_to_linear(float s);
			
 
				-
			
 
				+		
			
 
				 	bool image_resample(const image &src, image &dst, bool srgb = false,
			
 
				 		const char *pFilter = "lanczos4", float filter_scale = 1.0f, 
			
 
				 		bool wrapping = false,
			
 
				 		uint32_t first_comp = 0, uint32_t num_comps = 4);
			
 
				 
			
 
				+	bool image_resample(const imagef& src, imagef& dst, 
			
 
				+		const char* pFilter = "lanczos4", float filter_scale = 1.0f,
			
 
				+		bool wrapping = false,
			
 
				+		uint32_t first_comp = 0, uint32_t num_comps = 4);
			
 
				+		
			
 
				 	// Timing
			
 
				 			
			
 
				 	typedef uint64_t timer_ticks;
			
@@ -3319,6 +3570,8 @@ namespace basisu
 
				 		bool m_started, m_stopped;
			
 
				 	};
			
 
				 
			
 
				+	inline double get_interval_timer() { return interval_timer::ticks_to_secs(interval_timer::get_ticks()); }
			
 
				+
			
 
				 	// 2D array
			
 
				 
			
 
				 	template<typename T>
			
@@ -3372,8 +3625,8 @@ namespace basisu
 
				 		inline const T &operator[] (uint32_t i) const { return m_values[i]; }
			
 
				 		inline T &operator[] (uint32_t i) { return m_values[i]; }
			
 
				 				
			
 
				-		inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); }		
			
 
				-		inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); }
			
 
				+		inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }		
			
 
				+		inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
			
 
				 
			
 
				 		void clear()
			
 
				 		{
			
@@ -3450,7 +3703,327 @@ namespace basisu
 
				 		}
			
 
				 	};
			
 
				 	typedef basisu::vector<pixel_block> pixel_block_vec;
			
 
				-		
			
 
				+
			
 
				+	// Fixed-size cPixelBlockWidth x cPixelBlockHeight block of vec4F pixels, stored as m_pixels[y][x].
			
 
				+	struct pixel_block_hdr
			
 
				+	{
			
 
				+		vec4F m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x]
			
 
				+
			
 
				+		// Pixel access by (x, y); both coordinates are bounds-checked with assert() only.
			
 
				+		inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
			
 
				+		inline vec4F& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
			
 
				+
			
 
				+		// Pointer to the first pixel (row 0, column 0).
			
 
				+		inline const vec4F* get_ptr() const { return &m_pixels[0][0]; }
			
 
				+		inline vec4F* get_ptr() { return &m_pixels[0][0]; }
			
 
				+
			
 
				+		// Resets the whole block through clear_obj().
			
 
				+		inline void clear() { clear_obj(*this); }
			
 
				+
			
 
				+		// Bytewise equality of two HDR blocks.
			
 
				+		// The parameter used to be "const pixel_block&". The memcmp length is sizeof(this->m_pixels),
			
 
				+		// so comparing against a different block type could read past the end of rhs.m_pixels.
			
 
				+		inline bool operator== (const pixel_block_hdr& rhs) const
			
 
				+		{
			
 
				+			return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0;
			
 
				+		}
			
 
				+	};
			
 
				+	typedef basisu::vector<pixel_block_hdr> pixel_block_hdr_vec;
			
 
				+
			
 
				+	void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure);
			
 
				+	bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img);
			
 
				+	
			
 
				+	// Intersection
			
 
				+	enum eClear { cClear = 0 };
			
 
				+	enum eInitExpand { cInitExpand = 0 };
			
 
				+
			
 
				+	template<typename vector_type>
			
 
				+	class ray
			
 
				+	{
			
 
				+	public:
			
 
				+		typedef vector_type vector_t;
			
 
				+		typedef typename vector_type::scalar_type scalar_type;
			
 
				+
			
 
				+		inline ray() { }
			
 
				+		inline ray(eClear) { clear(); }
			
 
				+		inline ray(const vector_type& origin, const vector_type& direction) : m_origin(origin), m_direction(direction) { }
			
 
				+
			
 
				+		inline void clear()
			
 
				+		{
			
 
				+			m_origin.clear();
			
 
				+			m_direction.clear();
			
 
				+		}
			
 
				+
			
 
				+		inline const vector_type& get_origin(void) const { return m_origin; }
			
 
				+		inline void set_origin(const vector_type& origin) { m_origin = origin; }
			
 
				+
			
 
				+		inline const vector_type& get_direction(void) const { return m_direction; }
			
 
				+		inline void set_direction(const vector_type& direction) { m_direction = direction; }
			
 
				+
			
 
				+		inline void set_endpoints(const vector_type& start, const vector_type& end)
			
 
				+		{
			
 
				+			m_origin = start;
			
 
				+
			
 
				+			m_direction = end - start;
			
 
				+			m_direction.normalize_in_place();
			
 
				+		}
			
 
				+
			
 
				+		inline vector_type eval(scalar_type t) const
			
 
				+		{
			
 
				+			return m_origin + m_direction * t;
			
 
				+		}
			
 
				+
			
 
				+	private:
			
 
				+		vector_type m_origin;
			
 
				+		vector_type m_direction;
			
 
				+	};
			
 
				+
			
 
				+	typedef ray<vec2F> ray2F;
			
 
				+	typedef ray<vec3F> ray3F;
			
 
				+
			
 
				+	// Axis-aligned interval (low/high bound pair) over a vector type T.
			
 
				+	// T must provide num_elements, scalar_type, clear() and operator[].
			
 
				+	template<typename T>
			
 
				+	class vec_interval
			
 
				+	{
			
 
				+	public:
			
 
				+		enum { N = T::num_elements };
			
 
				+		typedef typename T::scalar_type scalar_type;
			
 
				+
			
 
				+		// Degenerate interval containing only v.
			
 
				+		inline vec_interval(const T& v) { m_bounds[0] = v; m_bounds[1] = v; }
			
 
				+		inline vec_interval(const T& low, const T& high) { m_bounds[0] = low; m_bounds[1] = high; }
			
 
				+
			
 
				+		// The default constructor leaves the bounds as T's default constructor leaves them.
			
 
				+		inline vec_interval() { }
			
 
				+		inline vec_interval(eClear) { clear(); }
			
 
				+		inline vec_interval(eInitExpand) { init_expand(); }
			
 
				+
			
 
				+		inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); }
			
 
				+
			
 
				+		// Sets an inverted interval (low = +1e30, high = -1e30 on every axis) so that the first
			
 
				+		// expand() call snaps both bounds to the supplied point.
			
 
				+		// Written per component: the earlier three-argument set() call supplied exactly three
			
 
				+		// values regardless of N.
			
 
				+		inline void init_expand()
			
 
				+		{
			
 
				+			for (uint32_t c = 0; c < N; c++)
			
 
				+			{
			
 
				+				m_bounds[0][c] = static_cast<scalar_type>(1e+30f);
			
 
				+				m_bounds[1][c] = static_cast<scalar_type>(-1e+30f);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		// Grows the interval so that it contains p. Returns a copy of the updated interval.
			
 
				+		inline vec_interval expand(const T& p)
			
 
				+		{
			
 
				+			for (uint32_t c = 0; c < N; c++)
			
 
				+			{
			
 
				+				if (p[c] < m_bounds[0][c])
			
 
				+					m_bounds[0][c] = p[c];
			
 
				+
			
 
				+				if (p[c] > m_bounds[1][c])
			
 
				+					m_bounds[1][c] = p[c];
			
 
				+			}
			
 
				+
			
 
				+			return *this;
			
 
				+		}
			
 
				+
			
 
				+		// Index 0 is the low bound, index 1 is the high bound.
			
 
				+		inline const T& operator[] (uint32_t i) const { assert(i < 2); return m_bounds[i]; }
			
 
				+		inline       T& operator[] (uint32_t i) { assert(i < 2); return m_bounds[i]; }
			
 
				+
			
 
				+		const T& get_low() const { return m_bounds[0]; }
			
 
				+		T& get_low() { return m_bounds[0]; }
			
 
				+
			
 
				+		const T& get_high() const { return m_bounds[1]; }
			
 
				+		T& get_high() { return m_bounds[1]; }
			
 
				+
			
 
				+		// Extent of the interval along one axis (high - low).
			
 
				+		scalar_type get_dim(uint32_t axis) const { return m_bounds[1][axis] - m_bounds[0][axis]; }
			
 
				+
			
 
				+		// True if p lies within the closed interval on every axis.
			
 
				+		bool contains(const T& p) const
			
 
				+		{
			
 
				+			// Declared separately: in "const T& low = ..., high = ...;" the second name is a copy, not a reference.
			
 
				+			const T& low = get_low();
			
 
				+			const T& high = get_high();
			
 
				+
			
 
				+			for (uint32_t i = 0; i < N; i++)
			
 
				+			{
			
 
				+				if (p[i] < low[i])
			
 
				+					return false;
			
 
				+
			
 
				+				if (p[i] > high[i])
			
 
				+					return false;
			
 
				+			}
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				+	private:
			
 
				+		T m_bounds[2];
			
 
				+	};
			
 
				+
			
 
				+	typedef vec_interval<vec1F> vec_interval1F;
			
 
				+	typedef vec_interval<vec2F> vec_interval2F;
			
 
				+	typedef vec_interval<vec3F> vec_interval3F;
			
 
				+	typedef vec_interval<vec4F> vec_interval4F;
			
 
				+
			
 
				+	typedef vec_interval2F aabb2F;
			
 
				+	typedef vec_interval3F aabb3F;
			
 
				+
			
 
				+	namespace intersection
			
 
				+	{
			
 
				+		enum result
			
 
				+		{
			
 
				+			cBackfacing = -1,
			
 
				+			cFailure = 0,
			
 
				+			cSuccess,
			
 
				+			cParallel,
			
 
				+			cInside,
			
 
				+		};
			
 
				+
			
 
				+		// Returns cInside, cSuccess, or cFailure.
			
 
				+		// Algorithm: Graphics Gems 1
			
 
				+		template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>
			
 
				+		result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box)
			
 
				+		{
			
 
				+			enum
			
 
				+			{
			
 
				+				cNumDim = vector_type::num_elements,
			
 
				+				cRight = 0,
			
 
				+				cLeft = 1,
			
 
				+				cMiddle = 2
			
 
				+			};
			
 
				+
			
 
				+			bool inside = true;
			
 
				+			int quadrant[cNumDim];
			
 
				+			scalar_type candidate_plane[cNumDim];
			
 
				+
			
 
				+			for (int i = 0; i < cNumDim; i++)
			
 
				+			{
			
 
				+				if (ray.get_origin()[i] < box[0][i])
			
 
				+				{
			
 
				+					quadrant[i] = cLeft;
			
 
				+					candidate_plane[i] = box[0][i];
			
 
				+					inside = false;
			
 
				+				}
			
 
				+				else if (ray.get_origin()[i] > box[1][i])
			
 
				+				{
			
 
				+					quadrant[i] = cRight;
			
 
				+					candidate_plane[i] = box[1][i];
			
 
				+					inside = false;
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					quadrant[i] = cMiddle;
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			if (inside)
			
 
				+			{
			
 
				+				coord = ray.get_origin();
			
 
				+				t = 0.0f;
			
 
				+				return cInside;
			
 
				+			}
			
 
				+
			
 
				+			scalar_type max_t[cNumDim];
			
 
				+			for (int i = 0; i < cNumDim; i++)
			
 
				+			{
			
 
				+				if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f))
			
 
				+					max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i];
			
 
				+				else
			
 
				+					max_t[i] = -1.0f;
			
 
				+			}
			
 
				+
			
 
				+			int which_plane = 0;
			
 
				+			for (int i = 1; i < cNumDim; i++)
			
 
				+				if (max_t[which_plane] < max_t[i])
			
 
				+					which_plane = i;
			
 
				+
			
 
				+			if (max_t[which_plane] < 0.0f)
			
 
				+				return cFailure;
			
 
				+
			
 
				+			for (int i = 0; i < cNumDim; i++)
			
 
				+			{
			
 
				+				if (i != which_plane)
			
 
				+				{
			
 
				+					coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i];
			
 
				+
			
 
				+					if ((coord[i] < box[0][i]) || (coord[i] > box[1][i]))
			
 
				+						return cFailure;
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					coord[i] = candidate_plane[i];
			
 
				+				}
			
 
				+
			
 
				+				assert(coord[i] >= box[0][i] && coord[i] <= box[1][i]);
			
 
				+			}
			
 
				+
			
 
				+			t = max_t[which_plane];
			
 
				+			return cSuccess;
			
 
				+		}
			
 
				+
			
 
				+		// Ray/box test that also reports whether the ray origin lies inside the box.
			
 
				+		// Origin outside the box: started_within is set to false and the call is forwarded to the
			
 
				+		// four-argument ray_aabb() above.
			
 
				+		// Origin inside the box: started_within is set to true, a reversed ray is cast back toward
			
 
				+		// the origin from a point 1.5 box diagonals along the ray, and t is converted back to a
			
 
				+		// distance along the original ray, clamped to >= 0.
			
 
				+		// NOTE(review): the inside case presumably expects a normalized ray direction, so that the
			
 
				+		// start point of the reversed ray is outside the box -- confirm against callers.
			
 
				+		template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>
			
 
				+		result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box)
			
 
				+		{
			
 
				+			if (!box.contains(ray.get_origin()))
			
 
				+			{
			
 
				+				started_within = false;
			
 
				+				return ray_aabb(coord, t, ray, box);
			
 
				+			}
			
 
				+
			
 
				+			started_within = true;
			
 
				+
			
 
				+			// Was "typename vector_type::T" and box.diagonal_length(). vec exposes scalar_type (not T) and vec_interval has no diagonal_length(), so the diagonal is computed from the two bounds.
			
 
				+			const scalar_type diag_dist = (box[1] - box[0]).length() * 1.5f;
			
 
				+			ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction());
			
 
				+
			
 
				+			result res(ray_aabb(coord, t, outside_ray, box));
			
 
				+			if (res != cSuccess)
			
 
				+				return res;
			
 
				+
			
 
				+			t = basisu::maximum(0.0f, diag_dist - t);
			
 
				+			return cSuccess;
			
 
				+		}
			
 
				+
			
 
				+	} // namespace intersection
			
 
				+
			
 
				+	// This float->half conversion matches how "F32TO16" works on Intel GPU's.
			
 
				+	// Input cannot be negative, Inf or Nan.
			
 
				+	inline basist::half_float float_to_half_non_neg_no_nan_inf(float val)
			
 
				+	{
			
 
				+		union { float f; int32_t i; uint32_t u; } fi = { val };
			
 
				+		const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF;
			
 
				+		int e = 0, m = 0;
			
 
				+
			
 
				+		assert(((fi.i >> 31) == 0) && (flt_e != 0xFF));
			
 
				+
			
 
				+		// not zero or denormal
			
 
				+		if (flt_e != 0)
			
 
				+		{
			
 
				+			int new_exp = flt_e - 127;
			
 
				+			if (new_exp > 15)
			
 
				+				e = 31;
			
 
				+			else if (new_exp < -14)
			
 
				+				m = lrintf((1 << 24) * fabsf(fi.f));
			
 
				+			else
			
 
				+			{
			
 
				+				e = new_exp + 15;
			
 
				+				m = lrintf(flt_m * (1.0f / ((float)(1 << 13))));
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		assert((0 <= m) && (m <= 1024));
			
 
				+		if (m == 1024)
			
 
				+		{
			
 
				+			e++;
			
 
				+			m = 0;
			
 
				+		}
			
 
				+
			
 
				+		assert((e >= 0) && (e <= 31));
			
 
				+		assert((m >= 0) && (m <= 1023));
			
 
				+
			
 
				+		basist::half_float result = (basist::half_float)((e << 10) | m);
			
 
				+		return result;
			
 
				+	}
			
 
				+
			
 
				+	// Supports positive and denormals only. No NaN or Inf.
			
 
				+	inline float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h)
			
 
				+	{
			
 
				+		assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h));
			
 
				+
			
 
				+		union fu32
			
 
				+		{
			
 
				+			uint32_t u;
			
 
				+			float f;
			
 
				+		};
			
 
				+
			
 
				+		static const fu32 K = { 0x77800000 };
			
 
				+
			
 
				+		fu32 o;
			
 
				+		o.u = h << 13;
			
 
				+		o.f *= K.f;
			
 
				+
			
 
				+		return o.f;
			
 
				+	}
			
 
				+				
			
 
				 } // namespace basisu
			
 
				 
			
 
				 
			
--- a/thirdparty/basis_universal/encoder/basisu_etc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_etc.cpp
@@ -1,5 +1,5 @@
 
				 // basis_etc.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_etc.h
+++ b/thirdparty/basis_universal/encoder/basisu_etc.h
@@ -1,5 +1,5 @@
 
				 // basis_etc.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_frontend.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_frontend.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_frontend.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -2347,6 +2347,7 @@ namespace basisu
 
				 						continue;
			
 
				 
			
 
				 					uint64_t overall_best_err = 0;
			
 
				+					(void)overall_best_err;
			
 
				 
			
 
				 					uint64_t total_err[4][4][4];
			
 
				 					clear_obj(total_err);
			
--- a/thirdparty/basis_universal/encoder/basisu_frontend.h
+++ b/thirdparty/basis_universal/encoder/basisu_frontend.h
@@ -1,5 +1,5 @@
 
				 // basisu_frontend.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_gpu_texture.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -15,13 +15,15 @@
 
				 #include "basisu_gpu_texture.h"
			
 
				 #include "basisu_enc.h"
			
 
				 #include "basisu_pvrtc1_4.h"
			
 
				-#if BASISU_USE_ASTC_DECOMPRESS
			
 
				-#include "basisu_astc_decomp.h"
			
 
				-#endif
			
 
				+#include "3rdparty/android_astc_decomp.h"
			
 
				 #include "basisu_bc7enc.h"
			
 
				+#include "../transcoder/basisu_astc_hdr_core.h"
			
 
				 
			
 
				 namespace basisu
			
 
				 {
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				+	// ETC2 EAC
			
 
				+
			
 
				 	void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels)
			
 
				 	{
			
 
				 		static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8");
			
@@ -56,6 +58,8 @@ namespace basisu
 
				 		pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul);
			
 
				 	}
			
 
				 
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				+	// BC1
			
 
				 	struct bc1_block
			
 
				 	{
			
 
				 		enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
			
@@ -274,6 +278,9 @@ namespace basisu
 
				 		return used_punchthrough;
			
 
				 	}
			
 
				 
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				+	// BC3-5
			
 
				+
			
 
				 	struct bc4_block
			
 
				 	{
			
 
				 		enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };
			
@@ -372,7 +379,8 @@ namespace basisu
 
				 		unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba));
			
 
				 		unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba));
			
 
				 	}
			
 
				-
			
 
				+	
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				 	// ATC isn't officially documented, so I'm assuming these references:
			
 
				 	// http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf
			
 
				 	// https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c
			
@@ -426,6 +434,7 @@ namespace basisu
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				 	// BC7 mode 0-7 decompression.
			
 
				 	// Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines.
			
 
				 
			
@@ -742,6 +751,255 @@ namespace basisu
 
				 		return false;
			
 
				 	}
			
 
				 	
			
 
				+	static inline int bc6h_sign_extend(int val, int bits) // Sign-extends the low `bits` bits of val (bit bits-1 is the sign) to a full int; bits must be in [1, 31].
			
 
				+	{
			
 
				+		assert((bits >= 1) && (bits < 32));
			
 
				+		assert((val >= 0) && ((uint32_t)val < (1u << bits))); // Unsigned compare: a signed `1 << 31` would overflow when bits == 31.
			
 
				+		return (int)((uint32_t)val << (32 - bits)) >> (32 - bits); // Shift left as unsigned (signed shift into the sign bit is undefined before C++20), then shift right as int to replicate the sign.
			
 
				+	}
			
 
				+
			
 
				+	static inline int bc6h_apply_delta(int base, int delta, int num_bits, int is_signed) // Adds delta to base, wraps the sum to num_bits bits, and sign-extends it when is_signed is nonzero.
			
 
				+	{
			
 
				+		int bitmask = ((1 << num_bits) - 1); // Selects the low num_bits bits.
			
 
				+		int v = (base + delta) & bitmask; // Wrap-around sum.
			
 
				+		return is_signed ? bc6h_sign_extend(v, num_bits) : v;
			
 
				+	}
			
 
				+
			
 
				+	static int bc6h_dequantize(int val, int bits, int is_signed) // Expands a `bits`-wide quantized endpoint value to the wider integer range used for interpolation.
			
 
				+	{
			
 
				+		int result;
			
 
				+		if (is_signed)
			
 
				+		{
			
 
				+			if (bits >= 16)
			
 
				+				result = val; // Wide enough already: pass through unchanged.
			
 
				+			else
			
 
				+			{
			
 
				+				int s_flag = 0; // Remembers the sign while the magnitude is processed.
			
 
				+				if (val < 0)
			
 
				+				{
			
 
				+					s_flag = 1;
			
 
				+					val = -val;
			
 
				+				}
			
 
				+
			
 
				+				if (val == 0)
			
 
				+					result = 0;
			
 
				+				else if (val >= ((1 << (bits - 1)) - 1))
			
 
				+					result = 0x7FFF; // Largest magnitude saturates to 0x7FFF.
			
 
				+				else
			
 
				+					result = ((val << 15) + 0x4000) >> (bits - 1); // Rescale the magnitude with rounding.
			
 
				+
			
 
				+				if (s_flag)
			
 
				+					result = -result; // Restore the sign.
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			if (bits >= 15)
			
 
				+				result = val; // Wide enough already: pass through unchanged.
			
 
				+			else if (!val)
			
 
				+				result = 0;
			
 
				+			else if (val == ((1 << bits) - 1))
			
 
				+				result = 0xFFFF; // All-ones input maps to 0xFFFF.
			
 
				+			else
			
 
				+				result = ((val << 16) + 0x8000) >> bits; // Rescale with rounding.
			
 
				+		}
			
 
				+		return result;
			
 
				+	}
			
 
				+
			
 
				+	static inline int bc6h_interpolate(int a, int b, const uint8_t* pWeights, int index) // Blends a and b using weight pWeights[index] out of 64.
			
 
				+	{
			
 
				+		return (a * (64 - (int)pWeights[index]) + b * (int)pWeights[index] + 32) >> 6; // +32 rounds before the shift that divides by 64.
			
 
				+	}
			
 
				+
			
 
				+	static inline basist::half_float bc6h_convert_to_half(int val, int is_signed) // Scales an interpolated integer and returns it as half_float bits (sign in bit 15 for the signed case).
			
 
				+	{
			
 
				+		if (!is_signed)
			
 
				+		{
			
 
				+			// scale by 31/64
			
 
				+			return (basist::half_float)((val * 31) >> 6);
			
 
				+		}
			
 
				+
			
 
				+		// scale by 31/32
			
 
				+		val = (val < 0) ? -(((-val) * 31) >> 5) : (val * 31) >> 5; // The magnitude is scaled, so negative values are not affected by how >> treats negatives.
			
 
				+
			
 
				+		int s = 0; // Sign bit to OR into the result.
			
 
				+		if (val < 0)
			
 
				+		{
			
 
				+			s = 0x8000;
			
 
				+			val = -val;
			
 
				+		}
			
 
				+
			
 
				+		return (basist::half_float)(s | val); // Sign-magnitude: sign bit plus scaled magnitude.
			
 
				+	}
			
 
				+
			
 
				+	static inline uint32_t bc6h_get_bits(uint32_t num_bits, uint64_t& l, uint64_t& h, uint32_t& total_bits) // Pops the low num_bits bits off the 128-bit value h:l, shifts the remainder down, and adds num_bits to total_bits. Only the low 32 bits of the popped field are returned.
			
 
				+	{
			
 
				+		assert((num_bits) && (num_bits <= 63));
			
 
				+
			
 
				+		uint32_t v = (uint32_t)(l & ((1ULL << num_bits) - 1ULL)); // 64-bit mask: a 32-bit `1U << num_bits` is undefined once num_bits reaches 32, which the assert above allows.
			
 
				+
			
 
				+		l >>= num_bits;
			
 
				+		l |= (h << (64U - num_bits)); // Bring the low bits of h into the vacated top of l.
			
 
				+		h >>= num_bits;
			
 
				+
			
 
				+		total_bits += num_bits;
			
 
				+		assert(total_bits <= 128); // Never consume more than the 128 bits held in h:l.
			
 
				+
			
 
				+		return v;
			
 
				+	}
			
 
				+
			
 
				+	static inline uint32_t bc6h_reverse_bits(uint32_t v, uint32_t num_bits) // Returns the low num_bits bits of v in reversed order; higher bits of v are ignored.
			
 
				+	{
			
 
				+		uint32_t res = 0;
			
 
				+		for (uint32_t i = 0; i < num_bits; i++)
			
 
				+		{
			
 
				+			uint32_t bit = (v & (1u << i)) != 0u; // 1 if bit i of v is set, else 0.
			
 
				+			res |= (bit << (num_bits - 1u - i)); // Bit i lands at the mirrored position.
			
 
				+		}
			
 
				+		return res;
			
 
				+	}
			
 
				+
			
 
				+	static inline uint64_t bc6h_read_le_qword(const void* p) // Builds a 64-bit value from two read_le_dword() reads at p and p + 4.
			
 
				+	{
			
 
				+		const uint8_t* pSrc = static_cast<const uint8_t*>(p);
			
 
				+		return ((uint64_t)read_le_dword(pSrc)) | (((uint64_t)read_le_dword(pSrc + sizeof(uint32_t))) << 32U); // First dword is the low half, second dword the high half.
			
 
				+	}
			
 
				+
			
 
				+	bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs) // Decodes one 16-byte BC6H block into 4 rows of 4 RGB half-float texels; rows are dest_pitch_in_halfs halfs apart. Returns false (output zeroed) when the mode bits are not a valid mode.
			
 
				+	{
			
 
				+		assert(dest_pitch_in_halfs >= 4 * 3); // Each row holds 4 texels x 3 halfs.
			
 
				+
			
 
				+		const uint32_t MAX_SUBSETS = 2, MAX_COMPS = 3;
			
 
				+
			
 
				+		const uint8_t* pSrc = static_cast<const uint8_t*>(pSrc_block);
			
 
				+		basist::half_float* pDst = static_cast<basist::half_float*>(pDst_block);
			
 
				+
			
 
				+		uint64_t blo = bc6h_read_le_qword(pSrc), bhi = bc6h_read_le_qword(pSrc + sizeof(uint64_t)); // The 128 block bits as two 64-bit halves.
			
 
				+
			
 
				+		// Unpack mode
			
 
				+		const int mode = basist::g_bc6h_mode_lookup[blo & 31];
			
 
				+		if (mode < 0)
			
 
				+		{
			
 
				+			for (int y = 0; y < 4; y++)
			
 
				+			{
			
 
				+				memset(pDst, 0, sizeof(basist::half_float) * 4 * MAX_COMPS); // Clear the whole row (4 texels x 3 halfs); clearing only 4 halfs left most of the block unwritten.
			
 
				+				pDst += dest_pitch_in_halfs;
			
 
				+			}
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		// Skip mode bits
			
 
				+		uint32_t total_bits_read = 0;
			
 
				+		bc6h_get_bits((mode < 2) ? 2 : 5, blo, bhi, total_bits_read); // Modes 0 and 1 use a 2-bit mode field, all others 5 bits.
			
 
				+
			
 
				+		assert(mode < (int)basist::NUM_BC6H_MODES);
			
 
				+
			
 
				+		const uint32_t num_subsets = (mode >= 10) ? 1 : 2;
			
 
				+		const bool is_mode_9_or_10 = (mode == 9) || (mode == 10); // These two modes skip the delta step below.
			
 
				+
			
 
				+		// Unpack endpoint components
			
 
				+		int comps[MAX_SUBSETS][MAX_COMPS][2] = { { { 0 } } };		// [subset][comp][l/h]
			
 
				+		int part_index = 0;
			
 
				+
			
 
				+		uint32_t layout_index = 0;
			
 
				+		while (layout_index < basist::MAX_BC6H_LAYOUT_INDEX)
			
 
				+		{
			
 
				+			const basist::bc6h_bit_layout& layout = basist::g_bc6h_bit_layouts[mode][layout_index];
			
 
				+
			
 
				+			if (layout.m_comp < 0)
			
 
				+				break; // Negative component terminates this mode's layout list.
			
 
				+
			
 
				+			const int subset = layout.m_index >> 1, lh_index = layout.m_index & 1;
			
 
				+			assert((layout.m_comp == 3) || ((subset >= 0) && (subset < (int)MAX_SUBSETS)));
			
 
				+
			
 
				+			const int last_bit = layout.m_last_bit, first_bit = layout.m_first_bit;
			
 
				+			assert(last_bit >= 0);
			
 
				+
			
 
				+			int& res = (layout.m_comp == 3) ? part_index : comps[subset][layout.m_comp][lh_index]; // Component 3 addresses the partition index instead of an endpoint.
			
 
				+
			
 
				+			if (first_bit < 0)
			
 
				+			{
			
 
				+				res |= (bc6h_get_bits(1, blo, bhi, total_bits_read) << last_bit); // Single-bit field stored at position last_bit.
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				const int total_bits = iabs(last_bit - first_bit) + 1;
			
 
				+				const int bit_shift = basisu::minimum(first_bit, last_bit);
			
 
				+
			
 
				+				int b = bc6h_get_bits(total_bits, blo, bhi, total_bits_read);
			
 
				+
			
 
				+				if (last_bit < first_bit)
			
 
				+					b = bc6h_reverse_bits(b, total_bits); // Field is stored in reversed bit order.
			
 
				+
			
 
				+				res |= (b << bit_shift);
			
 
				+			}
			
 
				+
			
 
				+			layout_index++;
			
 
				+		}
			
 
				+		assert(layout_index != basist::MAX_BC6H_LAYOUT_INDEX);
			
 
				+
			
 
				+		// Sign extend/dequantize endpoints
			
 
				+		const int num_sig_bits = basist::g_bc6h_mode_sig_bits[mode][0];
			
 
				+		if (is_signed)
			
 
				+		{
			
 
				+			for (uint32_t comp = 0; comp < 3; comp++)
			
 
				+				comps[0][comp][0] = bc6h_sign_extend(comps[0][comp][0], num_sig_bits);
			
 
				+		}
			
 
				+
			
 
				+		if (is_signed || !is_mode_9_or_10)
			
 
				+		{
			
 
				+			for (uint32_t subset = 0; subset < num_subsets; subset++)
			
 
				+				for (uint32_t comp = 0; comp < 3; comp++)
			
 
				+					for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++) // Skips comps[0][comp][0], which was handled above.
			
 
				+						comps[subset][comp][lh] = bc6h_sign_extend(comps[subset][comp][lh], basist::g_bc6h_mode_sig_bits[mode][1 + comp]);
			
 
				+		}
			
 
				+
			
 
				+		if (!is_mode_9_or_10)
			
 
				+		{
			
 
				+			for (uint32_t subset = 0; subset < num_subsets; subset++)
			
 
				+				for (uint32_t comp = 0; comp < 3; comp++)
			
 
				+					for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++)
			
 
				+						comps[subset][comp][lh] = bc6h_apply_delta(comps[0][comp][0], comps[subset][comp][lh], num_sig_bits, is_signed); // Remaining endpoints are deltas relative to comps[0][comp][0].
			
 
				+		}
			
 
				+
			
 
				+		for (uint32_t subset = 0; subset < num_subsets; subset++)
			
 
				+			for (uint32_t comp = 0; comp < 3; comp++)
			
 
				+				for (uint32_t lh = 0; lh < 2; lh++)
			
 
				+					comps[subset][comp][lh] = bc6h_dequantize(comps[subset][comp][lh], num_sig_bits, is_signed);
			
 
				+
			
 
				+		// Now unpack weights and output texels
			
 
				+		const int weight_bits = (mode >= 10) ? 4 : 3;
			
 
				+		const uint8_t* pWeights = (mode >= 10) ? basist::g_bc6h_weight4 : basist::g_bc6h_weight3;
			
 
				+
			
 
				+		dest_pitch_in_halfs -= 4 * 3; // pDst advances 12 halfs per row below, so only the remainder is added at row end.
			
 
				+
			
 
				+		for (uint32_t y = 0; y < 4; y++)
			
 
				+		{
			
 
				+			for (uint32_t x = 0; x < 4; x++)
			
 
				+			{
			
 
				+				int subset = (num_subsets == 1) ? ((x | y) ? 0 : 0x80) : basist::g_bc6h_2subset_patterns[part_index][y][x];
			
 
				+				const int num_bits = weight_bits + ((subset & 0x80) ? -1 : 0); // Texels flagged with 0x80 store one fewer weight bit.
			
 
				+
			
 
				+				subset &= 1;
			
 
				+
			
 
				+				const int weight_index = bc6h_get_bits(num_bits, blo, bhi, total_bits_read);
			
 
				+
			
 
				+				pDst[0] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][0][0], comps[subset][0][1], pWeights, weight_index), is_signed);
			
 
				+				pDst[1] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][1][0], comps[subset][1][1], pWeights, weight_index), is_signed);
			
 
				+				pDst[2] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][2][0], comps[subset][2][1], pWeights, weight_index), is_signed);
			
 
				+
			
 
				+				pDst += 3;
			
 
				+			}
			
 
				+
			
 
				+			pDst += dest_pitch_in_halfs;
			
 
				+		}
			
 
				+
			
 
				+		assert(total_bits_read == 128); // A valid block consumes exactly all 128 bits.
			
 
				+		return true;
			
 
				+	}
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				+	// FXT1 (for fun, and because some modern Intel parts support it, and because a subset is like BC1)
			
 
				+
			
 
				 	struct fxt1_block
			
 
				 	{
			
 
				 		union
			
@@ -901,6 +1159,9 @@ namespace basisu
 
				 		return true;
			
 
				 	}
			
 
				 
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				+	// PVRTC2 (non-interpolated, hard_flag=1 modulation=0 subset only!)
			
 
				+
			
 
				 	struct pvrtc2_block
			
 
				 	{
			
 
				 		uint8_t m_modulation[4];
			
@@ -1015,6 +1276,9 @@ namespace basisu
 
				 		return true;
			
 
				 	}
			
 
				 
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				+	// ETC2 EAC R11 or RG11
			
 
				+
			
 
				 	struct etc2_eac_r11
			
 
				 	{
			
 
				 		uint64_t m_base	: 8;
			
@@ -1085,13 +1349,16 @@ namespace basisu
 
				 			unpack_etc2_eac_r(pBlock, pPixels, c);
			
 
				 		}
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				+	// UASTC
			
 
				+
			
 
				 	void unpack_uastc(const void* p, color_rgba* pPixels) // Thin wrapper: decodes the block at p into pPixels via basist::unpack_uastc.
			
 
				 	{
			
 
				 		basist::unpack_uastc(*static_cast<const basist::uastc_block*>(p), (basist::color32 *)pPixels, false); // p is treated as a uastc_block and pPixels as color32; meaning of the trailing `false` is defined by the transcoder (not visible here).
			
 
				 	}
			
 
				-	
			
 
				-	// Unpacks to RGBA, R, RG, or A
			
 
				+			
			
 
				+	// Unpacks to RGBA, R, RG, or A. LDR GPU texture formats only.
			
 
				 	bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels)
			
 
				 	{
			
 
				 		switch (fmt)
			
@@ -1150,14 +1417,24 @@ namespace basisu
 
				 			unpack_etc2_eac(pBlock, pPixels);
			
 
				 			break;
			
 
				 		}
			
 
				-		case texture_format::cASTC4x4:
			
 
				+		case texture_format::cBC6HSigned:
			
 
				+		case texture_format::cBC6HUnsigned:
			
 
				+		case texture_format::cASTC_HDR_4x4:
			
 
				+		case texture_format::cUASTC_HDR_4x4:
			
 
				+		{
			
 
				+			// Can't unpack HDR blocks in unpack_block() because it returns 32bpp pixel data.
			
 
				+			assert(0);
			
 
				+			return false;
			
 
				+		}
			
 
				+		case texture_format::cASTC_LDR_4x4:
			
 
				 		{
			
 
				-#if BASISU_USE_ASTC_DECOMPRESS
			
 
				 			const bool astc_srgb = false;
			
 
				-			basisu_astc::astc::decompress(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4);
			
 
				-#else
			
 
				-			memset(pPixels, 255, 16 * sizeof(color_rgba));
			
 
				-#endif
			
 
				+			bool status = basisu_astc::astc::decompress_ldr(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4);
			
 
				+			assert(status);
			
 
				+
			
 
				+			if (!status)
			
 
				+				return false;
			
 
				+			
			
 
				 			break;
			
 
				 		}
			
 
				 		case texture_format::cATC_RGB:
			
@@ -1206,6 +1483,66 @@ namespace basisu
 
				 		return true;
			
 
				 	}
			
 
				 
			
 
				+	bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels) // Decodes one 4x4 HDR block (ASTC HDR, UASTC HDR or BC6H) into 16 float RGBA pixels. Returns false if decoding fails or fmt is not an HDR format.
			
 
				+	{
			
 
				+		switch (fmt)
			
 
				+		{
			
 
				+			case texture_format::cASTC_HDR_4x4:
			
 
				+			case texture_format::cUASTC_HDR_4x4:
			
 
				+			{
			
 
				+#if 1
			
 
				+				bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 4, 4);
			
 
				+				assert(status);
			
 
				+				if (!status)
			
 
				+					return false;
			
 
				+#else
			
 
				+				basist::half_float half_block[16][4];
			
 
				+				
			
 
				+				astc_helpers::log_astc_block log_blk;
			
 
				+				if (!astc_helpers::unpack_block(pBlock, log_blk, 4, 4))
			
 
				+					return false;
			
 
				+				if (!astc_helpers::decode_block(log_blk, half_block, 4, 4, astc_helpers::cDecodeModeHDR16))
			
 
				+					return false;
			
 
				+
			
 
				+				for (uint32_t p = 0; p < 16; p++)
			
 
				+				{
			
 
				+					pPixels[p][0] = basist::half_to_float(half_block[p][0]);
			
 
				+					pPixels[p][1] = basist::half_to_float(half_block[p][1]);
			
 
				+					pPixels[p][2] = basist::half_to_float(half_block[p][2]);
			
 
				+					pPixels[p][3] = basist::half_to_float(half_block[p][3]);
			
 
				+				}
			
 
				+
			
 
				+				//memset(pPixels, 0, sizeof(vec4F) * 16);
			
 
				+#endif
			
 
				+				return true;
			
 
				+			}
			
 
				+			case texture_format::cBC6HSigned:
			
 
				+			case texture_format::cBC6HUnsigned:
			
 
				+			{
			
 
				+				basist::half_float half_block[16][3] = {}; // Zero-filled so a rejected block never exposes uninitialized stack data.
			
 
				+
			
 
				+				const bool status = unpack_bc6h(pBlock, half_block, fmt == texture_format::cBC6HSigned); // Keep the decoder's verdict; the three-argument call relies on a default pitch declared outside this view (TODO confirm).
			
 
				+
			
 
				+				for (uint32_t p = 0; p < 16; p++)
			
 
				+				{
			
 
				+					pPixels[p][0] = basist::half_to_float(half_block[p][0]);
			
 
				+					pPixels[p][1] = basist::half_to_float(half_block[p][1]);
			
 
				+					pPixels[p][2] = basist::half_to_float(half_block[p][2]);
			
 
				+					pPixels[p][3] = 1.0f; // BC6H carries no alpha; output is opaque.
			
 
				+				}
			
 
				+
			
 
				+				return status; // Pixels are always written (zeros on failure), but an invalid block is now reported.
			
 
				+			}
			
 
				+			default:
			
 
				+			{
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		assert(0); // Reached only for non-HDR formats.
			
 
				+		return false;
			
 
				+	}
			
 
				+		
			
 
				 	bool gpu_image::unpack(image& img) const
			
 
				 	{
			
 
				 		img.resize(get_pixel_width(), get_pixel_height());
			
@@ -1252,7 +1589,48 @@ namespace basisu
 
				 
			
 
				 		return success;
			
 
				 	}
			
 
				+
			
 
				+	bool gpu_image::unpack_hdr(imagef& img) const // Decodes every block of this HDR texture into img (resized to the pixel dimensions). Returns false for non-HDR formats or if any block fails to decode.
			
 
				+	{
			
 
				+		if ((m_fmt != texture_format::cASTC_HDR_4x4) && 
			
 
				+			(m_fmt != texture_format::cUASTC_HDR_4x4) &&
			
 
				+			(m_fmt != texture_format::cBC6HUnsigned) &&
			
 
				+			(m_fmt != texture_format::cBC6HSigned))
			
 
				+		{
			
 
				+			// Can't call on LDR images, at least currently. (Could unpack the LDR data and convert to float.)
			
 
				+			assert(0);
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		img.resize(get_pixel_width(), get_pixel_height());
			
 
				+		img.set_all(vec4F(0.0f));
			
 
				+
			
 
				+		if (!img.get_width() || !img.get_height())
			
 
				+			return true; // Empty image: nothing to decode.
			
 
				+
			
 
				+		assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize));
			
 
				+		vec4F pixels[cMaxBlockSize * cMaxBlockSize]; // Scratch buffer for one decoded block.
			
 
				+		clear_obj(pixels);
			
 
				+
			
 
				+		bool success = true;
			
 
				+
			
 
				+		for (uint32_t by = 0; by < m_blocks_y; by++)
			
 
				+		{
			
 
				+			for (uint32_t bx = 0; bx < m_blocks_x; bx++)
			
 
				+			{
			
 
				+				const void* pBlock = get_block_ptr(bx, by);
			
 
				+
			
 
				+				if (!unpack_block_hdr(m_fmt, pBlock, pixels))
			
 
				+					success = false; // Record the failure but keep going with the remaining blocks.
			
 
				+
			
 
				+				img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height); // Written even after a failed decode, with whatever the scratch buffer holds.
			
 
				+			} // bx
			
 
				+		} // by
			
 
				+
			
 
				+		return success;
			
 
				+	}
			
 
				 		
			
 
				+	// KTX1 texture file writing
			
 
				 	static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A };
			
 
				 
			
 
				 	// KTX/GL enums
			
@@ -1273,6 +1651,8 @@ namespace basisu
 
				 		KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278,
			
 
				 		KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C,
			
 
				 		KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D,
			
 
				+		KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E,
			
 
				+		KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F,
			
 
				 		KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00,
			
 
				 		KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02,
			
 
				 		KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0,
			
@@ -1319,6 +1699,7 @@ namespace basisu
 
				 		uint32_t width = 0, height = 0, total_levels = 0;
			
 
				 		basisu::texture_format fmt = texture_format::cInvalidTextureFormat;
			
 
				 
			
 
				+		// Sanity check the input
			
 
				 		if (cubemap_flag)
			
 
				 		{
			
 
				 			if ((gpu_images.size() % 6) != 0)
			
@@ -1327,7 +1708,7 @@ namespace basisu
 
				 				return false;
			
 
				 			}
			
 
				 		}
			
 
				-
			
 
				+				
			
 
				 		for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
			
 
				 		{
			
 
				 			const gpu_image_vec &levels = gpu_images[array_index];
			
@@ -1426,6 +1807,18 @@ namespace basisu
 
				 			base_internal_fmt = KTX_RGBA;
			
 
				 			break;
			
 
				 		}
			
 
				+		case texture_format::cBC6HSigned:
			
 
				+		{
			
 
				+			internal_fmt = KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT;
			
 
				+			base_internal_fmt = KTX_RGBA;
			
 
				+			break;
			
 
				+		}
			
 
				+		case texture_format::cBC6HUnsigned:
			
 
				+		{
			
 
				+			internal_fmt = KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT;
			
 
				+			base_internal_fmt = KTX_RGBA;
			
 
				+			break;
			
 
				+		}
			
 
				 		case texture_format::cBC7:
			
 
				 		{
			
 
				 			internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM;
			
@@ -1443,7 +1836,10 @@ namespace basisu
 
				 			base_internal_fmt = KTX_RGBA;
			
 
				 			break;
			
 
				 		}
			
 
				-		case texture_format::cASTC4x4:
			
 
				+		// We use different enums for HDR vs. LDR ASTC, but internally they are both just ASTC.
			
 
				+		case texture_format::cASTC_LDR_4x4:
			
 
				+		case texture_format::cASTC_HDR_4x4:
			
 
				+		case texture_format::cUASTC_HDR_4x4: // UASTC_HDR is just HDR-only ASTC
			
 
				 		{
			
 
				 			internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR;
			
 
				 			base_internal_fmt = KTX_RGBA;
			
@@ -1496,17 +1892,17 @@ namespace basisu
 
				 			return false;
			
 
				 		}
			
 
				 		}
			
 
				-		
			
 
				+
			
 
				 		ktx_header header;
			
 
				 		header.clear();
			
 
				 		memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id));
			
 
				 		header.m_endianness = KTX_ENDIAN;
			
 
				-		
			
 
				+
			
 
				 		header.m_pixelWidth = width;
			
 
				 		header.m_pixelHeight = height;
			
 
				-				
			
 
				+
			
 
				 		header.m_glTypeSize = 1;
			
 
				-		
			
 
				+
			
 
				 		header.m_glInternalFormat = internal_fmt;
			
 
				 		header.m_glBaseInternalFormat = base_internal_fmt;
			
 
				 
			
@@ -1517,12 +1913,12 @@ namespace basisu
 
				 		header.m_numberOfMipmapLevels = total_levels;
			
 
				 		header.m_numberOfFaces = cubemap_flag ? 6 : 1;
			
 
				 
			
 
				-		append_vector(ktx_data, (uint8_t *)&header, sizeof(header));
			
 
				+		append_vector(ktx_data, (uint8_t*)&header, sizeof(header));
			
 
				 
			
 
				 		for (uint32_t level_index = 0; level_index < total_levels; level_index++)
			
 
				 		{
			
 
				 			uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes();
			
 
				-			
			
 
				+
			
 
				 			if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1))
			
 
				 			{
			
 
				 				img_size = img_size * header.m_numberOfFaces * maximum<uint32_t>(1, header.m_numberOfArrayElements);
			
@@ -1531,9 +1927,10 @@ namespace basisu
 
				 			assert(img_size && ((img_size & 3) == 0));
			
 
				 
			
 
				 			packed_uint<4> packed_img_size(img_size);
			
 
				-			append_vector(ktx_data, (uint8_t *)&packed_img_size, sizeof(packed_img_size));
			
 
				+			append_vector(ktx_data, (uint8_t*)&packed_img_size, sizeof(packed_img_size));
			
 
				 
			
 
				 			uint32_t bytes_written = 0;
			
 
				+			(void)bytes_written;
			
 
				 
			
 
				 			for (uint32_t array_index = 0; array_index < maximum<uint32_t>(1, header.m_numberOfArrayElements); array_index++)
			
 
				 			{
			
@@ -1541,11 +1938,11 @@ namespace basisu
 
				 				{
			
 
				 					const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index];
			
 
				 
			
 
				-					append_vector(ktx_data, (uint8_t *)img.get_ptr(), img.get_size_in_bytes());
			
 
				-					
			
 
				+					append_vector(ktx_data, (uint8_t*)img.get_ptr(), img.get_size_in_bytes());
			
 
				+
			
 
				 					bytes_written += img.get_size_in_bytes();
			
 
				 				}
			
 
				-			
			
 
				+
			
 
				 			} // array_index
			
 
				 
			
 
				 		} // level_index
			
@@ -1553,7 +1950,58 @@ namespace basisu
 
				 		return true;
			
 
				 	}
			
 
				 
			
 
				-	bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag)
			
 
				+	bool does_dds_support_format(texture_format fmt) // Returns true for the BC1 variants, BC3, BC4, BC5, BC6H (signed/unsigned) and BC7; false for every other format.
			
 
				+	{
			
 
				+		switch (fmt)
			
 
				+		{
			
 
				+		case texture_format::cBC1_NV:
			
 
				+		case texture_format::cBC1_AMD:
			
 
				+		case texture_format::cBC1:
			
 
				+		case texture_format::cBC3:
			
 
				+		case texture_format::cBC4:
			
 
				+		case texture_format::cBC5:
			
 
				+		case texture_format::cBC6HSigned:
			
 
				+		case texture_format::cBC6HUnsigned:
			
 
				+		case texture_format::cBC7:
			
 
				+			return true;
			
 
				+		default:
			
 
				+			break;
			
 
				+		}
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	// Only supports the basic DirectX BC texture formats.
			
 
				+	// gpu_images array is: [face/layer][mipmap level]
			
 
				+	// For cubemap arrays, # of face/layers must be a multiple of 6.
			
 
				+	// Accepts 2D, 2D mipmapped, 2D array, 2D array mipmapped
			
 
				+	// and cubemap, cubemap mipmapped, and cubemap array mipmapped.
			
 
				+	bool write_dds_file(uint8_vec &dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format) // Stub in this copy: ignores all arguments, leaves dds_data untouched and always reports failure.
			
 
				+	{		
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format) // Builds the DDS bytes with the in-memory overload, then writes them to pFilename. Returns false if either step fails.
			
 
				+	{
			
 
				+		uint8_vec dds_data;
			
 
				+
			
 
				+		if (!write_dds_file(dds_data, gpu_images, cubemap_flag, use_srgb_format))
			
 
				+			return false; // Nothing is written when the in-memory build fails.
			
 
				+
			
 
				+		if (!write_vec_to_file(pFilename, dds_data))
			
 
				+		{
			
 
				+			fprintf(stderr, "write_dds_file: Failed writing DDS file data\n");
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		return true;
			
 
				+	}
			
 
				+		
			
 
				+	bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image> &ldr_mips,	basisu::vector<imagef>& hdr_mips) // Stub in this copy: reads nothing, leaves both output vectors untouched and always returns false.
			
 
				+	{
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag, bool use_srgb_format)
			
 
				 	{
			
 
				 		std::string extension(string_tolower(string_get_extension(pFilename)));
			
 
				 
			
@@ -1570,8 +2018,8 @@ namespace basisu
 
				 		}
			
 
				 		else if (extension == "dds")
			
 
				 		{
			
 
				-			// TODO
			
 
				-			return false;
			
 
				+			if (!write_dds_file(filedata, g, cubemap_flag, use_srgb_format))
			
 
				+				return false;
			
 
				 		}
			
 
				 		else
			
 
				 		{
			
@@ -1583,11 +2031,18 @@ namespace basisu
 
				 		return basisu::write_vec_to_file(pFilename, filedata);
			
 
				 	}
			
 
				 
			
 
				-	bool write_compressed_texture_file(const char* pFilename, const gpu_image& g)
			
 
				+	bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format) // Convenience overload: writes a single mip chain g as a non-cubemap texture file.
			
 
				+	{
			
 
				+		basisu::vector<gpu_image_vec> a; // One-element array wrapping g for the general overload.
			
 
				+		a.push_back(g);
			
 
				+		return write_compressed_texture_file(pFilename, a, false, use_srgb_format); // false = not a cubemap.
			
 
				+	}
			
 
				+
			
 
				+	bool write_compressed_texture_file(const char* pFilename, const gpu_image& g, bool use_srgb_format)
			
 
				 	{
			
 
				 		basisu::vector<gpu_image_vec> v;
			
 
				 		enlarge_vector(v, 1)->push_back(g);
			
 
				-		return write_compressed_texture_file(pFilename, v, false);
			
 
				+		return write_compressed_texture_file(pFilename, v, false, use_srgb_format);
			
 
				 	}
			
 
				 
			
 
				 	//const uint32_t OUT_FILE_MAGIC = 'TEXC';
			
@@ -1626,5 +2081,49 @@ namespace basisu
 
				 		
			
 
				 		return fclose(pFile) != EOF;
			
 
				 	}
			
 
				+
			
 
				+	// The .astc texture format is readable using ARM's astcenc, AMD Compressonator, and other engines/tools. It oddly doesn't support mipmaps, limiting 
			
 
				+	// its usefulness/relevance.
			
 
				+	// https://github.com/ARM-software/astc-encoder/blob/main/Docs/FileFormat.md
			
 
				+	bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y) // Writes a 16-byte .astc header followed by the 16-byte blocks at pBlocks to pFilename; returns write_vec_to_file()'s result.
			
 
				+	{
			
 
				+		assert(pBlocks && (block_width >= 4) && (block_height >= 4) && (dim_x > 0) && (dim_y > 0)); // Argument checks are debug-only; nothing is validated in release builds.
			
 
				+
			
 
				+		uint8_vec file_data;
			
 
				+		file_data.push_back(0x13); // Four magic bytes identifying the file.
			
 
				+		file_data.push_back(0xAB);
			
 
				+		file_data.push_back(0xA1);
			
 
				+		file_data.push_back(0x5C);
			
 
				+
			
 
				+		file_data.push_back((uint8_t)block_width); // Block dimensions, one byte each.
			
 
				+		file_data.push_back((uint8_t)block_height);
			
 
				+		file_data.push_back(1); // Third block dimension is fixed at 1.
			
 
				+
			
 
				+		file_data.push_back((uint8_t)dim_x); // Image width: low 24 bits, least-significant byte first.
			
 
				+		file_data.push_back((uint8_t)(dim_x >> 8));
			
 
				+		file_data.push_back((uint8_t)(dim_x >> 16));
			
 
				+
			
 
				+		file_data.push_back((uint8_t)dim_y); // Image height: low 24 bits, least-significant byte first.
			
 
				+		file_data.push_back((uint8_t)(dim_y >> 8));
			
 
				+		file_data.push_back((uint8_t)(dim_y >> 16));
			
 
				+
			
 
				+		file_data.push_back((uint8_t)1); // Third image dimension is fixed at 1 (bytes 1, 0, 0).
			
 
				+		file_data.push_back((uint8_t)0);
			
 
				+		file_data.push_back((uint8_t)0);
			
 
				+
			
 
				+		const uint32_t num_blocks_x = (dim_x + block_width - 1) / block_width; // Round up to whole blocks.
			
 
				+		const uint32_t num_blocks_y = (dim_y + block_height - 1) / block_height;
			
 
				+
			
 
				+		const uint32_t total_bytes = num_blocks_x * num_blocks_y * 16; // 16 bytes per block. NOTE(review): 32-bit product, could wrap for extremely large images.
			
 
				+
			
 
				+		const size_t cur_size = file_data.size();
			
 
				+
			
 
				+		file_data.resize(cur_size + total_bytes);
			
 
				+
			
 
				+		memcpy(&file_data[cur_size], pBlocks, total_bytes); // pBlocks must provide at least total_bytes bytes.
			
 
				+
			
 
				+		return write_vec_to_file(pFilename, file_data);
			
 
				+	}
			
 
				+		
			
 
				 } // basisu
			
 
				 
			
--- a/thirdparty/basis_universal/encoder/basisu_gpu_texture.h
+++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.h
@@ -1,5 +1,5 @@
 
				 // basisu_gpu_texture.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -48,6 +48,7 @@ namespace basisu
 
				 		}
			
 
				 
			
 
				 		inline texture_format get_format() const { return m_fmt; }
			
 
				+		inline bool is_hdr() const { return is_hdr_texture_format(m_fmt); }
			
 
				 		
			
 
				 		// Width/height in pixels
			
 
				 		inline uint32_t get_pixel_width() const { return m_width; }
			
@@ -100,9 +101,13 @@ namespace basisu
 
				 			m_blocks.resize(m_blocks_x * m_blocks_y * m_qwords_per_block);
			
 
				 		}
			
 
				 
			
 
				+		// Unpacks LDR textures only.
			
 
				 		bool unpack(image& img) const;
			
 
				+
			
 
				+		// Unpacks HDR textures only.
			
 
				+		bool unpack_hdr(imagef& img) const;
			
 
				 		
			
 
				-		void override_dimensions(uint32_t w, uint32_t h)
			
 
				+		inline void override_dimensions(uint32_t w, uint32_t h)
			
 
				 		{
			
 
				 			m_width = w;
			
 
				 			m_height = h;
			
@@ -116,39 +121,50 @@ namespace basisu
 
				 
			
 
				 	typedef basisu::vector<gpu_image> gpu_image_vec;
			
 
				 
			
 
				-	// KTX file writing
			
 
				-
			
 
				+	// KTX1 file writing
			
 
				 	bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag);
			
 
				-		
			
 
				-	bool write_compressed_texture_file(const char *pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag);
			
 
				 	
			
 
				-	inline bool write_compressed_texture_file(const char *pFilename, const gpu_image_vec &g)
			
 
				-	{
			
 
				-		basisu::vector<gpu_image_vec> a;
			
 
				-		a.push_back(g);
			
 
				-		return write_compressed_texture_file(pFilename, a, false);
			
 
				-	}
			
 
				+	bool does_dds_support_format(texture_format fmt);
			
 
				+	bool write_dds_file(uint8_vec& dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format);
			
 
				+	bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format);
			
 
				+
			
 
				+	// Currently reads 2D 32bpp RGBA, 16-bit HALF RGBA, or 32-bit FLOAT RGBA, with or without mipmaps. No tex arrays or cubemaps, yet.
			
 
				+	bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image>& ldr_mips, basisu::vector<imagef>& hdr_mips);
			
 
				 
			
 
				-	bool write_compressed_texture_file(const char *pFilename, const gpu_image &g);
			
 
				+	// Supports DDS and KTX
			
 
				+	bool write_compressed_texture_file(const char *pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag, bool use_srgb_format);
			
 
				+	bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format);
			
 
				+	bool write_compressed_texture_file(const char *pFilename, const gpu_image &g, bool use_srgb_format);
			
 
				 	
			
 
				 	bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi);
			
 
				 
			
 
				 	// GPU texture block unpacking
			
 
				+	// For ETC1, use in basisu_etc.h: bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha)
			
 
				 	void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels);
			
 
				 	bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha);
			
 
				 	void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride);
			
 
				 	bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels);
			
 
				 	void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels);
			
 
				 	bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels);
			
 
				-	bool unpack_bc7(const void* pBlock_bits, color_rgba* pPixels);
			
 
				+	bool unpack_bc7(const void* pBlock_bits, color_rgba* pPixels); // full format
			
 
				+	bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs = 4 * 3); // full format, outputs HALF values, RGB texels only (not RGBA)
			
 
				 	void unpack_atc(const void* pBlock_bits, color_rgba* pPixels);
			
 
				+	// We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment.
			
 
				 	bool unpack_fxt1(const void* p, color_rgba* pPixels);
			
 
				+	// PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1 modulation=0). In this mode, PVRTC2 looks much like BC1/ATC.
			
 
				 	bool unpack_pvrtc2(const void* p, color_rgba* pPixels);
			
 
				 	void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c);
			
 
				 	void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels);
			
 
				-
			
 
				+	
			
 
				 	// unpack_block() is primarily intended to unpack texture data created by the transcoder.
			
 
				-	// For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not a complete implementation.
			
 
				+	// For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not yet a complete implementation.
			
 
				+	// Unpacks LDR texture formats only.
			
 
				 	bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels);
			
 
				-			
			
 
				+
			
 
				+	// Unpacks HDR texture formats only.
			
 
				+	bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels);
			
 
				+	
			
 
				+	bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y);
			
 
				+							
			
 
				 } // namespace basisu
			
 
				+
			
--- a/thirdparty/basis_universal/encoder/basisu_kernels_declares.h
+++ b/thirdparty/basis_universal/encoder/basisu_kernels_declares.h
@@ -1,5 +1,5 @@
 
				 // basisu_kernels_declares.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_kernels_imp.h
+++ b/thirdparty/basis_universal/encoder/basisu_kernels_imp.h
@@ -1,5 +1,5 @@
 
				 // basisu_kernels_imp.h - Do not directly include
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_kernels_sse.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -22,22 +22,6 @@
 
				 #include <intrin.h>
			
 
				 #endif
			
 
				 
			
 
				-#if !defined(_MSC_VER)
			
 
				-	#if __AVX__ || __AVX2__ || __AVX512F__
			
 
				-		#error Please check your compiler options
			
 
				-	#endif
			
 
				-	
			
 
				-	#if CPPSPMD_SSE2
			
 
				-		#if __SSE4_1__ || __SSE3__ || __SSE4_2__ || __SSSE3__
			
 
				-			#error SSE4.1/SSE3/SSE4.2/SSSE3 cannot be enabled to use this file
			
 
				-		#endif
			
 
				-	#else
			
 
				-		#if !__SSE4_1__ || !__SSE3__ || !__SSSE3__
			
 
				-			#error Please check your compiler options
			
 
				-		#endif
			
 
				-	#endif
			
 
				-#endif
			
 
				-
			
 
				 #include "cppspmd_sse.h"
			
 
				 
			
 
				 #include "cppspmd_type_aliases.h"
			
--- a/thirdparty/basis_universal/encoder/basisu_miniz.h
+++ b/thirdparty/basis_universal/encoder/basisu_miniz.h
@@ -3,7 +3,7 @@
 
				   
			
 
				    Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/ 
			
 
				    
			
 
				-   Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+   Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 
			
 
				    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				    you may not use this file except in compliance with the License.
			
@@ -1973,7 +1973,7 @@ static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahe
 
				                    (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) );
			
 
				     if (!probe_len)
			
 
				     {
			
 
				-      *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); break;
			
 
				+      *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, (mz_uint)TDEFL_MAX_MATCH_LEN); break;
			
 
				     }
			
 
				     else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len)
			
 
				     {
			
@@ -2101,7 +2101,7 @@ static mz_bool tdefl_compress_fast(tdefl_compressor *d)
 
				 
			
 
				       total_lz_bytes += cur_match_len;
			
 
				       lookahead_pos += cur_match_len;
			
 
				-      dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE);
			
 
				+      dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint)TDEFL_LZ_DICT_SIZE);
			
 
				       cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK;
			
 
				       MZ_ASSERT(lookahead_size >= cur_match_len);
			
 
				       lookahead_size -= cur_match_len;
			
@@ -2129,7 +2129,7 @@ static mz_bool tdefl_compress_fast(tdefl_compressor *d)
 
				       d->m_huff_count[0][lit]++;
			
 
				 
			
 
				       lookahead_pos++;
			
 
				-      dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE);
			
 
				+      dict_size = MZ_MIN(dict_size + 1, (mz_uint)TDEFL_LZ_DICT_SIZE);
			
 
				       cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
			
 
				       lookahead_size--;
			
 
				 
			
@@ -2283,7 +2283,7 @@ static mz_bool tdefl_compress_normal(tdefl_compressor *d)
 
				     d->m_lookahead_pos += len_to_move;
			
 
				     MZ_ASSERT(d->m_lookahead_size >= len_to_move);
			
 
				     d->m_lookahead_size -= len_to_move;
			
 
				-    d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE);
			
 
				+    d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE);
			
 
				     // Check if it's time to flush the current LZ codes to the internal output buffer.
			
 
				     if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) ||
			
 
				          ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) )
			
--- a/thirdparty/basis_universal/encoder/basisu_opencl.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_opencl.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_opencl.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_opencl.h
+++ b/thirdparty/basis_universal/encoder/basisu_opencl.h
@@ -1,5 +1,5 @@
 
				 // basisu_opencl.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Note: Undefine or set BASISU_SUPPORT_OPENCL to 0 to completely disable OpenCL support.
			
 
				 //
			
--- a/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_pvrtc1_4.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h
+++ b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h
@@ -1,5 +1,5 @@
 
				 // basisu_pvrtc1_4.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -231,7 +231,18 @@ namespace basisu
 
				 
			
 
				 		// Zero-fills m_blocks.size_in_bytes() bytes starting at m_blocks.get_ptr() with a raw memset.
			
 
				 		// On GCC-compatible compilers (but not when building for Emscripten) the memset is wrapped in a
			
 
				 		// diagnostic push/pop that ignores -Wclass-memaccess for this one call only.
			
 
				 		inline void set_to_black()
			
 
				 		{
			
 
				+#ifndef __EMSCRIPTEN__
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic push
			
 
				+#pragma GCC diagnostic ignored "-Wclass-memaccess"            
			
 
				+#endif                          
			
 
				+#endif
			
 
				 			memset(m_blocks.get_ptr(), 0, m_blocks.size_in_bytes());
			
 
				+#ifndef __EMSCRIPTEN__
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic pop
			
 
				+#endif                
			
 
				+#endif
			
 
				 		}
			
 
				 
			
 
				 		inline bool get_block_uses_transparent_modulation(uint32_t bx, uint32_t by) const
			
--- a/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_resampler_filters.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_resampler.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_resampler.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_resampler.cpp
			
 
				-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_resampler.h
+++ b/thirdparty/basis_universal/encoder/basisu_resampler.h
@@ -1,5 +1,5 @@
 
				 // basisu_resampler.h
			
 
				-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_resampler_filters.h
+++ b/thirdparty/basis_universal/encoder/basisu_resampler_filters.h
@@ -1,5 +1,5 @@
 
				 // basisu_resampler_filters.h
			
 
				-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_ssim.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_ssim.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_ssim.cpp
			
 
				-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_ssim.h
+++ b/thirdparty/basis_universal/encoder/basisu_ssim.h
@@ -1,5 +1,5 @@
 
				 // basisu_ssim.h
			
 
				-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_uastc_enc.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -13,11 +13,7 @@
 
				 // See the License for the specific language governing permissions and
			
 
				 // limitations under the License.
			
 
				 #include "basisu_uastc_enc.h"
			
 
				-
			
 
				-#if BASISU_USE_ASTC_DECOMPRESS
			
 
				-#include "basisu_astc_decomp.h"
			
 
				-#endif
			
 
				-
			
 
				+#include "3rdparty/android_astc_decomp.h"
			
 
				 #include "basisu_gpu_texture.h"
			
 
				 #include "basisu_bc7enc.h"
			
 
				 
			
@@ -384,6 +380,7 @@ namespace basisu
 
				 		}
			
 
				 
			
 
				 		uint32_t total_endpoint_bits = 0;
			
 
				+		(void)total_endpoint_bits;
			
 
				 
			
 
				 		for (uint32_t i = 0; i < total_tq_values; i++)
			
 
				 		{
			
@@ -428,6 +425,8 @@ namespace basisu
 
				 #endif
			
 
				 
			
 
				 		uint32_t total_weight_bits = 0;
			
 
				+		(void)total_weight_bits;
			
 
				+
			
 
				 		const uint32_t plane_shift = (total_planes == 2) ? 1 : 0;
			
 
				 		for (uint32_t i = 0; i < 16 * total_planes; i++)
			
 
				 		{
			
@@ -3175,6 +3174,7 @@ namespace basisu
 
				 		const bool favor_bc7_error = !favor_uastc_error && ((flags & cPackUASTCFavorBC7Error) != 0);
			
 
				 		//const bool etc1_perceptual = true;
			
 
				 		
			
 
				+		// TODO: This uses 64KB of stack space!
			
 
				 		uastc_encode_results results[MAX_ENCODE_RESULTS];
			
 
				 						
			
 
				 		level = clampi(level, cPackUASTCLevelFastest, cPackUASTCLevelVerySlow);
			
@@ -3567,7 +3567,6 @@ namespace basisu
 
				 			success = basist::unpack_uastc(temp_block, (basist::color32 *)temp_block_unpacked, false);
			
 
				 			VALIDATE(success);
			
 
				 
			
 
				-#if BASISU_USE_ASTC_DECOMPRESS
			
 
				 			// Now round trip to packed ASTC and back, then decode to pixels.
			
 
				 			uint32_t astc_data[4];
			
 
				 			
			
@@ -3580,7 +3579,7 @@ namespace basisu
 
				 			}
			
 
				 
			
 
				 			color_rgba decoded_astc_block[4][4];
			
 
				-			success = basisu_astc::astc::decompress((uint8_t*)decoded_astc_block, (uint8_t*)&astc_data, false, 4, 4);
			
 
				+			success = basisu_astc::astc::decompress_ldr((uint8_t*)decoded_astc_block, (uint8_t*)&astc_data, false, 4, 4);
			
 
				 			VALIDATE(success);
			
 
				 
			
 
				 			for (uint32_t y = 0; y < 4; y++)
			
@@ -3595,7 +3594,6 @@ namespace basisu
 
				 					VALIDATE(temp_block_unpacked[y][x].c[3] == decoded_uastc_block[y][x].a);
			
 
				 				}
			
 
				 			}
			
 
				-#endif
			
 
				 		}
			
 
				 #endif
			
 
				 
			
@@ -3789,8 +3787,9 @@ namespace basisu
 
				 	{
			
 
				 		uint64_t m_sel;
			
 
				 		uint32_t m_ofs;
			
 
				+		uint32_t m_pad; // avoid implicit padding for selector_bitsequence_hash
			
 
				 		selector_bitsequence() { }
			
 
				-		selector_bitsequence(uint32_t bit_ofs, uint64_t sel) : m_sel(sel), m_ofs(bit_ofs) { }
			
 
				+		selector_bitsequence(uint32_t bit_ofs, uint64_t sel) : m_sel(sel), m_ofs(bit_ofs), m_pad(0) { }
			
 
				 		bool operator== (const selector_bitsequence& other) const
			
 
				 		{
			
 
				 			return (m_ofs == other.m_ofs) && (m_sel == other.m_sel);
			
@@ -3811,7 +3810,7 @@ namespace basisu
 
				 	{
			
 
				 		std::size_t operator()(selector_bitsequence const& s) const noexcept
			
 
				 		{
			
 
				-			return static_cast<std::size_t>(hash_hsieh((uint8_t *)&s, sizeof(s)) ^ s.m_sel);
			
 
				+			return hash_hsieh((const uint8_t*)&s, sizeof(s));
			
 
				 		}
			
 
				 	};
			
 
				 
			
--- a/thirdparty/basis_universal/encoder/basisu_uastc_enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_uastc_enc.h
@@ -1,5 +1,5 @@
 
				 // basisu_uastc_enc.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/cppspmd_flow.h
+++ b/thirdparty/basis_universal/encoder/cppspmd_flow.h
@@ -1,7 +1,7 @@
 
				 // Do not include this header directly.
			
 
				 // Control flow functionality in common between all the headers.
			
 
				 //
			
 
				-// Copyright 2020-2021 Binomial LLC
			
 
				+// Copyright 2020-2024 Binomial LLC
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/cppspmd_math.h
+++ b/thirdparty/basis_universal/encoder/cppspmd_math.h
@@ -1,6 +1,6 @@
 
				 // Do not include this header directly.
			
 
				 //
			
 
				-// Copyright 2020-2021 Binomial LLC
			
 
				+// Copyright 2020-2024 Binomial LLC
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -646,7 +646,7 @@ CPPSPMD_FORCE_INLINE vint spmd_kernel::count_set_bits(vint x)
 
				 {
			
 
				 	vint v = x - (VUINT_SHIFT_RIGHT(x, 1) & 0x55555555);                    
			
 
				 	vint v1 = (v & 0x33333333) + (VUINT_SHIFT_RIGHT(v, 2) & 0x33333333);     
			
 
				-	return VUINT_SHIFT_RIGHT(((v1 + VUINT_SHIFT_RIGHT(v1, 4) & 0xF0F0F0F) * 0x1010101), 24);
			
 
				+	return VUINT_SHIFT_RIGHT(((v1 + (VUINT_SHIFT_RIGHT(v1, 4) & 0xF0F0F0F)) * 0x1010101), 24);
			
 
				 }
			
 
				 
			
 
				 CPPSPMD_FORCE_INLINE vint cmple_epu16(const vint &a, const vint &b) 
			
--- a/thirdparty/basis_universal/encoder/cppspmd_math_declares.h
+++ b/thirdparty/basis_universal/encoder/cppspmd_math_declares.h
@@ -1,7 +1,7 @@
 
				 // Do not include this header directly.
			
 
				 // This header defines shared struct spmd_kernel helpers.
			
 
				 //
			
 
				-// Copyright 2020-2021 Binomial LLC
			
 
				+// Copyright 2020-2024 Binomial LLC
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/cppspmd_sse.h
+++ b/thirdparty/basis_universal/encoder/cppspmd_sse.h
@@ -450,7 +450,7 @@ struct spmd_kernel
 
				 		CPPSPMD_FORCE_INLINE explicit operator vint() const;
			
 
				 								
			
 
				 	private:
			
 
				-		vbool& operator=(const vbool&);
			
 
				+		//vbool& operator=(const vbool&);
			
 
				 	};
			
 
				 
			
 
				 	friend vbool operator!(const vbool& v);
			
@@ -481,7 +481,7 @@ struct spmd_kernel
 
				 		CPPSPMD_FORCE_INLINE explicit vfloat(int value) : m_value(_mm_set1_ps((float)value)) { }
			
 
				 
			
 
				 	private:
			
 
				-		vfloat& operator=(const vfloat&);
			
 
				+		//vfloat& operator=(const vfloat&);
			
 
				 	};
			
 
				 
			
 
				 	CPPSPMD_FORCE_INLINE vfloat& store(vfloat& dst, const vfloat& src)
			
@@ -514,7 +514,7 @@ struct spmd_kernel
 
				 		float* m_pValue;
			
 
				 
			
 
				 	private:
			
 
				-		float_lref& operator=(const float_lref&);
			
 
				+		//float_lref& operator=(const float_lref&);
			
 
				 	};
			
 
				 
			
 
				 	CPPSPMD_FORCE_INLINE const float_lref& store(const float_lref& dst, const vfloat& src)
			
@@ -561,7 +561,7 @@ struct spmd_kernel
 
				 		float* m_pValue;
			
 
				 		
			
 
				 	private:
			
 
				-		float_vref& operator=(const float_vref&);
			
 
				+		//float_vref& operator=(const float_vref&);
			
 
				 	};
			
 
				 
			
 
				 	// Varying ref to varying float
			
@@ -571,7 +571,7 @@ struct spmd_kernel
 
				 		vfloat* m_pValue;
			
 
				 		
			
 
				 	private:
			
 
				-		vfloat_vref& operator=(const vfloat_vref&);
			
 
				+		//vfloat_vref& operator=(const vfloat_vref&);
			
 
				 	};
			
 
				 
			
 
				 	// Varying ref to varying int
			
@@ -581,7 +581,7 @@ struct spmd_kernel
 
				 		vint* m_pValue;
			
 
				 		
			
 
				 	private:
			
 
				-		vint_vref& operator=(const vint_vref&);
			
 
				+		//vint_vref& operator=(const vint_vref&);
			
 
				 	};
			
 
				 
			
 
				 	CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref& dst, const vfloat& src);
			
@@ -624,7 +624,7 @@ struct spmd_kernel
 
				 		int* m_pValue;
			
 
				 
			
 
				 	private:
			
 
				-		int_lref& operator=(const int_lref&);
			
 
				+		//int_lref& operator=(const int_lref&);
			
 
				 	};
			
 
				 		
			
 
				 	CPPSPMD_FORCE_INLINE const int_lref& store(const int_lref& dst, const vint& src)
			
@@ -663,7 +663,7 @@ struct spmd_kernel
 
				 		int16_t* m_pValue;
			
 
				 
			
 
				 	private:
			
 
				-		int16_lref& operator=(const int16_lref&);
			
 
				+		//int16_lref& operator=(const int16_lref&);
			
 
				 	};
			
 
				 
			
 
				 	CPPSPMD_FORCE_INLINE const int16_lref& store(const int16_lref& dst, const vint& src)
			
@@ -720,7 +720,7 @@ struct spmd_kernel
 
				 		const int* m_pValue;
			
 
				 
			
 
				 	private:
			
 
				-		cint_lref& operator=(const cint_lref&);
			
 
				+		//cint_lref& operator=(const cint_lref&);
			
 
				 	};
			
 
				 
			
 
				 	CPPSPMD_FORCE_INLINE vint load(const cint_lref& src)
			
@@ -742,7 +742,7 @@ struct spmd_kernel
 
				 		int* m_pValue;
			
 
				 
			
 
				 	private:
			
 
				-		int_vref& operator=(const int_vref&);
			
 
				+		//int_vref& operator=(const int_vref&);
			
 
				 	};
			
 
				 
			
 
				 	// Varying ref to constant ints
			
@@ -752,7 +752,7 @@ struct spmd_kernel
 
				 		const int* m_pValue;
			
 
				 
			
 
				 	private:
			
 
				-		cint_vref& operator=(const cint_vref&);
			
 
				+		//cint_vref& operator=(const cint_vref&);
			
 
				 	};
			
 
				 
			
 
				 	// Varying int
			
@@ -810,7 +810,7 @@ struct spmd_kernel
 
				 		}
			
 
				 
			
 
				 	private:
			
 
				-		vint& operator=(const vint&);
			
 
				+		//vint& operator=(const vint&);
			
 
				 	};
			
 
				 
			
 
				 	// Load/store linear int
			
@@ -1206,7 +1206,7 @@ struct spmd_kernel
 
				 	CPPSPMD_FORCE_INLINE vint load_all(const vint_vref& src)
			
 
				 	{
			
 
				 		// TODO: There's surely a better way
			
 
				-		__m128i k;
			
 
				+		__m128i k = _mm_setzero_si128();
			
 
				 
			
 
				 		k = insert_x(k, ((int*)(&src.m_pValue[extract_x(src.m_vindex)]))[0]);
			
 
				 		k = insert_y(k, ((int*)(&src.m_pValue[extract_y(src.m_vindex)]))[1]);
			
@@ -1261,7 +1261,7 @@ struct spmd_kernel
 
				 		}
			
 
				 
			
 
				 	private:
			
 
				-		lint& operator=(const lint&);
			
 
				+		//lint& operator=(const lint&);
			
 
				 	};
			
 
				 
			
 
				 	CPPSPMD_FORCE_INLINE lint& store_all(lint& dst, const lint& src)
			
--- a/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h
+++ b/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h
@@ -1,7 +1,7 @@
 
				 // cppspmd_type_aliases.h
			
 
				 // Do not include this file directly
			
 
				 //
			
 
				-// Copyright 2020-2021 Binomial LLC
			
 
				+// Copyright 2020-2024 Binomial LLC
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/encoder/pvpngreader.cpp
+++ b/thirdparty/basis_universal/encoder/pvpngreader.cpp
@@ -163,7 +163,7 @@ public:
 
				 		{
			
 
				 			if ((sizeof(size_t) == sizeof(uint32_t)) && (new_size > 0x7FFFFFFFUL))
			
 
				 				return 0;
			
 
				-			m_buf.resize(new_size);
			
 
				+			m_buf.resize((size_t)new_size);
			
 
				 		}
			
 
				 
			
 
				 		memcpy(&m_buf[(size_t)m_ofs], pBuf, len);
			
@@ -178,11 +178,11 @@ public:
 
				 			return 0;
			
 
				 
			
 
				 		uint64_t max_bytes = minimum<uint64_t>(len, m_buf.size() - m_ofs);
			
 
				-		memcpy(pBuf, &m_buf[(size_t)m_ofs], max_bytes);
			
 
				+		memcpy(pBuf, &m_buf[(size_t)m_ofs], (size_t)max_bytes);
			
 
				 
			
 
				 		m_ofs += max_bytes;
			
 
				 
			
 
				-		return max_bytes;
			
 
				+		return (size_t)max_bytes;
			
 
				 	}
			
 
				 };
			
 
				 
			
@@ -249,11 +249,11 @@ public:
 
				 			return 0;
			
 
				 
			
 
				 		uint64_t max_bytes = minimum<uint64_t>(len, m_buf_size - m_ofs);
			
 
				-		memcpy(pBuf, &m_pBuf[(size_t)m_ofs], max_bytes);
			
 
				+		memcpy(pBuf, &m_pBuf[(size_t)m_ofs], (size_t)max_bytes);
			
 
				 
			
 
				 		m_ofs += max_bytes;
			
 
				 
			
 
				-		return max_bytes;
			
 
				+		return (size_t)max_bytes;
			
 
				 	}
			
 
				 };
			
 
				 
			
@@ -1626,8 +1626,8 @@ int png_decoder::png_decode_start()
 
				 
			
 
				 	if (m_ihdr.m_ilace_type == 1)
			
 
				 	{
			
 
				-		int i;
			
 
				-		uint32_t total_lines, lines_processed;
			
 
				+		//int i;
			
 
				+		//uint32_t total_lines, lines_processed;
			
 
				 
			
 
				 		m_adam7_pass_size_x[0] = adam7_pass_size(m_ihdr.m_width, 0, 8);
			
 
				 		m_adam7_pass_size_x[1] = adam7_pass_size(m_ihdr.m_width, 4, 8);
			
@@ -1651,10 +1651,12 @@ int png_decoder::png_decode_start()
 
				 
			
 
				 		m_pass_y_left = 0;
			
 
				 
			
 
				+#if 0
			
 
				 		total_lines = lines_processed = 0;
			
 
				 
			
 
				 		for (i = 0; i < 7; i++)
			
 
				 			total_lines += m_adam7_pass_size_y[i];
			
 
				+#endif
			
 
				 
			
 
				 		for (; ; )
			
 
				 		{
			
@@ -1675,7 +1677,7 @@ int png_decoder::png_decode_start()
 
				 				}
			
 
				 			}
			
 
				 
			
 
				-			lines_processed++;
			
 
				+			//lines_processed++;
			
 
				 		}
			
 
				 
			
 
				 		m_adam7_decoded_flag = TRUE;
			
--- a/thirdparty/basis_universal/patches/external-jpgd.patch
+++ b/thirdparty/basis_universal/patches/external-jpgd.patch
--- a/thirdparty/basis_universal/patches/external-tinyexr.patch
+++ b/thirdparty/basis_universal/patches/external-tinyexr.patch
@@ -0,0 +1,23 @@
 
				+diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
			
 
				+index 6c0ac0ad370..2bf486a0287 100644
			
 
				+--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
			
 
				++++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
			
 
				+@@ -27,7 +27,7 @@
			
 
				+ #ifndef TINYEXR_USE_ZFP
			
 
				+ #define TINYEXR_USE_ZFP (1)
			
 
				+ #endif
			
 
				+-#include "3rdparty/tinyexr.h"
			
 
				++#include <tinyexr.h>
			
 
				+ 
			
 
				+ #ifndef MINIZ_HEADER_FILE_ONLY
			
 
				+ #define MINIZ_HEADER_FILE_ONLY
			
 
				+@@ -3257,7 +3257,8 @@ namespace basisu
			
 
				+ 		float* out_rgba = nullptr;
			
 
				+ 		const char* err = nullptr;
			
 
				+ 		
			
 
				+-		int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err, &n_chans);
			
 
				++		int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err);
			
 
				++		n_chans = 4;
			
 
				+ 		if (status != 0)
			
 
				+ 		{
			
 
				+ 			error_printf("Failed loading .EXR image \"%s\"! (TinyEXR error: %s)\n", pFilename, err ? err : "?");
			
--- a/thirdparty/basis_universal/patches/remove-tinydds-qoi.patch
+++ b/thirdparty/basis_universal/patches/remove-tinydds-qoi.patch
@@ -0,0 +1,446 @@
 
				+diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
			
 
				+index 2bf486a0287..fff98e83014 100644
			
 
				+--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
			
 
				++++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
			
 
				+@@ -37,9 +37,6 @@
			
 
				+ #endif
			
 
				+ #include "basisu_miniz.h"
			
 
				+ 
			
 
				+-#define QOI_IMPLEMENTATION
			
 
				+-#include "3rdparty/qoi.h"
			
 
				+-
			
 
				+ #if defined(_WIN32)
			
 
				+ // For QueryPerformanceCounter/QueryPerformanceFrequency
			
 
				+ #define WIN32_LEAN_AND_MEAN
			
 
				+@@ -408,16 +405,7 @@ namespace basisu
			
 
				+ 
			
 
				+ 	bool load_qoi(const char* pFilename, image& img)
			
 
				+ 	{
			
 
				+-		qoi_desc desc;
			
 
				+-		clear_obj(desc);
			
 
				+-
			
 
				+-		void* p = qoi_read(pFilename, &desc, 4);
			
 
				+-		if (!p)
			
 
				+-			return false;
			
 
				+-
			
 
				+-		img.grant_ownership(static_cast<color_rgba *>(p), desc.width, desc.height);
			
 
				+-
			
 
				+-		return true;
			
 
				++		return false;
			
 
				+ 	}
			
 
				+ 
			
 
				+ 	bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename)
			
 
				+diff --git a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
			
 
				+index 000869a5337..342446b8fd4 100644
			
 
				+--- a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
			
 
				++++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
			
 
				+@@ -19,9 +19,6 @@
			
 
				+ #include "basisu_bc7enc.h"
			
 
				+ #include "../transcoder/basisu_astc_hdr_core.h"
			
 
				+ 
			
 
				+-#define TINYDDS_IMPLEMENTATION
			
 
				+-#include "3rdparty/tinydds.h"
			
 
				+-
			
 
				+ namespace basisu
			
 
				+ {
			
 
				+ 	//------------------------------------------------------------------------------------------------
			
 
				+@@ -1979,208 +1976,8 @@ namespace basisu
			
 
				+ 	// Accepts 2D, 2D mipmapped, 2D array, 2D array mipmapped
			
 
				+ 	// and cubemap, cubemap mipmapped, and cubemap array mipmapped.
			
 
				+ 	bool write_dds_file(uint8_vec &dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
			
 
				+-	{
			
 
				+-		if (!gpu_images.size())
			
 
				+-		{
			
 
				+-			assert(0);
			
 
				+-			return false;
			
 
				+-		}
			
 
				+-
			
 
				+-		// Sanity check the input
			
 
				+-		uint32_t slices = 1;
			
 
				+-		if (cubemap_flag)
			
 
				+-		{
			
 
				+-			if ((gpu_images.size() % 6) != 0)
			
 
				+-			{
			
 
				+-				assert(0);
			
 
				+-				return false;
			
 
				+-			}
			
 
				+-			slices = gpu_images.size() / 6;
			
 
				+-		}
			
 
				+-		else
			
 
				+-		{
			
 
				+-			slices = gpu_images.size();
			
 
				+-		}
			
 
				+-
			
 
				+-		uint32_t width = 0, height = 0, total_levels = 0;
			
 
				+-		basisu::texture_format fmt = texture_format::cInvalidTextureFormat;
			
 
				+-
			
 
				+-		// Sanity check the input for consistent # of dimensions and mip levels
			
 
				+-		for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
			
 
				+-		{
			
 
				+-			const gpu_image_vec& levels = gpu_images[array_index];
			
 
				+-
			
 
				+-			if (!levels.size())
			
 
				+-			{
			
 
				+-				// Empty mip chain
			
 
				+-				assert(0);
			
 
				+-				return false;
			
 
				+-			}
			
 
				+-
			
 
				+-			if (!array_index)
			
 
				+-			{
			
 
				+-				width = levels[0].get_pixel_width();
			
 
				+-				height = levels[0].get_pixel_height();
			
 
				+-				total_levels = (uint32_t)levels.size();
			
 
				+-				fmt = levels[0].get_format();
			
 
				+-			}
			
 
				+-			else
			
 
				+-			{
			
 
				+-				if ((width != levels[0].get_pixel_width()) ||
			
 
				+-					(height != levels[0].get_pixel_height()) ||
			
 
				+-					(total_levels != levels.size()))
			
 
				+-				{
			
 
				+-					// All cubemap/texture array faces must be the same dimension
			
 
				+-					assert(0);
			
 
				+-					return false;
			
 
				+-				}
			
 
				+-			}
			
 
				+-
			
 
				+-			for (uint32_t level_index = 0; level_index < levels.size(); level_index++)
			
 
				+-			{
			
 
				+-				if (level_index)
			
 
				+-				{
			
 
				+-					if ((levels[level_index].get_pixel_width() != maximum<uint32_t>(1, levels[0].get_pixel_width() >> level_index)) ||
			
 
				+-						(levels[level_index].get_pixel_height() != maximum<uint32_t>(1, levels[0].get_pixel_height() >> level_index)))
			
 
				+-					{
			
 
				+-						// Malformed mipmap chain
			
 
				+-						assert(0);
			
 
				+-						return false;
			
 
				+-					}
			
 
				+-				}
			
 
				+-
			
 
				+-				if (fmt != levels[level_index].get_format())
			
 
				+-				{
			
 
				+-					// All input textures must use the same GPU format
			
 
				+-					assert(0);
			
 
				+-					return false;
			
 
				+-				}
			
 
				+-			}
			
 
				+-		}
			
 
				+-
			
 
				+-		// No mipmap levels
			
 
				+-		if (!total_levels)
			
 
				+-		{
			
 
				+-			assert(0);
			
 
				+-			return false;
			
 
				+-		}
			
 
				+-
			
 
				+-		// Create the DDS mipmap level data
			
 
				+-		uint8_vec mipmaps[32];
			
 
				+-
			
 
				+-		// See https://learn.microsoft.com/en-us/windows/win32/direct3ddds/dds-file-layout-for-cubic-environment-maps
			
 
				+-		// DDS cubemap organization is cubemap face 0 followed by all mips, then cubemap face 1 followed by all mips, etc.
			
 
				+-		// Unfortunately tinydds.h's writer doesn't handle this case correctly, so we work around it here.
			
 
				+-		// This also applies with 2D texture arrays, too. RenderDoc and ddsview (DirectXTex) views each type (cubemap array and 2D texture array) correctly.
			
 
				+-		// Also see "Using Texture Arrays in Direct3D 10/11":
			
 
				+-		// https://learn.microsoft.com/en-us/windows/win32/direct3ddds/dx-graphics-dds-pguide
			
 
				+-		for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
			
 
				+-		{
			
 
				+-			const gpu_image_vec& levels = gpu_images[array_index];
			
 
				+-
			
 
				+-			for (uint32_t level_index = 0; level_index < levels.size(); level_index++)
			
 
				+-			{
			
 
				+-				append_vector(mipmaps[0], (uint8_t*)levels[level_index].get_ptr(), levels[level_index].get_size_in_bytes());
			
 
				+-
			
 
				+-			} // level_index
			
 
				+-		} // array_index
			
 
				+-
			
 
				+-#if 0
			
 
				+-		// This organization, required by tinydds.h's API, is wrong.
			
 
				+-		{
			
 
				+-			for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
			
 
				+-			{
			
 
				+-				const gpu_image_vec& levels = gpu_images[array_index];
			
 
				+-
			
 
				+-				for (uint32_t level_index = 0; level_index < levels.size(); level_index++)
			
 
				+-				{
			
 
				+-					append_vector(mipmaps[level_index], (uint8_t*)levels[level_index].get_ptr(), levels[level_index].get_size_in_bytes());
			
 
				+-
			
 
				+-				} // level_index
			
 
				+-			} // array_index
			
 
				+-		}
			
 
				+-#endif
			
 
				+-		
			
 
				+-		// Write DDS file using tinydds
			
 
				+-		TinyDDS_WriteCallbacks cbs;
			
 
				+-		cbs.error = [](void* user, char const* msg) { BASISU_NOTE_UNUSED(user);  fprintf(stderr, "tinydds: %s\n", msg); };
			
 
				+-		cbs.alloc = [](void* user, size_t size) -> void* { BASISU_NOTE_UNUSED(user); return malloc(size); };
			
 
				+-		cbs.free = [](void* user, void* memory) { BASISU_NOTE_UNUSED(user); free(memory); };
			
 
				+-		cbs.write = [](void* user, void const* buffer, size_t byteCount) { BASISU_NOTE_UNUSED(user); uint8_vec* pVec = (uint8_vec*)user; append_vector(*pVec, (const uint8_t*)buffer, byteCount); };
			
 
				+-
			
 
				+-		uint32_t mipmap_sizes[32];
			
 
				+-		const void* mipmap_ptrs[32];
			
 
				+-		
			
 
				+-		clear_obj(mipmap_sizes);
			
 
				+-		clear_obj(mipmap_ptrs);
			
 
				+-
			
 
				+-		assert(total_levels < 32);
			
 
				+-		for (uint32_t i = 0; i < total_levels; i++)
			
 
				+-		{
			
 
				+-			mipmap_sizes[i] = mipmaps[i].size_in_bytes();
			
 
				+-			mipmap_ptrs[i] = mipmaps[i].get_ptr();
			
 
				+-		}
			
 
				+-
			
 
				+-		// Select tinydds texture format
			
 
				+-		uint32_t tinydds_fmt = 0;
			
 
				+-
			
 
				+-		switch (fmt)
			
 
				+-		{
			
 
				+-			case texture_format::cBC1_NV:
			
 
				+-			case texture_format::cBC1_AMD:
			
 
				+-			case texture_format::cBC1: 
			
 
				+-				tinydds_fmt = use_srgb_format ? TDDS_BC1_RGBA_SRGB_BLOCK : TDDS_BC1_RGBA_UNORM_BLOCK;
			
 
				+-				break;
			
 
				+-			case texture_format::cBC3:
			
 
				+-				tinydds_fmt = use_srgb_format ? TDDS_BC3_SRGB_BLOCK : TDDS_BC3_UNORM_BLOCK;
			
 
				+-				break;
			
 
				+-			case texture_format::cBC4:
			
 
				+-				tinydds_fmt = TDDS_BC4_UNORM_BLOCK;
			
 
				+-				break;
			
 
				+-			case texture_format::cBC5:
			
 
				+-				tinydds_fmt = TDDS_BC5_UNORM_BLOCK;
			
 
				+-				break;
			
 
				+-			case texture_format::cBC6HSigned:
			
 
				+-				tinydds_fmt = TDDS_BC6H_SFLOAT_BLOCK;
			
 
				+-				break;
			
 
				+-			case texture_format::cBC6HUnsigned:
			
 
				+-				tinydds_fmt = TDDS_BC6H_UFLOAT_BLOCK;
			
 
				+-				break;
			
 
				+-			case texture_format::cBC7:
			
 
				+-				tinydds_fmt = use_srgb_format ? TDDS_BC7_SRGB_BLOCK : TDDS_BC7_UNORM_BLOCK;
			
 
				+-				break;
			
 
				+-			default:
			
 
				+-			{
			
 
				+-				fprintf(stderr, "Warning: Unsupported format in write_dds_file().\n");
			
 
				+-				return false;
			
 
				+-			}
			
 
				+-		}
			
 
				+-
			
 
				+-		// DirectXTex's DDSView doesn't handle odd sizes textures correctly. RenderDoc loads them fine, however.
			
 
				+-		// Trying to work around this here results in invalid mipmaps. 
			
 
				+-		//width = (width + 3) & ~3;
			
 
				+-		//height = (height + 3) & ~3;
			
 
				+-
			
 
				+-		bool status = TinyDDS_WriteImage(&cbs,
			
 
				+-			&dds_data,
			
 
				+-			width,
			
 
				+-			height,
			
 
				+-			1,
			
 
				+-			slices,
			
 
				+-			total_levels,
			
 
				+-			(TinyDDS_Format)tinydds_fmt,
			
 
				+-			cubemap_flag,
			
 
				+-			true,
			
 
				+-			mipmap_sizes,
			
 
				+-			mipmap_ptrs);
			
 
				+-
			
 
				+-		if (!status)
			
 
				+-		{
			
 
				+-			fprintf(stderr, "write_dds_file: Failed creating DDS file\n");
			
 
				+-			return false;
			
 
				+-		}
			
 
				+-								
			
 
				+-		return true;
			
 
				++	{		
			
 
				++		return false;
			
 
				+ 	}
			
 
				+ 
			
 
				+ 	bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
			
 
				+@@ -2201,188 +1998,6 @@ namespace basisu
			
 
				+ 		
			
 
				+ 	bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image> &ldr_mips,	basisu::vector<imagef>& hdr_mips)
			
 
				+ 	{
			
 
				+-		const uint32_t MAX_IMAGE_DIM = 16384;
			
 
				+-
			
 
				+-		TinyDDS_Callbacks cbs;
			
 
				+-
			
 
				+-		cbs.errorFn = [](void* user, char const* msg) { BASISU_NOTE_UNUSED(user); fprintf(stderr, "tinydds: %s\n", msg); };
			
 
				+-		cbs.allocFn = [](void* user, size_t size) -> void* { BASISU_NOTE_UNUSED(user); return malloc(size); };
			
 
				+-		cbs.freeFn = [](void* user, void* memory) { BASISU_NOTE_UNUSED(user); free(memory); };
			
 
				+-		cbs.readFn = [](void* user, void* buffer, size_t byteCount) -> size_t { return (size_t)fread(buffer, 1, byteCount, (FILE*)user); };
			
 
				+-		
			
 
				+-#ifdef _MSC_VER
			
 
				+-		cbs.seekFn = [](void* user, int64_t ofs) -> bool { return _fseeki64((FILE*)user, ofs, SEEK_SET) == 0; };
			
 
				+-		cbs.tellFn = [](void* user) -> int64_t { return _ftelli64((FILE*)user); };
			
 
				+-#else
			
 
				+-		cbs.seekFn = [](void* user, int64_t ofs) -> bool { return fseek((FILE*)user, (long)ofs, SEEK_SET) == 0; };
			
 
				+-		cbs.tellFn = [](void* user) -> int64_t { return (int64_t)ftell((FILE*)user); };
			
 
				+-#endif
			
 
				+-
			
 
				+-		FILE* pFile = fopen_safe(pFilename, "rb");
			
 
				+-		if (!pFile)
			
 
				+-		{
			
 
				+-			error_printf("Can't open .DDS file \"%s\"\n", pFilename);
			
 
				+-			return false;
			
 
				+-		}
			
 
				+-
			
 
				+-		// These are the formats AMD Compressonator supports in its UI.
			
 
				+-		enum dds_fmt
			
 
				+-		{
			
 
				+-			cRGBA32,
			
 
				+-			cRGBA_HALF,
			
 
				+-			cRGBA_FLOAT
			
 
				+-		};
			
 
				+-
			
 
				+-		bool status = false;
			
 
				+-		dds_fmt fmt = cRGBA32;
			
 
				+-		uint32_t width = 0, height = 0;
			
 
				+-		bool hdr_flag = false;
			
 
				+-		TinyDDS_Format tfmt = TDDS_UNDEFINED;
			
 
				+-
			
 
				+-		TinyDDS_ContextHandle ctx = TinyDDS_CreateContext(&cbs, pFile);
			
 
				+-		if (!ctx)
			
 
				+-			goto failure;
			
 
				+-
			
 
				+-		status = TinyDDS_ReadHeader(ctx);
			
 
				+-		if (!status)
			
 
				+-		{
			
 
				+-			error_printf("Failed parsing DDS header in file \"%s\"\n", pFilename);
			
 
				+-			goto failure;
			
 
				+-		}
			
 
				+-				
			
 
				+-		if ((!TinyDDS_Is2D(ctx)) || (TinyDDS_ArraySlices(ctx) > 1) || (TinyDDS_IsCubemap(ctx)))
			
 
				+-		{
			
 
				+-			error_printf("Unsupported DDS texture type in file \"%s\"\n", pFilename);
			
 
				+-			goto failure;
			
 
				+-		}
			
 
				+-
			
 
				+-		width = TinyDDS_Width(ctx);
			
 
				+-		height = TinyDDS_Height(ctx);
			
 
				+-						
			
 
				+-		if (!width || !height)
			
 
				+-		{
			
 
				+-			error_printf("DDS texture dimensions invalid in file \"%s\"\n", pFilename);
			
 
				+-			goto failure;
			
 
				+-		}
			
 
				+-
			
 
				+-		if ((width > MAX_IMAGE_DIM) || (height > MAX_IMAGE_DIM))
			
 
				+-		{
			
 
				+-			error_printf("DDS texture dimensions too large in file \"%s\"\n", pFilename);
			
 
				+-			goto failure;
			
 
				+-		}
			
 
				+-		
			
 
				+-		tfmt = TinyDDS_GetFormat(ctx);
			
 
				+-		switch (tfmt)
			
 
				+-		{
			
 
				+-		case TDDS_R8G8B8A8_SRGB:
			
 
				+-		case TDDS_R8G8B8A8_UNORM:
			
 
				+-		case TDDS_B8G8R8A8_SRGB:
			
 
				+-		case TDDS_B8G8R8A8_UNORM:
			
 
				+-			fmt = cRGBA32;
			
 
				+-			break;
			
 
				+-		case TDDS_R16G16B16A16_SFLOAT:
			
 
				+-			fmt = cRGBA_HALF;
			
 
				+-			hdr_flag = true;
			
 
				+-			break;
			
 
				+-		case TDDS_R32G32B32A32_SFLOAT:
			
 
				+-			fmt = cRGBA_FLOAT;
			
 
				+-			hdr_flag = true;
			
 
				+-			break;
			
 
				+-		default:
			
 
				+-			error_printf("File \"%s\" has an unsupported DDS texture format (only supports RGBA/BGRA 32bpp, RGBA HALF float, or RGBA FLOAT)\n", pFilename);
			
 
				+-			goto failure;
			
 
				+-		}
			
 
				+-
			
 
				+-		if (hdr_flag)
			
 
				+-			hdr_mips.resize(TinyDDS_NumberOfMipmaps(ctx));
			
 
				+-		else
			
 
				+-			ldr_mips.resize(TinyDDS_NumberOfMipmaps(ctx));
			
 
				+-
			
 
				+-		for (uint32_t level = 0; level < TinyDDS_NumberOfMipmaps(ctx); level++)
			
 
				+-		{
			
 
				+-			const uint32_t level_width = TinyDDS_MipMapReduce(width, level);
			
 
				+-			const uint32_t level_height = TinyDDS_MipMapReduce(height, level);
			
 
				+-			const uint32_t total_level_texels = level_width * level_height;
			
 
				+-
			
 
				+-			const void* pImage = TinyDDS_ImageRawData(ctx, level);
			
 
				+-			const uint32_t image_size = TinyDDS_ImageSize(ctx, level);
			
 
				+-
			
 
				+-			if (fmt == cRGBA32)
			
 
				+-			{
			
 
				+-				ldr_mips[level].resize(level_width, level_height);
			
 
				+-
			
 
				+-				if ((ldr_mips[level].get_total_pixels() * sizeof(uint32_t) != image_size))
			
 
				+-				{
			
 
				+-					assert(0);
			
 
				+-					goto failure;
			
 
				+-				}
			
 
				+-
			
 
				+-				memcpy(ldr_mips[level].get_ptr(), pImage, image_size);
			
 
				+-								
			
 
				+-				if ((tfmt == TDDS_B8G8R8A8_SRGB) || (tfmt == TDDS_B8G8R8A8_UNORM))
			
 
				+-				{
			
 
				+-					// Swap R and B components.
			
 
				+-					uint32_t *pTexels = (uint32_t *)ldr_mips[level].get_ptr();
			
 
				+-					for (uint32_t i = 0; i < total_level_texels; i++)
			
 
				+-					{
			
 
				+-						const uint32_t v = pTexels[i];
			
 
				+-						const uint32_t r = (v >> 16) & 0xFF;
			
 
				+-						const uint32_t b = v & 0xFF;
			
 
				+-						pTexels[i] = r | (b << 16) | (v & 0xFF00FF00);
			
 
				+-					}
			
 
				+-				}
			
 
				+-			}
			
 
				+-			else if (fmt == cRGBA_FLOAT)
			
 
				+-			{
			
 
				+-				hdr_mips[level].resize(level_width, level_height);
			
 
				+-
			
 
				+-				if ((hdr_mips[level].get_total_pixels() * sizeof(float) * 4 != image_size))
			
 
				+-				{
			
 
				+-					assert(0);
			
 
				+-					goto failure;
			
 
				+-				}
			
 
				+-
			
 
				+-				memcpy(hdr_mips[level].get_ptr(), pImage, image_size);
			
 
				+-			}
			
 
				+-			else if (fmt == cRGBA_HALF)
			
 
				+-			{
			
 
				+-				hdr_mips[level].resize(level_width, level_height);
			
 
				+-				
			
 
				+-				if ((hdr_mips[level].get_total_pixels() * sizeof(basist::half_float) * 4 != image_size))
			
 
				+-				{
			
 
				+-					assert(0);
			
 
				+-					goto failure;
			
 
				+-				}
			
 
				+-
			
 
				+-				// Unpack half to float.
			
 
				+-				const basist::half_float* pSrc_comps = static_cast<const basist::half_float*>(pImage);
			
 
				+-				vec4F* pDst_texels = hdr_mips[level].get_ptr();
			
 
				+-				
			
 
				+-				for (uint32_t i = 0; i < total_level_texels; i++)
			
 
				+-				{
			
 
				+-					(*pDst_texels)[0] = basist::half_to_float(pSrc_comps[0]);
			
 
				+-					(*pDst_texels)[1] = basist::half_to_float(pSrc_comps[1]);
			
 
				+-					(*pDst_texels)[2] = basist::half_to_float(pSrc_comps[2]);
			
 
				+-					(*pDst_texels)[3] = basist::half_to_float(pSrc_comps[3]);
			
 
				+-
			
 
				+-					pSrc_comps += 4;
			
 
				+-					pDst_texels++;
			
 
				+-				} // y
			
 
				+-			}
			
 
				+-		} // level
			
 
				+-
			
 
				+-		TinyDDS_DestroyContext(ctx);
			
 
				+-		fclose(pFile);
			
 
				+-
			
 
				+-		return true;
			
 
				+-
			
 
				+-	failure:
			
 
				+-		if (ctx)
			
 
				+-			TinyDDS_DestroyContext(ctx);
			
 
				+-
			
 
				+-		if (pFile)
			
 
				+-			fclose(pFile);
			
 
				+-
			
 
				+ 		return false;
			
 
				+ 	}
			
 
				+ 
			
--- a/thirdparty/basis_universal/transcoder/basisu.h
+++ b/thirdparty/basis_universal/transcoder/basisu.h
@@ -1,5 +1,5 @@
 
				 // basisu.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
@@ -117,13 +117,26 @@ namespace basisu
 
				 	typedef basisu::vector<uint64_t> uint64_vec;
			
 
				 	typedef basisu::vector<int> int_vec;
			
 
				 	typedef basisu::vector<bool> bool_vec;
			
 
				+	typedef basisu::vector<float> float_vec;
			
 
				 
			
 
				 	void enable_debug_printf(bool enabled);
			
 
				 	void debug_printf(const char *pFmt, ...);
			
 
				-		
			
 
				 
			
 
				+#ifndef __EMSCRIPTEN__
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic push
			
 
				+#pragma GCC diagnostic ignored "-Wclass-memaccess"            
			
 
				+#endif                  
			
 
				+#endif
			
 
				+		
			
 
				 	template <typename T> inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); }
			
 
				 
			
 
				+#ifndef __EMSCRIPTEN__
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic pop
			
 
				+#endif                            
			
 
				+#endif
			
 
				+
			
 
				 	template <typename T0, typename T1> inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; }
			
 
				 
			
 
				 	template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; }
			
@@ -162,10 +175,45 @@ namespace basisu
 
				 	template<typename T> inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; }
			
 
				 	template<typename T> inline T open_range_check(T v, T maxv) { assert(v < maxv); BASISU_NOTE_UNUSED(maxv); return v; }
			
 
				 
			
 
				+	// Open interval
			
 
				+	inline bool in_bounds(int v, int l, int h)
			
 
				+	{
			
 
				+		return (v >= l) && (v < h);
			
 
				+	}
			
 
				+
			
 
				+	// Closed interval
			
 
				+	inline bool in_range(int v, int l, int h)
			
 
				+	{
			
 
				+		return (v >= l) && (v <= h);
			
 
				+	}
			
 
				+
			
 
				 	inline uint32_t total_bits(uint32_t v) { uint32_t l = 0; for ( ; v > 0U; ++l) v >>= 1; return l; }
			
 
				 
			
 
				 	template<typename T> inline T saturate(T val) { return clamp(val, 0.0f, 1.0f); }
			
 
				 
			
 
				+	inline uint32_t get_bit(uint32_t src, int ndx)
			
 
				+	{
			
 
				+		assert(in_bounds(ndx, 0, 32));
			
 
				+		return (src >> ndx) & 1;
			
 
				+	}
			
 
				+
			
 
				+	inline bool is_bit_set(uint32_t src, int ndx)
			
 
				+	{
			
 
				+		return get_bit(src, ndx) != 0;
			
 
				+	}
			
 
				+
			
 
				+	inline uint32_t get_bits(uint32_t val, int low, int high)
			
 
				+	{
			
 
				+		const int num_bits = (high - low) + 1;
			
 
				+		assert(in_range(num_bits, 1, 32));
			
 
				+
			
 
				+		val >>= low;
			
 
				+		if (num_bits != 32)
			
 
				+			val &= ((1u << num_bits) - 1);
			
 
				+
			
 
				+		return val;
			
 
				+	}
			
 
				+
			
 
				 	template<typename T, typename R> inline void append_vector(T &vec, const R *pObjs, size_t n) 
			
 
				 	{ 
			
 
				 		if (n)
			
@@ -267,6 +315,11 @@ namespace basisu
 
				 		return true;
			
 
				 	}
			
 
				 
			
 
				+	static inline uint32_t read_le_word(const uint8_t* pBytes)
			
 
				+	{
			
 
				+		return (pBytes[1] << 8U) | (pBytes[0]);
			
 
				+	}
			
 
				+
			
 
				 	static inline uint32_t read_le_dword(const uint8_t *pBytes)
			
 
				 	{
			
 
				 		return (pBytes[3] << 24U) | (pBytes[2] << 16U) | (pBytes[1] << 8U) | (pBytes[0]);
			
@@ -303,6 +356,10 @@ namespace basisu
 
				 			return *this;
			
 
				 		}
			
 
				 
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic push
			
 
				+#pragma GCC diagnostic ignored "-Warray-bounds"            
			
 
				+#endif  
			
 
				 		inline operator uint32_t() const
			
 
				 		{
			
 
				 			switch (NumBytes)
			
@@ -354,6 +411,9 @@ namespace basisu
 
				 				}
			
 
				 			}
			
 
				 		}
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic pop
			
 
				+#endif
			
 
				 	};
			
 
				 
			
 
				 	enum eZero { cZero };
			
@@ -402,8 +462,11 @@ namespace basisu
 
				 		cBC3,				// DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block)
			
 
				 		cBC4,				// DXT5A
			
 
				 		cBC5,				// 3DC/DXN (two BC4/DXT5A blocks)
			
 
				+		cBC6HSigned,		// HDR
			
 
				+		cBC6HUnsigned,		// HDR
			
 
				 		cBC7,
			
 
				-		cASTC4x4,		// LDR only
			
 
				+		cASTC_LDR_4x4,		// ASTC 4x4 LDR only
			
 
				+		cASTC_HDR_4x4,		// ASTC 4x4 HDR only (but may use LDR ASTC blocks internally)
			
 
				 		cPVRTC1_4_RGB,
			
 
				 		cPVRTC1_4_RGBA,
			
 
				 		cATC_RGB,
			
@@ -413,17 +476,22 @@ namespace basisu
 
				 		cETC2_R11_EAC,
			
 
				 		cETC2_RG11_EAC,
			
 
				 		cUASTC4x4,		
			
 
				+		cUASTC_HDR_4x4,
			
 
				 		cBC1_NV,
			
 
				 		cBC1_AMD,
			
 
				-		
			
 
				+				
			
 
				 		// Uncompressed/raw pixels
			
 
				 		cRGBA32,
			
 
				 		cRGB565,
			
 
				 		cBGR565,
			
 
				 		cRGBA4444,
			
 
				-		cABGR4444
			
 
				+		cABGR4444,
			
 
				+		cRGBA_HALF,
			
 
				+		cRGB_HALF,
			
 
				+		cRGB_9E5
			
 
				 	};
			
 
				 
			
 
				+	// This is bytes per block for GPU formats, or bytes per texel for uncompressed formats.
			
 
				 	inline uint32_t get_bytes_per_block(texture_format fmt)
			
 
				 	{
			
 
				 		switch (fmt)
			
@@ -443,13 +511,27 @@ namespace basisu
 
				 		case texture_format::cETC2_R11_EAC:
			
 
				 			return 8;
			
 
				 		case texture_format::cRGBA32:
			
 
				-			return sizeof(uint32_t) * 16;
			
 
				+		case texture_format::cRGB_9E5:
			
 
				+			return sizeof(uint32_t);
			
 
				+		case texture_format::cRGB_HALF:
			
 
				+			return sizeof(uint16_t) * 3;
			
 
				+		case texture_format::cRGBA_HALF:
			
 
				+			return sizeof(uint16_t) * 4;
			
 
				+		case texture_format::cRGB565:
			
 
				+		case texture_format::cBGR565:
			
 
				+		case texture_format::cRGBA4444:
			
 
				+		case texture_format::cABGR4444:
			
 
				+			return sizeof(uint16_t);
			
 
				+
			
 
				 		default:
			
 
				 			break;
			
 
				 		}
			
 
				+		
			
 
				+		// Everything else is 16 bytes/block.
			
 
				 		return 16;
			
 
				 	}
			
 
				 
			
 
				+	// This is qwords per block for GPU formats, or not valid for uncompressed formats.
			
 
				 	inline uint32_t get_qwords_per_block(texture_format fmt)
			
 
				 	{
			
 
				 		return get_bytes_per_block(fmt) >> 3;
			
@@ -473,6 +555,17 @@ namespace basisu
 
				 		BASISU_NOTE_UNUSED(fmt);
			
 
				 		return 4;
			
 
				 	}
			
 
				+
			
 
				+	inline bool is_hdr_texture_format(texture_format fmt)
			
 
				+	{
			
 
				+		if (fmt == texture_format::cASTC_HDR_4x4)
			
 
				+			return true;
			
 
				+		if (fmt == texture_format::cUASTC_HDR_4x4)
			
 
				+			return true;
			
 
				+		if ((fmt == texture_format::cBC6HSigned) || (fmt == texture_format::cBC6HUnsigned))
			
 
				+			return true;
			
 
				+		return false;
			
 
				+	}
			
 
				 							
			
 
				 } // namespace basisu
			
 
				 
			
--- a/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h
+++ b/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h
@@ -0,0 +1,102 @@
 
				+// File: basisu_astc_hdr_core.h
			
 
				+#pragma once
			
 
				+#include "basisu_astc_helpers.h"
			
 
				+
			
 
				+namespace basist
			
 
				+{
			
 
				+	struct astc_blk
			
 
				+	{
			
 
				+		uint8_t m_vals[16];
			
 
				+	};
			
 
				+
			
 
				+	// ASTC_HDR_MAX_VAL is the maximum color component value that can be encoded.
			
 
				+	// If the input has values higher than this, they need to be linearly scaled so all values are between [0,ASTC_HDR_MAX_VAL], and the linear scaling inverted in the shader.
			
 
				+	const float ASTC_HDR_MAX_VAL = 65216.0f; // actually MAX_QLOG12_VAL
			
 
				+
			
 
				+	// Maximum usable QLOG encodings, and their floating point equivalent values, that don't result in NaN/Inf's.
			
 
				+	const uint32_t MAX_QLOG7 = 123;
			
 
				+	//const float MAX_QLOG7_VAL = 55296.0f;
			
 
				+
			
 
				+	const uint32_t MAX_QLOG8 = 247;
			
 
				+	//const float MAX_QLOG8_VAL = 60416.0f;
			
 
				+
			
 
				+	const uint32_t MAX_QLOG9 = 495;
			
 
				+	//const float MAX_QLOG9_VAL = 62976.0f;
			
 
				+
			
 
				+	const uint32_t MAX_QLOG10 = 991;
			
 
				+	//const float MAX_QLOG10_VAL = 64256.0f;
			
 
				+
			
 
				+	const uint32_t MAX_QLOG11 = 1983;
			
 
				+	//const float MAX_QLOG11_VAL = 64896.0f;
			
 
				+
			
 
				+	const uint32_t MAX_QLOG12 = 3967;
			
 
				+	//const float MAX_QLOG12_VAL = 65216.0f;
			
 
				+
			
 
				+	const uint32_t MAX_QLOG16 = 63487;
			
 
				+	const float MAX_QLOG16_VAL = 65504.0f;
			
 
				+
			
 
				+	const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4;
			
 
				+
			
 
				+	// Notes:
			
 
				+	// qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless)
			
 
				+	// However, this is not lossless in the general sense.
			
 
				+	inline half_float qlog16_to_half_slow(uint32_t qlog16)
			
 
				+	{
			
 
				+		assert(qlog16 <= 0xFFFF);
			
 
				+
			
 
				+		int C = qlog16;
			
 
				+
			
 
				+		int E = (C & 0xF800) >> 11;
			
 
				+		int M = C & 0x7FF;
			
 
				+
			
 
				+		int Mt;
			
 
				+		if (M < 512)
			
 
				+			Mt = 3 * M;
			
 
				+		else if (M >= 1536)
			
 
				+			Mt = 5 * M - 2048;
			
 
				+		else
			
 
				+			Mt = 4 * M - 512;
			
 
				+
			
 
				+		int Cf = (E << 10) + (Mt >> 3);
			
 
				+		return (half_float)Cf;
			
 
				+	}
			
 
				+
			
 
				+	// This is not lossless
			
 
				+	inline half_float qlog_to_half_slow(uint32_t qlog, uint32_t bits)
			
 
				+	{
			
 
				+		assert((bits >= 7U) && (bits <= 16U));
			
 
				+		assert(qlog < (1U << bits));
			
 
				+
			
 
				+		int C = qlog << (16 - bits);
			
 
				+		return qlog16_to_half_slow(C);
			
 
				+	}
			
 
				+
			
 
				+	void astc_hdr_core_init();
			
 
				+
			
 
				+	void decode_mode7_to_qlog12_ise20(
			
 
				+		const uint8_t* pEndpoints,
			
 
				+		int e[2][3],
			
 
				+		int* pScale);
			
 
				+
			
 
				+	bool decode_mode7_to_qlog12(
			
 
				+		const uint8_t* pEndpoints,
			
 
				+		int e[2][3],
			
 
				+		int* pScale,
			
 
				+		uint32_t ise_endpoint_range);
			
 
				+
			
 
				+	void decode_mode11_to_qlog12_ise20(
			
 
				+		const uint8_t* pEndpoints,
			
 
				+		int e[2][3]);
			
 
				+
			
 
				+	bool decode_mode11_to_qlog12(
			
 
				+		const uint8_t* pEndpoints,
			
 
				+		int e[2][3],
			
 
				+		uint32_t ise_endpoint_range);
			
 
				+
			
 
				+	bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk);
			
 
				+	bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk);
			
 
				+
			
 
				+	bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk);
			
 
				+	bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk);
			
 
				+
			
 
				+} // namespace basist
			
--- a/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
+++ b/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
@@ -0,0 +1,3587 @@
 
				+// basisu_astc_helpers.h
			
 
				+// Be sure to define ASTC_HELPERS_IMPLEMENTATION somewhere to get the implementation, otherwise you only get the header.
			
 
				+#pragma once
			
 
				+#ifndef BASISU_ASTC_HELPERS_HEADER
			
 
				+#define BASISU_ASTC_HELPERS_HEADER
			
 
				+
			
 
				+#include <stdlib.h>
			
 
				+#include <stdint.h>
			
 
				+#include <math.h>
			
 
				+#include <fenv.h>
			
 
				+
			
 
				+namespace astc_helpers
			
 
				+{
			
 
				+	const uint32_t MAX_WEIGHT_VALUE = 64; // grid texel weights must range from [0,64]
			
 
				+	const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid
			
 
				+	const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels
			
 
				+	const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values
			
 
				+
			
 
				+	static const uint32_t NUM_ASTC_BLOCK_SIZES = 14;
			
 
				+	extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2];
			
 
				+
			
 
				+	// The Color Endpoint Modes (CEM's)
			
 
				+	enum cems
			
 
				+	{
			
 
				+		CEM_LDR_LUM_DIRECT = 0,
			
 
				+		CEM_LDR_LUM_BASE_PLUS_OFS = 1,
			
 
				+		CEM_HDR_LUM_LARGE_RANGE = 2,
			
 
				+		CEM_HDR_LUM_SMALL_RANGE = 3,
			
 
				+		CEM_LDR_LUM_ALPHA_DIRECT = 4,
			
 
				+		CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS = 5,
			
 
				+		CEM_LDR_RGB_BASE_SCALE = 6,
			
 
				+		CEM_HDR_RGB_BASE_SCALE = 7,
			
 
				+		CEM_LDR_RGB_DIRECT = 8,
			
 
				+		CEM_LDR_RGB_BASE_PLUS_OFFSET = 9,
			
 
				+		CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10,
			
 
				+		CEM_HDR_RGB = 11,
			
 
				+		CEM_LDR_RGBA_DIRECT = 12,
			
 
				+		CEM_LDR_RGBA_BASE_PLUS_OFFSET = 13,
			
 
				+		CEM_HDR_RGB_LDR_ALPHA = 14,
			
 
				+		CEM_HDR_RGB_HDR_ALPHA = 15
			
 
				+	};
			
 
				+
			
 
				+	// All Bounded Integer Sequence Coding (BISE or ISE) ranges.
			
 
				+	// Weights: Ranges [0,11] are valid.
			
 
				+	// Endpoints: Ranges [4,20] are valid.
			
 
				+	enum bise_levels
			
 
				+	{
			
 
				+		BISE_2_LEVELS = 0,
			
 
				+		BISE_3_LEVELS = 1,
			
 
				+		BISE_4_LEVELS = 2,
			
 
				+		BISE_5_LEVELS = 3,
			
 
				+		BISE_6_LEVELS = 4,
			
 
				+		BISE_8_LEVELS = 5,
			
 
				+		BISE_10_LEVELS = 6,
			
 
				+		BISE_12_LEVELS = 7,
			
 
				+		BISE_16_LEVELS = 8,
			
 
				+		BISE_20_LEVELS = 9,
			
 
				+		BISE_24_LEVELS = 10,
			
 
				+		BISE_32_LEVELS = 11,
			
 
				+		BISE_40_LEVELS = 12,
			
 
				+		BISE_48_LEVELS = 13,
			
 
				+		BISE_64_LEVELS = 14,
			
 
				+		BISE_80_LEVELS = 15,
			
 
				+		BISE_96_LEVELS = 16,
			
 
				+		BISE_128_LEVELS = 17,
			
 
				+		BISE_160_LEVELS = 18,
			
 
				+		BISE_192_LEVELS = 19,
			
 
				+		BISE_256_LEVELS = 20
			
 
				+	};
			
 
				+
			
 
				+	const uint32_t TOTAL_ISE_RANGES = 21;
			
 
				+
			
 
				+	// Valid endpoint ISE ranges
			
 
				+	const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4
			
 
				+	const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20
			
 
				+	const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1;
			
 
				+
			
 
				+	// Valid weight ISE ranges
			
 
				+	const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0
			
 
				+	const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11
			
 
				+	const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1;
			
 
				+
			
 
				+	// The ISE range table.
			
 
				+	extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1)
			
 
				+
			
 
				+	// Possible Color Component Select values, used in dual plane mode. 
			
 
				+	// The CCS component will be interpolated using the 2nd weight plane.
			
 
				+	enum ccs
			
 
				+	{
			
 
				+		CCS_GBA_R = 0,
			
 
				+		CCS_RBA_G = 1,
			
 
				+		CCS_RGA_B = 2,
			
 
				+		CCS_RGB_A = 3
			
 
				+	};
			
 
				+		
			
 
				+	struct astc_block
			
 
				+	{
			
 
				+		uint32_t m_vals[4];
			
 
				+	};
			
 
				+
			
 
				+	const uint32_t MAX_PARTITIONS = 4;				// Max # of partitions or subsets for single plane mode
			
 
				+	const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3;	// Max # of partitions or subsets for dual plane mode
			
 
				+	const uint32_t NUM_PARTITION_PATTERNS = 1024;	// Total # of partition pattern seeds (10-bits)
			
 
				+	const uint32_t MAX_ENDPOINTS = 18;				// Maximum # of endpoint values in a block
			
 
				+	// Logical (unpacked) description of one ASTC block; pack_astc_block() turns it into a physical astc_block.
			
 
				+	struct log_astc_block
			
 
				+	{
			
 
				+		bool m_error_flag; // pack_astc_block() asserts on, and refuses to pack, a block with this flag set
			
 
				+		
			
 
				+		bool m_solid_color_flag_ldr, m_solid_color_flag_hdr;
			
 
				+		uint16_t m_solid_color[4]; // passed as r, g, b, a to pack_void_extent_ldr()/pack_void_extent_hdr()
			
 
				+
			
 
				+		// Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr
			
 
				+		uint32_t m_grid_width, m_grid_height;	// weight grid dimensions, not the dimension of the block
			
 
				+		
			
 
				+		bool m_dual_plane;
			
 
				+
			
 
				+		uint32_t m_weight_ise_range;			// 0-11
			
 
				+		uint32_t m_endpoint_ise_range;			// 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking
			
 
				+
			
 
				+		uint32_t m_color_component_selector;	// 0-3, 0=GBA R, 1=RBA G, 2=RGA B, 3=RGB A, only used in dual plane mode
			
 
				+
			
 
				+		uint32_t m_num_partitions;				// or the # of subsets, 1-4 (1-3 if dual plane mode)
			
 
				+		uint32_t m_partition_id;				// 10-bits, must be 0 if m_num_partitions==1
			
 
				+		
			
 
				+		uint32_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's
			
 
				+		
			
 
				+		// ISE weight grid values. In dual plane mode, the order is p0,p1,  p0,p1,  etc.
			
 
				+		uint8_t m_weights[MAX_GRID_WEIGHTS];
			
 
				+		
			
 
				+		// ISE endpoint values
			
 
				+		// Endpoint order examples:
			
 
				+		// 1 subset LA : LL0 LH0 AL0 AH0
			
 
				+		// 1 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0
			
 
				+		// 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0
			
 
				+		// 2 subset LA : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1
			
 
				+		// 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1
			
 
				+		// 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1
			
 
				+		uint8_t m_endpoints[MAX_ENDPOINTS];
			
 
				+		// Zeroes every member (flags become false, all ranges/counts become 0).
			
 
				+		void clear()
			
 
				+		{
			
 
				+			memset(this, 0, sizeof(*this)); // NOTE(review): memset needs <string.h>, which this header does not include itself - it relies on the including file
			
 
				+		}
			
 
				+	};
			
 
				+
			
 
				+	// Half-open interval check: asserts that l <= v < h and returns v unchanged. The (void) casts presumably silence unused-parameter warnings when assert() is compiled out.
			
 
				+	inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
			
 
				+	inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
			
 
				+	// Returns bits [low, high] (both inclusive) of val, shifted down so that bit "low" ends up at bit 0.
			
 
				+	inline uint32_t get_bits(uint32_t val, int low, int high)
			
 
				+	{
			
 
				+		const int num_bits = (high - low) + 1;
			
 
				+		assert((num_bits >= 1) && (num_bits <= 32));
			
 
				+
			
 
				+		val >>= low;
			
 
				+		if (num_bits != 32) // a full 32-bit field needs no mask (and 1u << 32 would be undefined)
			
 
				+			val &= ((1u << num_bits) - 1);
			
 
				+
			
 
				+		return val;
			
 
				+	}
			
 
				+
			
 
				+	// Returns the number of levels in the given ISE range: (1 + 2 * trits + 4 * quints) << bits, read from the range's row of g_ise_range_table.
			
 
				+	inline uint32_t get_ise_levels(uint32_t ise_range) 
			
 
				+	{ 
			
 
				+		assert(ise_range < TOTAL_ISE_RANGES);
			
 
				+		return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0];
			
 
				+	}
			
 
				+	// Returns the number of bits that an ISE sequence of "count" values occupies in ISE range "range". "range" indexes g_ise_range_table and is not bounds checked here.
			
 
				+	inline int get_ise_sequence_bits(int count, int range)
			
 
				+	{
			
 
				+		// See 18.22 Data Size Determination
			
 
				+		int total_bits = g_ise_range_table[range][0] * count; // plain bits per value
			
 
				+		total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5; // trit ranges: ceil(8 * count / 5) extra bits
			
 
				+		total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3; // quint ranges: ceil(7 * count / 3) extra bits
			
 
				+		return total_bits;
			
 
				+	}
			
 
				+	// Interpolates between l and h with weight w in [0, 64] (w == 0 gives l, w == 64 gives h); the +32 rounds to nearest before the divide by 64.
			
 
				+	inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w)
			
 
				+	{
			
 
				+		assert(w <= MAX_WEIGHT_VALUE);
			
 
				+		return (l * (64 - w) + h * w + 32) >> 6;
			
 
				+	}
			
 
				+
			
 
				+	void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range);
			
 
				+
			
 
				+	// Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions.
			
 
				+	bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr);
			
 
				+
			
 
				+	// Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component.
			
 
				+	void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a);
			
 
				+
			
 
				+	// Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's)
			
 
				+	void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah);
			
 
				+
			
 
				+	// These helpers are all quite slow, but are useful for table preparation.
			
 
				+	
			
 
				+	// Dequantizes ISE encoded endpoint val to [0,255]
			
 
				+	uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // ISE ranges 4-11
			
 
				+		
			
 
				+	// Dequantizes ISE encoded weight val to [0,64]
			
 
				+	uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // ISE ranges 0-10
			
 
				+
			
 
				+	uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range);
			
 
				+	uint32_t find_nearest_bise_weight(int v, uint32_t ise_range);
			
 
				+
			
 
				+	void create_quant_tables(
			
 
				+		uint8_t* pVal_to_ise,	// [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
			
 
				+		uint8_t* pISE_to_val,	// ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
			
 
				+		uint8_t* pISE_to_rank,	// returns the level rank index given an ISE symbol, [levels]
			
 
				+		uint8_t* pRank_to_ISE,  // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
			
 
				+		uint32_t ise_range,		// ise range, [4,20] for endpoints, [0,11] for weights
			
 
				+		bool weight_flag);		// false if block endpoints, true if weights
			
 
				+
			
 
				+	// True if the CEM is LDR.
			
 
				+	bool is_cem_ldr(uint32_t mode);
			
 
				+	inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); } // True for every mode that is_cem_ldr() rejects.
			
 
				+
			
 
				+	// True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp).
			
 
				+	bool is_valid_block_size(uint32_t w, uint32_t h);
			
 
				+
			
 
				+	bool block_has_any_hdr_cems(const log_astc_block& log_blk);
			
 
				+	bool block_has_any_ldr_cems(const log_astc_block& log_blk);
			
 
				+	
			
 
				+	// Returns the # of endpoint values for the given CEM (0-15): 2, 4, 6 or 8, selected by cem >> 2.
			
 
				+	inline uint32_t get_num_cem_values(uint32_t cem) { assert(cem <= 15); return 2 + 2 * (cem >> 2); }
			
 
				+	// Quantization lookup tables for a single ISE range, used for either weights or endpoints.
			
 
				+	struct dequant_table
			
 
				+	{
			
 
				+		basisu::vector<uint8_t> m_val_to_ise;	// [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
			
 
				+		basisu::vector<uint8_t> m_ISE_to_val;	// ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
			
 
				+		basisu::vector<uint8_t> m_ISE_to_rank;	// returns the level rank index given an ISE symbol, [levels]
			
 
				+		basisu::vector<uint8_t> m_rank_to_ISE;  // returns the ISE symbol given a level rank, inverse of m_ISE_to_rank, [levels]
			
 
				+		// Only sizes the tables (nothing is filled in here); the two rank tables are sized only when init_rank_tabs is true.
			
 
				+		void init(bool weight_flag, uint32_t num_levels, bool init_rank_tabs)
			
 
				+		{
			
 
				+			m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256); // 65 entries for weights, 256 for endpoints
			
 
				+			m_ISE_to_val.resize(num_levels);
			
 
				+			if (init_rank_tabs)
			
 
				+			{
			
 
				+				m_ISE_to_rank.resize(num_levels);
			
 
				+				m_rank_to_ISE.resize(num_levels);
			
 
				+			}
			
 
				+		}
			
 
				+	};
			
 
				+	// Holds one dequant_table per valid weight ISE range and one per valid endpoint ISE range.
			
 
				+	struct dequant_tables
			
 
				+	{
			
 
				+		dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES]; // indexed by (range - FIRST_VALID_WEIGHT_ISE_RANGE)
			
 
				+		dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES]; // indexed by (range - FIRST_VALID_ENDPOINT_ISE_RANGE)
			
 
				+		// The accessors below take the absolute ISE range and rebase it to an array index; the range is validated by assert() only.
			
 
				+		const dequant_table& get_weight_tab(uint32_t range) const
			
 
				+		{
			
 
				+			assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
			
 
				+			return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
			
 
				+		}
			
 
				+
			
 
				+		dequant_table& get_weight_tab(uint32_t range)
			
 
				+		{
			
 
				+			assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
			
 
				+			return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
			
 
				+		}
			
 
				+
			
 
				+		const dequant_table& get_endpoint_tab(uint32_t range) const
			
 
				+		{
			
 
				+			assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
			
 
				+			return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
			
 
				+		}
			
 
				+
			
 
				+		dequant_table& get_endpoint_tab(uint32_t range)
			
 
				+		{
			
 
				+			assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
			
 
				+			return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
			
 
				+		}
			
 
				+		// Sizes every weight and endpoint table and fills it via create_quant_tables(); rank tables are built only when init_rank_tabs is true.
			
 
				+		void init(bool init_rank_tabs)
			
 
				+		{
			
 
				+			for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++)
			
 
				+			{
			
 
				+				const uint32_t num_levels = get_ise_levels(range);
			
 
				+				dequant_table& tab = get_weight_tab(range);
			
 
				+
			
 
				+				tab.init(true, num_levels, init_rank_tabs);
			
 
				+				// The rank table pointers are passed as nullptr when rank tables were not requested.
			
 
				+				create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, true);
			
 
				+			}
			
 
				+
			
 
				+			for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++)
			
 
				+			{
			
 
				+				const uint32_t num_levels = get_ise_levels(range);
			
 
				+				dequant_table& tab = get_endpoint_tab(range);
			
 
				+
			
 
				+				tab.init(false, num_levels, init_rank_tabs);
			
 
				+				// The rank table pointers are passed as nullptr when rank tables were not requested.
			
 
				+				create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, false);
			
 
				+			}
			
 
				+		}
			
 
				+	};
			
 
				+
			
 
				+	extern dequant_tables g_dequant_tables;
			
 
				+	void init_tables(bool init_rank_tabs);
			
 
				+		
			
 
				+	// Procedurally returns the texel partition/subset index given the block coordinate and config.
			
 
				+	int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block);
			
 
				+		
			
 
				+	void blue_contract(
			
 
				+		int r, int g, int b, int a,
			
 
				+		int& dr, int& dg, int& db, int& da);
			
 
				+
			
 
				+	void bit_transfer_signed(int& a, int& b);
			
 
				+
			
 
				+	void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE);
			
 
				+
			
 
				+	typedef uint16_t half_float;
			
 
				+	half_float float_to_half(float val, bool toward_zero);
			
 
				+	float half_to_float(half_float hval);
			
 
				+
			
 
				+	const int MAX_RGB9E5 = 0xff80;
			
 
				+	void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b);
			
 
				+	uint32_t pack_rgb9e5(float r, float g, float b);
			
 
				+	
			
 
				+	enum decode_mode
			
 
				+	{
			
 
				+		cDecodeModeSRGB8 = 0,	// returns uint8_t's, not valid on HDR blocks
			
 
				+		cDecodeModeLDR8 = 1,	// returns uint8_t's, not valid on HDR blocks
			
 
				+		cDecodeModeHDR16 = 2,   // returns uint16_t's (half floats), valid on all LDR/HDR blocks
			
 
				+		cDecodeModeRGB9E5 = 3	// returns uint32_t's, packed as RGB 9E5 (shared exponent), see https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
			
 
				+	};
			
 
				+
			
 
				+	// Decodes logical block to output pixels.
			
 
				+	// pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16)
			
 
				+	bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode);
			
 
				+
			
 
				+	void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs);
			
 
				+
			
 
				+	// Unpack a physical ASTC encoded GPU texture block to a logical block description.
			
 
				+	bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height);
			
 
				+					
			
 
				+} // namespace astc_helpers
			
 
				+
			
 
				+#endif // BASISU_ASTC_HELPERS_HEADER
			
 
				+
			
 
				+//------------------------------------------------------------------
			
 
				+
			
 
				+#ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION
			
 
				+
			
 
				+namespace astc_helpers
			
 
				+{
			
 
				+	template<typename T> inline T my_min(T a, T b) { return (a < b) ? a : b; } // Smaller of a and b; returns b when they compare equal.
			
 
				+	template<typename T> inline T my_max(T a, T b) { return (a > b) ? a : b; } // Larger of a and b; returns b when they compare equal.
			
 
				+
			
 
				+	const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = { 
			
 
				+		{ 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 }, 
			
 
				+		{ 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 }, 
			
 
				+		{ 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 }, 
			
 
				+		{ 12, 10 }, { 12, 12 } 
			
 
				+	};
			
 
				+
			
 
				+	const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] =
			
 
				+	{
			
 
				+		//b  t  q
			
 
				+		//2  3  5	 // rng  ise_index	notes
			
 
				+		{ 1, 0, 0 }, // 0..1 0
			
 
				+		{ 0, 1, 0 }, // 0..2 1
			
 
				+		{ 2, 0, 0 }, // 0..3 2
			
 
				+		{ 0, 0, 1 }, // 0..4 3
			
 
				+		{ 1, 1, 0 }, // 0..5 4			min endpoint ISE index
			
 
				+		{ 3, 0, 0 }, // 0..7 5
			
 
				+		{ 1, 0, 1 }, // 0..9 6
			
 
				+		{ 2, 1, 0 }, // 0..11 7
			
 
				+		{ 4, 0, 0 }, // 0..15 8
			
 
				+		{ 2, 0, 1 }, // 0..19 9
			
 
				+		{ 3, 1, 0 }, // 0..23 10
			
 
				+		{ 5, 0, 0 }, // 0..31 11		max weight ISE index
			
 
				+		{ 3, 0, 1 }, // 0..39 12
			
 
				+		{ 4, 1, 0 }, // 0..47 13
			
 
				+		{ 6, 0, 0 }, // 0..63 14
			
 
				+		{ 4, 0, 1 }, // 0..79 15
			
 
				+		{ 5, 1, 0 }, // 0..95 16
			
 
				+		{ 7, 0, 0 }, // 0..127 17
			
 
				+		{ 5, 0, 1 }, // 0..159 18
			
 
				+		{ 6, 1, 0 }, // 0..191 19
			
 
				+		{ 8, 0, 0 }, // 0..255 20
			
 
				+	};
			
 
				+	// ORs a code of at most 9 bits into the byte buffer at bit_offset (LSB first) and advances bit_offset by codesize. Bits are only ORed in, so the destination bits must already be 0 for the code to be stored exactly.
			
 
				+	static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize)
			
 
				+	{
			
 
				+		uint8_t* pBuf = reinterpret_cast<uint8_t*>(pDst);
			
 
				+
			
 
				+		assert(codesize <= 9);
			
 
				+		if (codesize)
			
 
				+		{
			
 
				+			uint32_t byte_bit_offset = bit_offset & 7;
			
 
				+			uint32_t val = code << byte_bit_offset;
			
 
				+
			
 
				+			uint32_t index = bit_offset >> 3;
			
 
				+			pBuf[index] |= (uint8_t)val;
			
 
				+
			
 
				+			if (codesize > (8 - byte_bit_offset)) // the code straddles a byte boundary
			
 
				+				pBuf[index + 1] |= (uint8_t)(val >> 8); // spill the upper bits into the next byte
			
 
				+
			
 
				+			bit_offset += codesize;
			
 
				+		}
			
 
				+	}
			
 
				+	// Returns bits [low, high] (inclusive) of "bits", right-aligned. The mask comes from a signed (1 << width), so the field must be narrower than 31 bits; the callers visible in this file extract at most 3 bits.
			
 
				+	static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)
			
 
				+	{
			
 
				+		return (bits >> low) & ((1 << (high - low + 1)) - 1);
			
 
				+	}
			
 
				+
			
 
				+	// Writes the low total_bits (at most 31) bits of value to the output at bit_pos in an endian safe way: LSB first, one byte at a time. Bits are ORed in, and bit_pos is advanced by total_bits.
			
 
				+	static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits)
			
 
				+	{
			
 
				+		assert(total_bits <= 31);
			
 
				+		assert(value < (1u << total_bits));
			
 
				+
			
 
				+		uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
			
 
				+
			
 
				+		while (total_bits)
			
 
				+		{
			
 
				+			const uint32_t bits_to_write = my_min<int>(total_bits, 8 - (bit_pos & 7)); // as many bits as still fit in the current byte
			
 
				+
			
 
				+			pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << (bit_pos & 7)); // the cast to uint8_t drops whatever does not fit in this byte
			
 
				+
			
 
				+			bit_pos += bits_to_write;
			
 
				+			total_bits -= bits_to_write;
			
 
				+			value >>= bits_to_write;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	static const uint8_t g_astc_quint_encode[125] =
			
 
				+	{
			
 
				+		0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57,
			
 
				+		58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104,
			
 
				+		105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54,
			
 
				+		126, 127, 94, 95, 62, 39, 47, 55, 63, 7 /*31 - results in the same decode as 7*/
			
 
				+	};
			
 
				+
			
 
				+	// Encodes 3 values to output, usable for any range that uses quints and bits. n is the number of plain bits per value; 7 + 3 * n bits are written and bit_pos is advanced by that amount.
			
 
				+	static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n)
			
 
				+	{
			
 
				+		// First extract the quints and the bits from the 3 input values
			
 
				+		int quints = 0, bits[3];
			
 
				+		const uint32_t bit_mask = (1 << n) - 1;
			
 
				+		for (int i = 0; i < 3; i++)
			
 
				+		{
			
 
				+			static const int s_muls[3] = { 1, 5, 25 };
			
 
				+
			
 
				+			const int t = pValues[i] >> n; // the quint is whatever sits above the low n bits
			
 
				+
			
 
				+			quints += t * s_muls[i]; // combine the 3 quints into one base-5 number
			
 
				+			bits[i] = pValues[i] & bit_mask;
			
 
				+		}
			
 
				+
			
 
				+		// Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.
			
 
				+		// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
			
 
				+
			
 
				+		assert(quints < 125);
			
 
				+		const int T = g_astc_quint_encode[quints];
			
 
				+
			
 
				+		// Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
			
 
				+		astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
			
 
				+			(bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);
			
 
				+	}
			
 
				+
			
 
				+	static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,
			
 
				+		43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154,
			
 
				+		131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202,
			
 
				+		208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224,
			
 
				+		225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159,
			
 
				+		191, 223, 124, 125, 126 };
			
 
				+
			
 
				+	// Encodes 5 values to output, usable for any range that uses trits and bits. n is the number of plain bits per value; 8 + 5 * n bits are written and bit_pos is advanced by that amount.
			
 
				+	static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n)
			
 
				+	{
			
 
				+		// First extract the trits and the bits from the 5 input values
			
 
				+		int trits = 0, bits[5];
			
 
				+		const uint32_t bit_mask = (1 << n) - 1;
			
 
				+		for (int i = 0; i < 5; i++)
			
 
				+		{
			
 
				+			static const int s_muls[5] = { 1, 3, 9, 27, 81 };
			
 
				+
			
 
				+			const int t = pValues[i] >> n; // the trit is whatever sits above the low n bits
			
 
				+
			
 
				+			trits += t * s_muls[i]; // combine the 5 trits into one base-3 number
			
 
				+			bits[i] = pValues[i] & bit_mask;
			
 
				+		}
			
 
				+
			
 
				+		// Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.
			
 
				+		// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
			
 
				+
			
 
				+		assert(trits < 243);
			
 
				+		const int T = g_astc_trit_encode[trits];
			
 
				+
			
 
				+		// Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.
			
 
				+		astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2);
			
 
				+		// The output is split over two calls, presumably because astc_set_bits() accepts at most 31 bits at a time.
			
 
				+		astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
			
 
				+			(bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);
			
 
				+	}
			
 
				+
			
 
				+	// Packs num_vals values from pSrc_vals using ASTC's BISE for ISE range "range", starting at bit offset bit_pos, and ORs the result into the four dwords at pDst. bit_pos is taken by value, so the caller's position is not advanced.
			
 
				+	void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range)
			
 
				+	{
			
 
				+		uint32_t temp[5] = { 0 }; // scratch block: 4 output dwords plus 1 dword of slack that is never copied to pDst
			
 
				+
			
 
				+		const int num_bits = g_ise_range_table[range][0];
			
 
				+
			
 
				+		int group_size = 0; // 5 for ranges with trits, 3 for ranges with quints, 0 for bit-only ranges
			
 
				+		if (g_ise_range_table[range][1])
			
 
				+			group_size = 5;
			
 
				+		else if (g_ise_range_table[range][2])
			
 
				+			group_size = 3;
			
 
				+
			
 
				+#ifndef NDEBUG
			
 
				+		const uint32_t num_levels = get_ise_levels(range);
			
 
				+		for (int i = 0; i < num_vals; i++)
			
 
				+		{
			
 
				+			assert(pSrc_vals[i] < num_levels);
			
 
				+		}
			
 
				+#endif
			
 
				+
			
 
				+		if (group_size)
			
 
				+		{
			
 
				+			// Range has trits or quints - pack each group of 5 or 3 values 
			
 
				+			const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3);
			
 
				+
			
 
				+			for (int group_index = 0; group_index < total_groups; group_index++)
			
 
				+			{
			
 
				+				uint8_t vals[5] = { 0 }; // a short final group is padded with zeros
			
 
				+
			
 
				+				const int limit = my_min(group_size, num_vals - group_index * group_size);
			
 
				+				for (int i = 0; i < limit; i++)
			
 
				+					vals[i] = pSrc_vals[group_index * group_size + i];
			
 
				+
			
 
				+				if (group_size == 5)
			
 
				+					astc_encode_trits(temp, vals, bit_pos, num_bits);
			
 
				+				else
			
 
				+					astc_encode_quints(temp, vals, bit_pos, num_bits);
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			for (int i = 0; i < num_vals; i++)
			
 
				+				astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);
			
 
				+		}
			
 
				+
			
 
				+		// TODO: Could this write too many bits on incomplete blocks? (A short final group is still emitted as a full trit/quint group.)
			
 
				+		pDst[0] |= temp[0]; pDst[1] |= temp[1];
			
 
				+		pDst[2] |= temp[2]; pDst[3] |= temp[3];
			
 
				+	}
			
 
				+	// Reverses the order of all 32 bits of "bits" (bit 0 <-> bit 31, ...) by swapping the 16-bit halves, then bytes, nibbles, bit pairs and single bits.
			
 
				+	inline uint32_t rev_dword(uint32_t bits)
			
 
				+	{
			
 
				+		uint32_t v = (bits << 16) | (bits >> 16);
			
 
				+		v = ((v & 0x00ff00ff) << 8) | ((v & 0xff00ff00) >> 8); v = ((v & 0x0f0f0f0f) << 4) | ((v & 0xf0f0f0f0) >> 4);
			
 
				+		v = ((v & 0x33333333) << 2) | ((v & 0xcccccccc) >> 2); v = ((v & 0x55555555) << 1) | ((v & 0xaaaaaaaa) >> 1);
			
 
				+		return v;
			
 
				+	}
			
 
				+	// True if value fits in an unsigned field of num_bits bits, i.e. 0 <= value < (1 << num_bits).
			
 
				+	static inline bool is_packable(int value, int num_bits) { assert((num_bits >= 1) && (num_bits < 31)); return (value >= 0) && (value < (1 << num_bits)); }
			
 
				+	// Computes the block mode bits (written as an 11-bit field by pack_astc_block()) from log_block's weight grid size, weight ISE range and dual plane flag. Returns false when no layout tried below can represent the configuration.
			
 
				+	static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits)
			
 
				+	{
			
 
				+		config_bits = 0;
			
 
				+
			
 
				+		const int W = log_block.m_grid_width, H = log_block.m_grid_height;
			
 
				+
			
 
				+		const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision
			
 
				+		const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits
			
 
				+		
			
 
				+		// See Tables 81-82
			
 
				+		// Compute p from weight range (weight ranges 0-5 give p = 2-7 with P = 0, weight ranges 6-11 give p = 2-7 with P = 1)
			
 
				+		uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0);
			
 
				+		
			
 
				+		// Rearrange p's bits to p0 p2 p1
			
 
				+		p = (p >> 1) + ((p & 1) << 2);
			
 
				+		
			
 
				+		// Try encoding each row of table 82.
			
 
				+
			
 
				+		// W+4 H+2
			
 
				+		if (is_packable(W - 4, 2) && is_packable(H - 2, 2))
			
 
				+		{
			
 
				+			config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3);
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				+		// W+8 H+2
			
 
				+		if (is_packable(W - 8, 2) && is_packable(H - 2, 2))
			
 
				+		{
			
 
				+			config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3);
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				+		// W+2 H+8
			
 
				+		if (is_packable(W - 2, 2) && is_packable(H - 8, 2))
			
 
				+		{
			
 
				+			config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3);
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				+		// W+2 H+6
			
 
				+		if (is_packable(W - 2, 2) && is_packable(H - 6, 1))
			
 
				+		{
			
 
				+			config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				+		// W+2 H+2
			
 
				+		if (is_packable(W - 2, 1) && is_packable(H - 2, 2))
			
 
				+		{
			
 
				+			config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); // W is 2 or 3 here, so (W << 7) sets bit 8 and stores W - 2 in bit 7
			
 
				+			return true;
			
 
				+		}
			
 
				+				
			
 
				+		// 12 H+2
			
 
				+		if ((W == 12) && is_packable(H - 2, 2))
			
 
				+		{
			
 
				+			config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2);
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				+		// W+2 12
			
 
				+		if ((H == 12) && is_packable(W - 2, 2))
			
 
				+		{
			
 
				+			config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2);
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				+		// 6 10
			
 
				+		if ((W == 6) && (H == 10))
			
 
				+		{
			
 
				+			config_bits = (Dp_P << 9) | (3 << 7) | (p << 2);
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				+		// 10 6
			
 
				+		if ((W == 10) && (H == 6))
			
 
				+		{
			
 
				+			config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2); // bits 8..5 = 1101
			
 
				+			return true;
			
 
				+		}
			
 
				+				
			
 
				+		// W+6 H+6 (no dual plane or high prec)
			
 
				+		if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2))
			
 
				+		{
			
 
				+			config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2); // bits 10..9 hold H - 6 here instead of the dual plane/high precision flags
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				+		// Failed: unsupported weight grid dimensions or config.
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
	// Packs the logical block description in log_block into the 128-bit physical block phys_block.
	//
	// phys_block is zeroed first. When false is returned it may contain a partially written encoding,
	// so callers should only consume it when the function returns true.
	//
	// pExpected_endpoint_range is optional. It is set to -1 on entry. If packing fails because
	// log_block.m_endpoint_ise_range differs from the endpoint ISE range computed here from the bits
	// left over after the config and weight bits, the computed range is stored there before returning false.
	//
	// Returns true on success, false if log_block has its error flag set or fails any validity check below.
	bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range)
	{
		memset(&phys_block, 0, sizeof(phys_block));

		if (pExpected_endpoint_range)
			*pExpected_endpoint_range = -1;

		// Debug builds assert on an errored block; release builds just reject it.
		assert(!log_block.m_error_flag);
		if (log_block.m_error_flag)
			return false;
				
		// Solid color blocks are emitted through the void-extent packers and skip every check below.
		if (log_block.m_solid_color_flag_ldr)
		{
			pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]);
			return true;
		}
		else if (log_block.m_solid_color_flag_hdr)
		{
			pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]);
			return true;
		}
				
		if ((log_block.m_num_partitions < 1) || (log_block.m_num_partitions > MAX_PARTITIONS))
			return false;

		// Max usable weight range is 11
		if (log_block.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE)
			return false;

		// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
		if ((log_block.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_block.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
			return false;

		// The color component selector is later written as a 2-bit field.
		if (log_block.m_color_component_selector > 3)
			return false;
				
		uint32_t config_bits = 0;
		if (!get_config_bits(log_block, config_bits))
			return false;

		// The 11 config bits are written first, starting at bit 0.
		// NOTE(review): bit_pos is presumably advanced by astc_set_bits (it is reused for every forward write) - confirm.
		uint32_t bit_pos = 0;
		astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11);

		// Dual plane blocks carry two weights per grid position.
		const uint32_t total_grid_weights = (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height);
		const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);

		// 18.24 Illegal Encodings
		if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
			return false;

		// Counts bits (extra CEM bits and the CCS) that are stored directly below the weight data
		// rather than at the forward write position.
		uint32_t total_extra_bits = 0;

		// Partition count minus one, 2 bits.
		astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2);

		if (log_block.m_num_partitions > 1)
		{
			if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS)
				return false;

			// Partition pattern id, 10 bits.
			astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10);

			// Find the smallest and largest color endpoint mode used by any partition.
			uint32_t highest_cem = 0, lowest_cem = UINT32_MAX;
			for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
			{
				highest_cem = my_max(highest_cem, log_block.m_color_endpoint_modes[j]);
				lowest_cem = my_min(lowest_cem, log_block.m_color_endpoint_modes[j]);
			}

			if (highest_cem > 15)
				return false;
			
			// Ensure CEM range is contiguous
			if (((highest_cem >> 2) > (1 + (lowest_cem >> 2))))
				return false;

			// See tables 79/80
			// Default: every partition uses the same CEM, so the low 2 bits stay 0 and the mode follows.
			uint32_t encoded_cem = log_block.m_color_endpoint_modes[0] << 2;
			if (lowest_cem != highest_cem)
			{
				// Mixed modes: the low 2 bits select the base class (1..3), then one class-offset bit (C)
				// and two mode bits (M) are appended per partition.
				encoded_cem = my_min<uint32_t>(3, 1 + (lowest_cem >> 2));

				// See tables at 23.11 Color Endpoint Mode
				for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
				{
					const int M = log_block.m_color_endpoint_modes[j] & 3;
					
					// C must be 0 or 1: each partition's class is either the base class or the next one up.
					const int C = (log_block.m_color_endpoint_modes[j] >> 2) - ((encoded_cem & 3) - 1);
					if ((C & 1) != C)
						return false;

					encoded_cem |= (C << (2 + j)) | (M << (2 + log_block.m_num_partitions + 2 * j));
				}

				// Only 6 CEM bits fit at the forward position; the rest are stored just below the weights.
				total_extra_bits = 3 * log_block.m_num_partitions - 4;

				if ((total_weight_bits + total_extra_bits) > 128)
					return false;

				uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits;
				astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 6, total_extra_bits);
			}

			// Low 6 bits of the encoded CEM go at the forward write position.
			astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6);
		}
		else
		{
			// Single partition: the partition id must be zero and the CEM is stored directly in 4 bits.
			if (log_block.m_partition_id)
				return false;
			if (log_block.m_color_endpoint_modes[0] > 15)
				return false;

			astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4);
		}

		if (log_block.m_dual_plane)
		{
			// Dual plane is rejected for more than 3 partitions.
			if (log_block.m_num_partitions > 3)
				return false;

			// The 2-bit color component selector sits below the weights and any extra CEM bits.
			total_extra_bits += 2;
			
			uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits;
			astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2);
		}

		// Whatever is left between the forward-written config and the backward-stored data holds the endpoints.
		const uint32_t total_config_bits = bit_pos + total_extra_bits;
		const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
		if (num_remaining_bits < 0)
			return false;

		// Each partition contributes 2 + 2 * (CEM class) endpoint values.
		uint32_t total_cem_vals = 0;
		for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
			total_cem_vals += 2 + 2 * (log_block.m_color_endpoint_modes[j] >> 2);

		if (total_cem_vals > MAX_ENDPOINTS)
			return false;

		// Pick the largest endpoint ISE range (searching 20 down to 1) whose encoding fits the remaining bits.
		int endpoint_ise_range = -1;
		for (int k = 20; k > 0; k--)
		{
			int bits = get_ise_sequence_bits(total_cem_vals, k);
			if (bits <= num_remaining_bits)
			{
				endpoint_ise_range = k;
				break;
			}
		}

		// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
		if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
			return false;

		// Ensure the caller utilized the right endpoint ISE range.
		if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range)
		{
			if (pExpected_endpoint_range)
				*pExpected_endpoint_range = endpoint_ise_range;
			return false;
		}

		// Pack endpoints forwards
		encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range);
		
		// Pack weights backwards
		// The weights are encoded into a scratch block from bit 0, then each dword is passed through
		// rev_dword and OR'd into the opposite end of the physical block.
		uint32_t weight_data[4] = { 0 };
		encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range);

		for (uint32_t i = 0; i < 4; i++)
			phys_block.m_vals[i] |= rev_dword(weight_data[3 - i]);

		return true;
	}
			
 
				+
			
 
				+	static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits)
			
 
				+	{
			
 
				+		assert(num_src_bits <= num_dst_bits);
			
 
				+		assert((src & ((1 << num_src_bits) - 1)) == src);
			
 
				+
			
 
				+		uint32_t dst = 0;
			
 
				+		for (int shift = num_dst_bits - num_src_bits; shift > -num_src_bits; shift -= num_src_bits)
			
 
				+			dst |= (shift >= 0) ? (src << shift) : (src >> -shift);
			
 
				+
			
 
				+		return dst;
			
 
				+	}
			
 
				+
			
 
				+	uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range)
			
 
				+	{
			
 
				+		assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE));
			
 
				+		assert(val < get_ise_levels(ise_range));
			
 
				+
			
 
				+		uint32_t u = 0;
			
 
				+
			
 
				+		switch (ise_range)
			
 
				+		{
			
 
				+		case 5:
			
 
				+		{
			
 
				+			u = bit_replication_scale(val, 3, 8);
			
 
				+			break;
			
 
				+		}
			
 
				+		case 8:
			
 
				+		{
			
 
				+			u = bit_replication_scale(val, 4, 8);
			
 
				+			break;
			
 
				+		}
			
 
				+		case 11:
			
 
				+		{
			
 
				+			u = bit_replication_scale(val, 5, 8);
			
 
				+			break;
			
 
				+		}
			
 
				+		case 14:
			
 
				+		{
			
 
				+			u = bit_replication_scale(val, 6, 8);
			
 
				+			break;
			
 
				+		}
			
 
				+		case 17:
			
 
				+		{
			
 
				+			u = bit_replication_scale(val, 7, 8);
			
 
				+			break;
			
 
				+		}
			
 
				+		case 20:
			
 
				+		{
			
 
				+			u = val;
			
 
				+			break;
			
 
				+		}
			
 
				+		case 4:
			
 
				+		case 6:
			
 
				+		case 7:
			
 
				+		case 9:
			
 
				+		case 10:
			
 
				+		case 12:
			
 
				+		case 13:
			
 
				+		case 15:
			
 
				+		case 16:
			
 
				+		case 18:
			
 
				+		case 19:
			
 
				+		{
			
 
				+			const uint32_t num_bits = g_ise_range_table[ise_range][0];
			
 
				+			const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
			
 
				+			const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
			
 
				+
			
 
				+			// compute Table 103 row index
			
 
				+			const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2;
			
 
				+
			
 
				+			assert(range_index >= 0 && range_index <= 10);
			
 
				+
			
 
				+			uint32_t bits = val & ((1 << num_bits) - 1);
			
 
				+			uint32_t tval = val >> num_bits;
			
 
				+
			
 
				+			assert(tval < (num_trits ? 3U : 5U));
			
 
				+
			
 
				+			uint32_t a = bits & 1;
			
 
				+			uint32_t b = (bits >> 1) & 1;
			
 
				+			uint32_t c = (bits >> 2) & 1;
			
 
				+			uint32_t d = (bits >> 3) & 1;
			
 
				+			uint32_t e = (bits >> 4) & 1;
			
 
				+			uint32_t f = (bits >> 5) & 1;
			
 
				+
			
 
				+			uint32_t A = a ? 511 : 0;
			
 
				+			uint32_t B = 0;
			
 
				+
			
 
				+			switch (range_index)
			
 
				+			{
			
 
				+			case 2:
			
 
				+			{
			
 
				+				// 876543210
			
 
				+				// b000b0bb0
			
 
				+				B = (b << 1) | (b << 2) | (b << 4) | (b << 8);
			
 
				+				break;
			
 
				+			}
			
 
				+			case 3:
			
 
				+			{
			
 
				+				// 876543210
			
 
				+				// b0000bb00
			
 
				+				B = (b << 2) | (b << 3) | (b << 8);
			
 
				+				break;
			
 
				+			}
			
 
				+			case 4:
			
 
				+			{
			
 
				+				// 876543210
			
 
				+				// cb000cbcb
			
 
				+				B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8);
			
 
				+				break;
			
 
				+			}
			
 
				+			case 5:
			
 
				+			{
			
 
				+				// 876543210
			
 
				+				// cb0000cbc
			
 
				+				B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8);
			
 
				+				break;
			
 
				+			}
			
 
				+			case 6:
			
 
				+			{
			
 
				+				// 876543210
			
 
				+				// dcb000dcb
			
 
				+				B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8);
			
 
				+				break;
			
 
				+			}
			
 
				+			case 7:
			
 
				+			{
			
 
				+				// 876543210
			
 
				+				// dcb0000dc
			
 
				+				B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8);
			
 
				+				break;
			
 
				+			}
			
 
				+			case 8:
			
 
				+			{
			
 
				+				// 876543210
			
 
				+				// edcb000ed
			
 
				+				B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8);
			
 
				+				break;
			
 
				+			}
			
 
				+			case 9:
			
 
				+			{
			
 
				+				// 876543210
			
 
				+				// edcb0000e
			
 
				+				B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8);
			
 
				+				break;
			
 
				+			}
			
 
				+			case 10:
			
 
				+			{
			
 
				+				// 876543210
			
 
				+				// fedcb000f
			
 
				+				B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8);
			
 
				+				break;
			
 
				+			}
			
 
				+			default:
			
 
				+				break;
			
 
				+			}
			
 
				+
			
 
				+			static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
			
 
				+			uint32_t C = C_vals[range_index];
			
 
				+			uint32_t D = tval;
			
 
				+
			
 
				+			u = D * C + B;
			
 
				+			u = u ^ A;
			
 
				+			u = (A & 0x80) | (u >> 2);
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		default:
			
 
				+		{
			
 
				+			assert(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		}
			
 
				+
			
 
				+		return u;
			
 
				+	}
			
 
				+
			
 
				+	uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range)
			
 
				+	{
			
 
				+		assert(val < get_ise_levels(ise_range));
			
 
				+
			
 
				+		uint32_t u = 0;
			
 
				+		switch (ise_range)
			
 
				+		{
			
 
				+		case 0: 
			
 
				+		{
			
 
				+			u = val ? 63 : 0;
			
 
				+			break;
			
 
				+		}
			
 
				+		case 1: // 0-2 
			
 
				+		{
			
 
				+			const uint8_t s_tab_0_2[3] = { 0, 32, 63 };
			
 
				+			u = s_tab_0_2[val];
			
 
				+			break;
			
 
				+		}
			
 
				+		case 2: // 0-3
			
 
				+		{
			
 
				+			u = bit_replication_scale(val, 2, 6);
			
 
				+			break;
			
 
				+		}
			
 
				+		case 3: // 0-4
			
 
				+		{
			
 
				+			const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 };
			
 
				+			u = s_tab_0_4[val];
			
 
				+			break;
			
 
				+		}
			
 
				+		case 5: // 0-7
			
 
				+		{
			
 
				+			u = bit_replication_scale(val, 3, 6);
			
 
				+			break;
			
 
				+		}
			
 
				+		case 8: // 0-15
			
 
				+		{
			
 
				+			u = bit_replication_scale(val, 4, 6);
			
 
				+			break;
			
 
				+		}
			
 
				+		case 11: // 0-31
			
 
				+		{
			
 
				+			u = bit_replication_scale(val, 5, 6);
			
 
				+			break;
			
 
				+		}
			
 
				+		case 4: // 0-5
			
 
				+		case 6: // 0-9
			
 
				+		case 7: // 0-11
			
 
				+		case 9: // 0-19
			
 
				+		case 10: // 0-23
			
 
				+		{
			
 
				+			const uint32_t num_bits = g_ise_range_table[ise_range][0];
			
 
				+			const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
			
 
				+			const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
			
 
				+			
			
 
				+			// compute Table 103 row index
			
 
				+			const int range_index = num_bits * 2 + (num_quints ? 1 : 0);
			
 
				+
			
 
				+			// Extract bits and tris/quints from value
			
 
				+			const uint32_t bits = val & ((1u << num_bits) - 1);
			
 
				+			const uint32_t D = val >> num_bits;
			
 
				+
			
 
				+			assert(D < (num_trits ? 3U : 5U));
			
 
				+
			
 
				+			// Now dequantize
			
 
				+			// See Table 103. ASTC weight unquantization parameters
			
 
				+			static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 };
			
 
				+					
			
 
				+			const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1;
			
 
				+
			
 
				+			const uint32_t A = (a == 0) ? 0 : 0x7F;
			
 
				+						
			
 
				+			uint32_t B = 0;
			
 
				+			if (range_index == 4)
			
 
				+				B = ((b << 6) | (b << 2) | (b << 0));
			
 
				+			else if (range_index == 5)
			
 
				+				B = ((b << 6) | (b << 1));
			
 
				+			else if (range_index == 6)
			
 
				+				B = ((c << 6) | (b << 5) | (c << 1) | (b << 0));
			
 
				+
			
 
				+			const uint32_t C = C_table[range_index - 2];
			
 
				+
			
 
				+			u = D * C + B;
			
 
				+			u = u ^ A;
			
 
				+			u = (A & 0x20) | (u >> 2);
			
 
				+			break;
			
 
				+		}
			
 
				+		default:
			
 
				+			assert(0);
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		if (u > 32)
			
 
				+			u++;
			
 
				+
			
 
				+		return u;
			
 
				+	}
			
 
				+
			
 
				+	// Returns the nearest ISE symbol given a [0,255] endpoint value.
			
 
				+	uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range)
			
 
				+	{
			
 
				+		assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE);
			
 
				+
			
 
				+		const uint32_t total_levels = get_ise_levels(ise_range);
			
 
				+		int best_e = INT_MAX, best_index = 0;
			
 
				+		for (uint32_t i = 0; i < total_levels; i++)
			
 
				+		{
			
 
				+			const int qv = dequant_bise_endpoint(i, ise_range);
			
 
				+			int e = labs(v - qv);
			
 
				+			if (e < best_e)
			
 
				+			{
			
 
				+				best_e = e;
			
 
				+				best_index = i;
			
 
				+				if (!best_e)
			
 
				+					break;
			
 
				+			}
			
 
				+		}
			
 
				+		return best_index;
			
 
				+	}
			
 
				+
			
 
				+	// Returns the nearest ISE weight given a [0,64] endpoint value.
			
 
				+	uint32_t find_nearest_bise_weight(int v, uint32_t ise_range)
			
 
				+	{
			
 
				+		assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
			
 
				+		assert(v <= (int)MAX_WEIGHT_VALUE);
			
 
				+
			
 
				+		const uint32_t total_levels = get_ise_levels(ise_range);
			
 
				+		int best_e = INT_MAX, best_index = 0;
			
 
				+		for (uint32_t i = 0; i < total_levels; i++)
			
 
				+		{
			
 
				+			const int qv = dequant_bise_weight(i, ise_range);
			
 
				+			int e = labs(v - qv);
			
 
				+			if (e < best_e)
			
 
				+			{
			
 
				+				best_e = e;
			
 
				+				best_index = i;
			
 
				+				if (!best_e)
			
 
				+					break;
			
 
				+			}
			
 
				+		}
			
 
				+		return best_index;
			
 
				+	}
			
 
				+
			
 
				+	void create_quant_tables(
			
 
				+		uint8_t* pVal_to_ise,	// [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
			
 
				+		uint8_t* pISE_to_val,	// ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
			
 
				+		uint8_t* pISE_to_rank,	// returns the level rank index given an ISE symbol, [levels]
			
 
				+		uint8_t* pRank_to_ISE,  // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
			
 
				+		uint32_t ise_range,		// ise range, [4,20] for endpoints, [0,11] for weights
			
 
				+		bool weight_flag)		// false if block endpoints, true if weights
			
 
				+	{
			
 
				+		const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256;
			
 
				+
			
 
				+		for (uint32_t i = 0; i < num_dequant_vals; i++)
			
 
				+		{
			
 
				+			uint32_t bise_index = weight_flag ? astc_helpers::find_nearest_bise_weight(i, ise_range) : astc_helpers::find_nearest_bise_endpoint(i, ise_range);
			
 
				+
			
 
				+			if (pVal_to_ise)
			
 
				+				pVal_to_ise[i] = (uint8_t)bise_index;
			
 
				+
			
 
				+			if (pISE_to_val)
			
 
				+				pISE_to_val[bise_index] = weight_flag ? (uint8_t)astc_helpers::dequant_bise_weight(bise_index, ise_range) : (uint8_t)astc_helpers::dequant_bise_endpoint(bise_index, ise_range);
			
 
				+		}
			
 
				+
			
 
				+		if (pISE_to_rank || pRank_to_ISE)
			
 
				+		{
			
 
				+			const uint32_t num_levels = get_ise_levels(ise_range);
			
 
				+
			
 
				+			if (!g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2])
			
 
				+			{
			
 
				+				// Only bits
			
 
				+				for (uint32_t i = 0; i < num_levels; i++)
			
 
				+				{
			
 
				+					if (pISE_to_rank)
			
 
				+						pISE_to_rank[i] = (uint8_t)i;
			
 
				+
			
 
				+					if (pRank_to_ISE)
			
 
				+						pRank_to_ISE[i] = (uint8_t)i;
			
 
				+				}
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// Range has trits or quints
			
 
				+				uint32_t vals[256];
			
 
				+				for (uint32_t i = 0; i < num_levels; i++)
			
 
				+				{
			
 
				+					uint32_t v = weight_flag ? astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range);
			
 
				+					
			
 
				+					// Low=ISE value
			
 
				+					// High=dequantized value
			
 
				+					vals[i] = (v << 16) | i;
			
 
				+				}
			
 
				+				
			
 
				+				// Sorts by dequantized value
			
 
				+				std::sort(vals, vals + num_levels);
			
 
				+				
			
 
				+				for (uint32_t rank = 0; rank < num_levels; rank++)
			
 
				+				{
			
 
				+					uint32_t ise_val = (uint8_t)vals[rank];
			
 
				+
			
 
				+					if (pISE_to_rank)
			
 
				+						pISE_to_rank[ise_val] = (uint8_t)rank;
			
 
				+					
			
 
				+					if (pRank_to_ISE)
			
 
				+						pRank_to_ISE[rank] = (uint8_t)ise_val;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah)
			
 
				+	{
			
 
				+		uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
			
 
				+		memset(pDst, 0xFF, 16);
			
 
				+
			
 
				+		pDst[0] = 0b11111100;
			
 
				+		pDst[1] = 0b11111101;
			
 
				+
			
 
				+		pDst[8] = (uint8_t)rh;
			
 
				+		pDst[9] = (uint8_t)(rh >> 8);
			
 
				+		pDst[10] = (uint8_t)gh;
			
 
				+		pDst[11] = (uint8_t)(gh >> 8);
			
 
				+		pDst[12] = (uint8_t)bh;
			
 
				+		pDst[13] = (uint8_t)(bh >> 8);
			
 
				+		pDst[14] = (uint8_t)ah;
			
 
				+		pDst[15] = (uint8_t)(ah >> 8);
			
 
				+	}
			
 
				+
			
 
				+	// rh-ah are half-floats
			
 
				+	void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah) 
			
 
				+	{
			
 
				+		uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
			
 
				+		memset(pDst, 0xFF, 16);
			
 
				+
			
 
				+		pDst[0] = 0b11111100;
			
 
				+		
			
 
				+		pDst[8] = (uint8_t)rh;
			
 
				+		pDst[9] = (uint8_t)(rh >> 8);
			
 
				+		pDst[10] = (uint8_t)gh;
			
 
				+		pDst[11] = (uint8_t)(gh >> 8);
			
 
				+		pDst[12] = (uint8_t)bh;
			
 
				+		pDst[13] = (uint8_t)(bh >> 8);
			
 
				+		pDst[14] = (uint8_t)ah;
			
 
				+		pDst[15] = (uint8_t)(ah >> 8);
			
 
				+	}
			
 
				+		
			
 
				+	bool is_cem_ldr(uint32_t mode)
			
 
				+	{
			
 
				+		switch (mode)
			
 
				+		{
			
 
				+		case CEM_LDR_LUM_DIRECT:
			
 
				+		case CEM_LDR_LUM_BASE_PLUS_OFS:
			
 
				+		case CEM_LDR_LUM_ALPHA_DIRECT:
			
 
				+		case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
			
 
				+		case CEM_LDR_RGB_BASE_SCALE:
			
 
				+		case CEM_LDR_RGB_DIRECT:
			
 
				+		case CEM_LDR_RGB_BASE_PLUS_OFFSET:
			
 
				+		case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
			
 
				+		case CEM_LDR_RGBA_DIRECT:
			
 
				+		case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
			
 
				+			return true;
			
 
				+		default:
			
 
				+			break;
			
 
				+		}
			
 
				+	
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	bool is_valid_block_size(uint32_t w, uint32_t h)
			
 
				+	{
			
 
				+		assert((w >= MIN_BLOCK_DIM) && (w <= MAX_BLOCK_DIM));
			
 
				+		assert((h >= MIN_BLOCK_DIM) && (h <= MAX_BLOCK_DIM));
			
 
				+
			
 
				+#define SIZECHK(x, y) if ((w == (x)) && (h == (y))) return true;
			
 
				+		SIZECHK(4, 4);
			
 
				+		SIZECHK(5, 4);
			
 
				+
			
 
				+		SIZECHK(5, 5);
			
 
				+
			
 
				+		SIZECHK(6, 5);
			
 
				+		SIZECHK(6, 6);
			
 
				+
			
 
				+		SIZECHK(8, 5);
			
 
				+		SIZECHK(8, 6);
			
 
				+		SIZECHK(10, 5);
			
 
				+		SIZECHK(10, 6);
			
 
				+
			
 
				+		SIZECHK(8, 8);
			
 
				+		SIZECHK(10, 8);
			
 
				+		SIZECHK(10, 10);
			
 
				+
			
 
				+		SIZECHK(12, 10);
			
 
				+		SIZECHK(12, 12);
			
 
				+#undef SIZECHK
			
 
				+
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	bool block_has_any_hdr_cems(const log_astc_block& log_blk)
			
 
				+	{
			
 
				+		assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
			
 
				+
			
 
				+		for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
			
 
				+			if (is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
			
 
				+				return true;
			
 
				+
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	bool block_has_any_ldr_cems(const log_astc_block& log_blk)
			
 
				+	{
			
 
				+		assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
			
 
				+
			
 
				+		for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
			
 
				+			if (!is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
			
 
				+				return true;
			
 
				+
			
 
				+		return false;
			
 
				+	}
			
 
				+		
			
 
	// Global dequantization tables, filled in by init_tables().
	dequant_tables g_dequant_tables;

	// Defined further down in this file; declared here so init_tables() can call it.
	void precompute_texel_partitions_4x4();

	// One-time setup: initializes g_dequant_tables (forwarding init_rank_tabs to its init())
	// and fills the precomputed 4x4 texel partition table.
	// NOTE(review): get_precompute_texel_partitions_4x4() asserts that the partition table has been filled,
	// so this presumably must run before any lookup - confirm against callers.
	void init_tables(bool init_rank_tabs)
	{
		g_dequant_tables.init(init_rank_tabs);
		
		precompute_texel_partitions_4x4();
	}
			
 
				+
			
 
				+	struct weighted_sample
			
 
				+	{
			
 
				+		uint8_t m_src_x;
			
 
				+		uint8_t m_src_y;
			
 
				+		uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8
			
 
				+	};
			
 
				+
			
 
				+	static void compute_upsample_weights(
			
 
				+		int block_width, int block_height,
			
 
				+		int weight_grid_width, int weight_grid_height,
			
 
				+		weighted_sample* pWeights) // there will be block_width * block_height bilinear samples
			
 
				+	{
			
 
				+		const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1);
			
 
				+		const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1);
			
 
				+
			
 
				+		for (int texelY = 0; texelY < block_height; texelY++)
			
 
				+		{
			
 
				+			for (int texelX = 0; texelX < block_width; texelX++)
			
 
				+			{
			
 
				+				const uint32_t gX = (scaleX * texelX * (weight_grid_width - 1) + 32) >> 6;
			
 
				+				const uint32_t gY = (scaleY * texelY * (weight_grid_height - 1) + 32) >> 6;
			
 
				+				const uint32_t jX = gX >> 4;
			
 
				+				const uint32_t jY = gY >> 4;
			
 
				+				const uint32_t fX = gX & 0xf;
			
 
				+				const uint32_t fY = gY & 0xf;
			
 
				+				const uint32_t w11 = (fX * fY + 8) >> 4;
			
 
				+				const uint32_t w10 = fY - w11;
			
 
				+				const uint32_t w01 = fX - w11;
			
 
				+				const uint32_t w00 = 16 - fX - fY + w11;
			
 
				+
			
 
				+				weighted_sample& s = pWeights[texelX + texelY * block_width];
			
 
				+				s.m_src_x = (uint8_t)jX;
			
 
				+				s.m_src_y = (uint8_t)jY;
			
 
				+				s.m_weights[0][0] = (uint8_t)w00;
			
 
				+				s.m_weights[0][1] = (uint8_t)w01;
			
 
				+				s.m_weights[1][0] = (uint8_t)w10;
			
 
				+				s.m_weights[1][1] = (uint8_t)w11;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	// Should be dequantized [0,64] weights
			
 
				+	static void upsample_weight_grid(
			
 
				+		uint32_t bx, uint32_t by,		// destination/to dimension
			
 
				+		uint32_t wx, uint32_t wy,		// source/from dimension
			
 
				+		const uint8_t* pSrc_weights,	// these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
			
 
				+		uint8_t* pDst_weights)			// [by][bx]
			
 
				+	{
			
 
				+		assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12));
			
 
				+		assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by));
			
 
				+
			
 
				+		const uint32_t total_src_weights = wx * wy;
			
 
				+		const uint32_t total_dst_weights = bx * by;
			
 
				+
			
 
				+		if (total_src_weights == total_dst_weights)
			
 
				+		{
			
 
				+			memcpy(pDst_weights, pSrc_weights, total_src_weights);
			
 
				+			return;
			
 
				+		}
			
 
				+
			
 
				+		weighted_sample weights[12 * 12];
			
 
				+		compute_upsample_weights(bx, by, wx, wy, weights);
			
 
				+
			
 
				+		const weighted_sample* pS = weights;
			
 
				+
			
 
				+		for (uint32_t y = 0; y < by; y++)
			
 
				+		{
			
 
				+			for (uint32_t x = 0; x < bx; x++, ++pS)
			
 
				+			{
			
 
				+				const uint32_t w00 = pS->m_weights[0][0];
			
 
				+				const uint32_t w01 = pS->m_weights[0][1];
			
 
				+				const uint32_t w10 = pS->m_weights[1][0];
			
 
				+				const uint32_t w11 = pS->m_weights[1][1];
			
 
				+
			
 
				+				assert(w00 || w01 || w10 || w11);
			
 
				+
			
 
				+				const uint32_t sx = pS->m_src_x, sy = pS->m_src_y;
			
 
				+
			
 
				+				uint32_t total = 8;
			
 
				+				if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * w00;
			
 
				+				if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * w01;
			
 
				+				if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * w10;
			
 
				+				if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * w11;
			
 
				+
			
 
				+				pDst_weights[x + y * bx] = (uint8_t)(total >> 4);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	inline uint32_t hash52(uint32_t v)
			
 
				+	{
			
 
				+		uint32_t p = v;
			
 
				+		p ^= p >> 15;   p -= p << 17;   p += p << 7;    p += p << 4;
			
 
				+		p ^= p >> 5;   p += p << 16;   p ^= p >> 7;    p ^= p >> 3;
			
 
				+		p ^= p << 6;   p ^= p >> 17;
			
 
				+		return p;
			
 
				+	}
			
 
				+
			
 
	// Returns the partition index (0..3) that texel (xIn, yIn, zIn) belongs to for the given
	// partition seed and partition count.
	//
	// seedIn         - partition pattern seed.
	// xIn, yIn, zIn  - texel coordinates inside the block; zIn is asserted to be 0.
	// num_partitions - number of partitions; partitions 2 and 3 are only candidates when it is >= 3 / >= 4.
	// small_block    - when true, the coordinates are doubled before use.
	//
	// NOTE(review): this looks like the partition selection function from the ASTC specification - confirm.
	int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block)
	{
		assert(zIn == 0);

		const uint32_t  x = small_block ? xIn << 1 : xIn;
		const uint32_t  y = small_block ? yIn << 1 : yIn;
		const uint32_t  z = small_block ? zIn << 1 : zIn;
		// Give each partition count its own block of 1024 seeds before hashing.
		const uint32_t  seed = seedIn + 1024 * (num_partitions - 1);
		const uint32_t  rnum = hash52(seed);

		// Slice twelve 4-bit values out of the hash (several of the slices overlap).
		uint8_t         seed1 = (uint8_t)(rnum & 0xf);
		uint8_t         seed2 = (uint8_t)((rnum >> 4) & 0xf);
		uint8_t         seed3 = (uint8_t)((rnum >> 8) & 0xf);
		uint8_t         seed4 = (uint8_t)((rnum >> 12) & 0xf);
		uint8_t         seed5 = (uint8_t)((rnum >> 16) & 0xf);
		uint8_t         seed6 = (uint8_t)((rnum >> 20) & 0xf);
		uint8_t         seed7 = (uint8_t)((rnum >> 24) & 0xf);
		uint8_t         seed8 = (uint8_t)((rnum >> 28) & 0xf);
		uint8_t         seed9 = (uint8_t)((rnum >> 18) & 0xf);
		uint8_t         seed10 = (uint8_t)((rnum >> 22) & 0xf);
		uint8_t         seed11 = (uint8_t)((rnum >> 26) & 0xf);
		uint8_t         seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf);

		// Square each value (at most 15 * 15 = 225, so the result still fits in a uint8_t).
		seed1 = (uint8_t)(seed1 * seed1);
		seed2 = (uint8_t)(seed2 * seed2);
		seed3 = (uint8_t)(seed3 * seed3);
		seed4 = (uint8_t)(seed4 * seed4);
		seed5 = (uint8_t)(seed5 * seed5);
		seed6 = (uint8_t)(seed6 * seed6);
		seed7 = (uint8_t)(seed7 * seed7);
		seed8 = (uint8_t)(seed8 * seed8);
		seed9 = (uint8_t)(seed9 * seed9);
		seed10 = (uint8_t)(seed10 * seed10);
		seed11 = (uint8_t)(seed11 * seed11);
		seed12 = (uint8_t)(seed12 * seed12);

		// Choose the right-shift amounts from the low seed bits and the partition count.
		const int shA = (seed & 2) != 0 ? 4 : 5;
		const int shB = (num_partitions == 3) ? 6 : 5;
		const int sh1 = (seed & 1) != 0 ? shA : shB;
		const int sh2 = (seed & 1) != 0 ? shB : shA;
		const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;

		// Odd-numbered seeds 1..7 use sh1, even-numbered seeds 2..8 use sh2, seeds 9..12 use sh3.
		seed1 = (uint8_t)(seed1 >> sh1);
		seed2 = (uint8_t)(seed2 >> sh2);
		seed3 = (uint8_t)(seed3 >> sh1);
		seed4 = (uint8_t)(seed4 >> sh2);
		seed5 = (uint8_t)(seed5 >> sh1);
		seed6 = (uint8_t)(seed6 >> sh2);
		seed7 = (uint8_t)(seed7 >> sh1);
		seed8 = (uint8_t)(seed8 >> sh2);
		seed9 = (uint8_t)(seed9 >> sh3);
		seed10 = (uint8_t)(seed10 >> sh3);
		seed11 = (uint8_t)(seed11 >> sh3);
		seed12 = (uint8_t)(seed12 >> sh3);

		// One 6-bit score per candidate partition; c and d are forced to 0 when that partition is not in use.
		const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14));
		const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10));
		const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0;
		const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0;

		// The highest score wins; ties go to the lower partition index.
		return (a >= b && a >= c && a >= d) ? 0
			: (b >= c && b >= d) ? 1
			: (c >= d) ? 2
			: 3;
	}
			
 
				+
			
 
				+	static uint32_t g_texel_partitions_4x4[1024][2];
			
 
				+
			
 
				+	void precompute_texel_partitions_4x4()
			
 
				+	{
			
 
				+		for (uint32_t p = 0; p < 1024; p++)
			
 
				+		{
			
 
				+			uint32_t v2 = 0, v3 = 0;
			
 
				+
			
 
				+			for (uint32_t y = 0; y < 4; y++)
			
 
				+			{
			
 
				+				for (uint32_t x = 0; x < 4; x++)
			
 
				+				{
			
 
				+					const uint32_t shift = x * 2 + y * 8;
			
 
				+					v2 |= (compute_texel_partition(p, x, y, 0, 2, true) << shift);
			
 
				+					v3 |= (compute_texel_partition(p, x, y, 0, 3, true) << shift);
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			g_texel_partitions_4x4[p][0] = v2;
			
 
				+			g_texel_partitions_4x4[p][1] = v3;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	static inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)
			
 
				+	{
			
 
				+		assert(g_texel_partitions_4x4[1][0]);
			
 
				+		assert(seed < 1024);
			
 
				+		assert((x <= 3) && (y <= 3));
			
 
				+		assert((num_partitions >= 2) && (num_partitions <= 3));
			
 
				+	
			
 
				+		const uint32_t shift = x * 2 + y * 8;
			
 
				+		return (g_texel_partitions_4x4[seed][num_partitions - 2] >> shift) & 3;
			
 
				+	}
			
 
				+
			
 
				// Applies the "blue contraction" transform: red and green are each averaged with blue
				// (rounding down via an arithmetic shift); blue and alpha are passed through unchanged.
				void blue_contract(
					int r, int g, int b, int a,
					int &dr, int &dg, int &db, int &da)
				{
					const int red_plus_blue = r + b;
					const int green_plus_blue = g + b;

					dr = red_plus_blue >> 1;
					dg = green_plus_blue >> 1;
					db = b;
					da = a;
				}
			
 
				+
			
 
				// Moves bit 7 of 'a' into bit 7 of 'b' (after shifting 'b' right by one), then turns the
				// remaining bits of 'a' into a signed 6-bit offset in [-32, 31].
				inline void bit_transfer_signed(int& a, int& b)
				{
					// Make room in b and copy a's top bit across.
					b >>= 1;
					b |= a & 0x80;

					// Keep six bits of a, then sign-extend from bit 5.
					a = (a >> 1) & 0x3F;
					if (a & 0x20)
						a -= 0x40;
				}
			
 
				+
			
 
				// Returns 'a' limited to the range [l, h]. The lower bound is tested first.
				static inline int clamp(int a, int l, int h)
				{
					return (a < l) ? l : ((a > h) ? h : a);
				}
			
 
				+
			
 
				// Returns 'a' limited to the range [l, h]. The lower bound is tested first; a value for
				// which both comparisons are false is returned unchanged.
				static inline float clampf(float a, float l, float h)
				{
					return (a < l) ? l : ((a > h) ? h : a);
				}
			
 
				+
			
 
				// Sign-extends the low 'num_src_bits' bits of 'src' to a full int.
				// If bit (num_src_bits - 1) is set, all higher bits are set; otherwise they are cleared.
				// num_src_bits must be in [2, 31].
				inline int sign_extend(int src, int num_src_bits)
				{
					assert((num_src_bits >= 2) && (num_src_bits <= 31));

					// Build the masks in unsigned arithmetic: "1 << 31" on a signed int overflows, and
					// num_src_bits == 31 is explicitly allowed by the assert above.
					const uint32_t field_mask = (1u << num_src_bits) - 1u;
					const uint32_t sign_bit = 1u << (num_src_bits - 1);
					const uint32_t bits = static_cast<uint32_t>(src);

					if (bits & sign_bit)
						return static_cast<int>(bits | ~field_mask);

					return static_cast<int>(bits & field_mask);
				}
			
 
				+
			
 
				+	// endpoints is [4][2]
			
 
				+	void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE)
			
 
				+	{
			
 
				+		assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA);
			
 
				+
			
 
				+		int v0 = pE[0], v1 = pE[1];
			
 
				+
			
 
				+		int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0];
			
 
				+		int& e1_r = pEndpoints[0][1], &e1_g = pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1];
			
 
				+
			
 
				+		switch (cem_index)
			
 
				+		{
			
 
				+		case CEM_LDR_LUM_DIRECT:
			
 
				+		{
			
 
				+			e0_r = v0; e1_r = v1;
			
 
				+			e0_g = v0; e1_g = v1;
			
 
				+			e0_b = v0; e1_b = v1;
			
 
				+			e0_a = 0xFF; e1_a = 0xFF;
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_LDR_LUM_BASE_PLUS_OFS:
			
 
				+		{
			
 
				+			int l0 = (v0 >> 2) | (v1 & 0xc0);
			
 
				+			int l1 = l0 + (v1 & 0x3f);
			
 
				+
			
 
				+			if (l1 > 0xFF)
			
 
				+				l1 = 0xFF;
			
 
				+
			
 
				+			e0_r = l0; e1_r = l1;
			
 
				+			e0_g = l0; e1_g = l1;
			
 
				+			e0_b = l0; e1_b = l1;
			
 
				+			e0_a = 0xFF; e1_a = 0xFF;
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_LDR_LUM_ALPHA_DIRECT:
			
 
				+		{
			
 
				+			int v2 = pE[2], v3 = pE[3];
			
 
				+
			
 
				+			e0_r = v0; e1_r = v1;
			
 
				+			e0_g = v0; e1_g = v1;
			
 
				+			e0_b = v0; e1_b = v1;
			
 
				+			e0_a = v2; e1_a = v3;
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
			
 
				+		{
			
 
				+			int v2 = pE[2], v3 = pE[3];
			
 
				+
			
 
				+			bit_transfer_signed(v1, v0);
			
 
				+			bit_transfer_signed(v3, v2);
			
 
				+
			
 
				+			e0_r = v0; e1_r = v0 + v1;
			
 
				+			e0_g = v0; e1_g = v0 + v1;
			
 
				+			e0_b = v0; e1_b = v0 + v1;
			
 
				+			e0_a = v2; e1_a = v2 + v3;
			
 
				+
			
 
				+			for (uint32_t c = 0; c < 4; c++)
			
 
				+			{
			
 
				+				pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
			
 
				+				pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
			
 
				+			}
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_LDR_RGB_BASE_SCALE:
			
 
				+		{
			
 
				+			int v2 = pE[2], v3 = pE[3];
			
 
				+
			
 
				+			e0_r = (v0 * v3) >> 8; e1_r = v0;
			
 
				+			e0_g = (v1 * v3) >> 8; e1_g = v1;
			
 
				+			e0_b = (v2 * v3) >> 8; e1_b = v2;
			
 
				+			e0_a = 0xFF; e1_a = 0xFF;
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_LDR_RGB_DIRECT:
			
 
				+		{
			
 
				+			int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
			
 
				+
			
 
				+			if ((v1 + v3 + v5) >= (v0 + v2 + v4))
			
 
				+			{
			
 
				+				e0_r = v0; e1_r = v1;
			
 
				+				e0_g = v2; e1_g = v3;
			
 
				+				e0_b = v4; e1_b = v5;
			
 
				+				e0_a = 0xFF; e1_a = 0xFF;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
			
 
				+				blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
			
 
				+			}
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_LDR_RGB_BASE_PLUS_OFFSET:
			
 
				+		{
			
 
				+			int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
			
 
				+
			
 
				+			bit_transfer_signed(v1, v0);
			
 
				+			bit_transfer_signed(v3, v2);
			
 
				+			bit_transfer_signed(v5, v4);
			
 
				+
			
 
				+			if ((v1 + v3 + v5) >= 0)
			
 
				+			{
			
 
				+				e0_r = v0; e1_r = v0 + v1;
			
 
				+				e0_g = v2; e1_g = v2 + v3;
			
 
				+				e0_b = v4; e1_b = v4 + v5;
			
 
				+				e0_a = 0xFF; e1_a = 0xFF;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
			
 
				+				blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
			
 
				+			}
			
 
				+
			
 
				+			for (uint32_t c = 0; c < 4; c++)
			
 
				+			{
			
 
				+				pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
			
 
				+				pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
			
 
				+			}
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
			
 
				+		{
			
 
				+			int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
			
 
				+
			
 
				+			e0_r = (v0 * v3) >> 8; e1_r = v0;
			
 
				+			e0_g = (v1 * v3) >> 8; e1_g = v1;
			
 
				+			e0_b = (v2 * v3) >> 8; e1_b = v2;
			
 
				+			e0_a = v4; e1_a = v5;
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_LDR_RGBA_DIRECT:
			
 
				+		{
			
 
				+			int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
			
 
				+
			
 
				+			if ((v1 + v3 + v5) >= (v0 + v2 + v4))
			
 
				+			{
			
 
				+				e0_r = v0; e1_r = v1;
			
 
				+				e0_g = v2; e1_g = v3;
			
 
				+				e0_b = v4; e1_b = v5;
			
 
				+				e0_a = v6; e1_a = v7;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a);
			
 
				+				blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
			
 
				+			}
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
			
 
				+		{
			
 
				+			int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
			
 
				+
			
 
				+			bit_transfer_signed(v1, v0);
			
 
				+			bit_transfer_signed(v3, v2);
			
 
				+			bit_transfer_signed(v5, v4);
			
 
				+			bit_transfer_signed(v7, v6);
			
 
				+
			
 
				+			if ((v1 + v3 + v5) >= 0)
			
 
				+			{
			
 
				+				e0_r = v0; e1_r = v0 + v1;
			
 
				+				e0_g = v2; e1_g = v2 + v3;
			
 
				+				e0_b = v4; e1_b = v4 + v5;
			
 
				+				e0_a = v6; e1_a = v6 + v7;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a);
			
 
				+				blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
			
 
				+			}
			
 
				+
			
 
				+			for (uint32_t c = 0; c < 4; c++)
			
 
				+			{
			
 
				+				pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
			
 
				+				pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
			
 
				+			}
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_HDR_LUM_LARGE_RANGE:
			
 
				+		{
			
 
				+			int y0, y1;
			
 
				+			if (v1 >= v0)
			
 
				+			{
			
 
				+				y0 = (v0 << 4);
			
 
				+				y1 = (v1 << 4);
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				y0 = (v1 << 4) + 8;
			
 
				+				y1 = (v0 << 4) - 8;
			
 
				+			}
			
 
				+
			
 
				+			e0_r = y0; e1_r = y1;
			
 
				+			e0_g = y0; e1_g = y1;
			
 
				+			e0_b = y0; e1_b = y1;
			
 
				+			e0_a = 0x780; e1_a = 0x780;
			
 
				+						
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_HDR_LUM_SMALL_RANGE:
			
 
				+		{
			
 
				+			int y0, y1, d;
			
 
				+
			
 
				+			if ((v0 & 0x80) != 0)
			
 
				+			{
			
 
				+				y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
			
 
				+				d = (v1 & 0x1F) << 2;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
			
 
				+				d = (v1 & 0x0F) << 1;
			
 
				+			}
			
 
				+						
			
 
				+			y1 = y0 + d;
			
 
				+			if (y1 > 0xFFF) 
			
 
				+				y1 = 0xFFF;
			
 
				+						
			
 
				+			e0_r = y0; e1_r = y1;
			
 
				+			e0_g = y0; e1_g = y1;
			
 
				+			e0_b = y0; e1_b = y1;
			
 
				+			e0_a = 0x780; e1_a = 0x780;
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_HDR_RGB_BASE_SCALE:
			
 
				+		{
			
 
				+			int v2 = pE[2], v3 = pE[3];
			
 
				+						
			
 
				+			int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
			
 
				+			
			
 
				+			int majcomp, mode;
			
 
				+			if ((modeval & 0xC) != 0xC) 
			
 
				+			{
			
 
				+				majcomp = modeval >> 2; 
			
 
				+				mode = modeval & 3;
			
 
				+			}
			
 
				+			else if (modeval != 0xF) 
			
 
				+			{
			
 
				+				majcomp = modeval & 3;  
			
 
				+				mode = 4;
			
 
				+			}
			
 
				+			else 
			
 
				+			{
			
 
				+				majcomp = 0; 
			
 
				+				mode = 5;
			
 
				+			}
			
 
				+
			
 
				+			int red = v0 & 0x3f; 
			
 
				+			int green = v1 & 0x1f;
			
 
				+			int blue = v2 & 0x1f; 
			
 
				+			int scale = v3 & 0x1f;
			
 
				+
			
 
				+			int x0 = (v1 >> 6) & 1; 
			
 
				+			int x1 = (v1 >> 5) & 1; 
			
 
				+			int x2 = (v2 >> 6) & 1;
			
 
				+			int x3 = (v2 >> 5) & 1; 
			
 
				+			int x4 = (v3 >> 7) & 1; 
			
 
				+			int x5 = (v3 >> 6) & 1;
			
 
				+			int x6 = (v3 >> 5) & 1;
			
 
				+
			
 
				+			int ohm = 1 << mode;
			
 
				+			if (ohm & 0x30) green |= x0 << 6;
			
 
				+			if (ohm & 0x3A) green |= x1 << 5;
			
 
				+			if (ohm & 0x30) blue |= x2 << 6;
			
 
				+			if (ohm & 0x3A) blue |= x3 << 5;
			
 
				+			if (ohm & 0x3D) scale |= x6 << 5;
			
 
				+			if (ohm & 0x2D) scale |= x5 << 6;
			
 
				+			if (ohm & 0x04) scale |= x4 << 7;
			
 
				+			if (ohm & 0x3B) red |= x4 << 6;
			
 
				+			if (ohm & 0x04) red |= x3 << 6;
			
 
				+			if (ohm & 0x10) red |= x5 << 7;
			
 
				+			if (ohm & 0x0F) red |= x2 << 7;
			
 
				+			if (ohm & 0x05) red |= x1 << 8;
			
 
				+			if (ohm & 0x0A) red |= x0 << 8;
			
 
				+			if (ohm & 0x05) red |= x0 << 9;
			
 
				+			if (ohm & 0x02) red |= x6 << 9;
			
 
				+			if (ohm & 0x01) red |= x3 << 10;
			
 
				+			if (ohm & 0x02) red |= x5 << 10;
			
 
				+
			
 
				+			static const int s_shamts[6] = { 1,1,2,3,4,5 };
			
 
				+			
			
 
				+			const int shamt = s_shamts[mode];
			
 
				+			red <<= shamt; 
			
 
				+			green <<= shamt; 
			
 
				+			blue <<= shamt; 
			
 
				+			scale <<= shamt;
			
 
				+
			
 
				+			if (mode != 5) 
			
 
				+			{ 
			
 
				+				green = red - green; 
			
 
				+				blue = red - blue; 
			
 
				+			}
			
 
				+
			
 
				+			if (majcomp == 1) 
			
 
				+				std::swap(red, green);
			
 
				+
			
 
				+			if (majcomp == 2) 
			
 
				+				std::swap(red, blue);
			
 
				+						
			
 
				+			e1_r = clamp(red, 0, 0xFFF);
			
 
				+			e1_g = clamp(green, 0, 0xFFF);
			
 
				+			e1_b = clamp(blue, 0, 0xFFF);
			
 
				+			e1_a = 0x780;
			
 
				+
			
 
				+			e0_r = clamp(red - scale, 0, 0xFFF);
			
 
				+			e0_g = clamp(green - scale, 0, 0xFFF);
			
 
				+			e0_b = clamp(blue - scale, 0, 0xFFF);
			
 
				+			e0_a = 0x780;
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		case CEM_HDR_RGB_HDR_ALPHA:
			
 
				+		case CEM_HDR_RGB_LDR_ALPHA:
			
 
				+		case CEM_HDR_RGB:
			
 
				+		{
			
 
				+			int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
			
 
				+
			
 
				+			int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6);
			
 
				+
			
 
				+			e0_a = 0x780;
			
 
				+			e1_a = 0x780;
			
 
				+
			
 
				+			if (majcomp == 3) 
			
 
				+			{
			
 
				+				e0_r = v0 << 4;
			
 
				+				e0_g = v2 << 4;
			
 
				+				e0_b = (v4 & 0x7f) << 5;
			
 
				+
			
 
				+				e1_r = v1 << 4;
			
 
				+				e1_g = v3 << 4;
			
 
				+				e1_b = (v5 & 0x7f) << 5;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5);
			
 
				+				int va = v0 | ((v1 & 0x40) << 2);
			
 
				+				int vb0 = v2 & 0x3f;
			
 
				+				int vb1 = v3 & 0x3f;
			
 
				+				int vc = v1 & 0x3f;
			
 
				+				int vd0 = v4 & 0x7f;
			
 
				+				int vd1 = v5 & 0x7f;
			
 
				+
			
 
				+				static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 };
			
 
				+				vd0 = sign_extend(vd0, s_dbitstab[mode]);
			
 
				+				vd1 = sign_extend(vd1, s_dbitstab[mode]);
			
 
				+
			
 
				+				int x0 = (v2 >> 6) & 1;
			
 
				+				int x1 = (v3 >> 6) & 1;
			
 
				+				int x2 = (v4 >> 6) & 1;
			
 
				+				int x3 = (v5 >> 6) & 1;
			
 
				+				int x4 = (v4 >> 5) & 1;
			
 
				+				int x5 = (v5 >> 5) & 1;
			
 
				+
			
 
				+				int ohm = 1 << mode;
			
 
				+				if (ohm & 0xA4) va |= x0 << 9;
			
 
				+				if (ohm & 0x08) va |= x2 << 9;
			
 
				+				if (ohm & 0x50) va |= x4 << 9;
			
 
				+				if (ohm & 0x50) va |= x5 << 10;
			
 
				+				if (ohm & 0xA0) va |= x1 << 10;
			
 
				+				if (ohm & 0xC0) va |= x2 << 11;
			
 
				+				if (ohm & 0x04) vc |= x1 << 6;
			
 
				+				if (ohm & 0xE8) vc |= x3 << 6;
			
 
				+				if (ohm & 0x20) vc |= x2 << 7;
			
 
				+				if (ohm & 0x5B) vb0 |= x0 << 6;
			
 
				+				if (ohm & 0x5B) vb1 |= x1 << 6;
			
 
				+				if (ohm & 0x12) vb0 |= x2 << 7;
			
 
				+				if (ohm & 0x12) vb1 |= x3 << 7;
			
 
				+
			
 
				+				int shamt = (mode >> 1) ^ 3;
			
 
				+				va  = (uint32_t)va  << shamt;
			
 
				+				vb0 = (uint32_t)vb0 << shamt;
			
 
				+				vb1 = (uint32_t)vb1 << shamt;
			
 
				+				vc  = (uint32_t)vc  << shamt;
			
 
				+				vd0 = (uint32_t)vd0 << shamt;
			
 
				+				vd1 = (uint32_t)vd1 << shamt;
			
 
				+
			
 
				+				e1_r = clamp(va, 0, 0xFFF);
			
 
				+				e1_g = clamp(va - vb0, 0, 0xFFF);
			
 
				+				e1_b = clamp(va - vb1, 0, 0xFFF);
			
 
				+
			
 
				+				e0_r = clamp(va - vc, 0, 0xFFF);
			
 
				+				e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF);
			
 
				+				e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF);
			
 
				+
			
 
				+				if (majcomp == 1)
			
 
				+				{
			
 
				+					std::swap(e0_r, e0_g);
			
 
				+					std::swap(e1_r, e1_g);
			
 
				+				}
			
 
				+				else if (majcomp == 2)
			
 
				+				{
			
 
				+					std::swap(e0_r, e0_b);
			
 
				+					std::swap(e1_r, e1_b);
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			if (cem_index == CEM_HDR_RGB_LDR_ALPHA)
			
 
				+			{
			
 
				+				int v6 = pE[6], v7 = pE[7];
			
 
				+
			
 
				+				e0_a = v6;
			
 
				+				e1_a = v7;
			
 
				+			}
			
 
				+			else if (cem_index == CEM_HDR_RGB_HDR_ALPHA)
			
 
				+			{
			
 
				+				int v6 = pE[6], v7 = pE[7];
			
 
				+
			
 
				+				// Extract mode bits
			
 
				+				int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
			
 
				+				v6 &= 0x7F;
			
 
				+				v7 &= 0x7F;
			
 
				+
			
 
				+				if (mode == 3)
			
 
				+				{
			
 
				+					e0_a = v6 << 5;
			
 
				+					e1_a = v7 << 5;
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					v6 |= (v7 << (mode + 1)) & 0x780;
			
 
				+					v7 &= (0x3F >> mode);
			
 
				+					v7 ^= (0x20 >> mode);
			
 
				+					v7 -= (0x20 >> mode);
			
 
				+					v6 <<= (4 - mode); 
			
 
				+					v7 <<= (4 - mode);
			
 
				+
			
 
				+					v7 += v6;
			
 
				+					v7 = clamp(v7, 0, 0xFFF);
			
 
				+					e0_a = v6; 
			
 
				+					e1_a = v7;
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		default:
			
 
				+		{
			
 
				+			assert(0);
			
 
				+			for (uint32_t c = 0; c < 4; c++)
			
 
				+			{
			
 
				+				pEndpoints[c][0] = 0;
			
 
				+				pEndpoints[c][1] = 0;
			
 
				+			}
			
 
				+			break;
			
 
				+		}
			
 
				+		}
			
 
				+	}
			
 
				+		
			
 
				+	static inline bool is_half_inf_or_nan(half_float v)
			
 
				+	{
			
 
				+		return get_bits(v, 10, 14) == 31;
			
 
				+	}
			
 
				+
			
 
				+	// This float->half conversion matches how "F32TO16" works on Intel GPU's.
			
 
				+	half_float float_to_half(float val, bool toward_zero)
			
 
				+	{
			
 
				+		union { float f; int32_t i; uint32_t u; } fi = { val };
			
 
				+		const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1;
			
 
				+		int s = flt_s, e = 0, m = 0;
			
 
				+
			
 
				+		// inf/NaN
			
 
				+		if (flt_e == 0xff)
			
 
				+		{
			
 
				+			e = 31;
			
 
				+			if (flt_m != 0) // NaN
			
 
				+				m = 1;
			
 
				+		}
			
 
				+		// not zero or denormal
			
 
				+		else if (flt_e != 0)
			
 
				+		{
			
 
				+			int new_exp = flt_e - 127;
			
 
				+			if (new_exp > 15)
			
 
				+				e = 31;
			
 
				+			else if (new_exp < -14)
			
 
				+			{
			
 
				+				if (toward_zero)
			
 
				+					m = (int)truncf((1 << 24) * fabsf(fi.f));
			
 
				+				else
			
 
				+					m = lrintf((1 << 24) * fabsf(fi.f));
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				e = new_exp + 15;
			
 
				+				if (toward_zero)
			
 
				+					m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13)));
			
 
				+				else
			
 
				+					m = lrintf((float)flt_m * (1.0f / (float)(1 << 13)));
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		assert((0 <= m) && (m <= 1024));
			
 
				+		if (m == 1024)
			
 
				+		{
			
 
				+			e++;
			
 
				+			m = 0;
			
 
				+		}
			
 
				+
			
 
				+		assert((s >= 0) && (s <= 1));
			
 
				+		assert((e >= 0) && (e <= 31));
			
 
				+		assert((m >= 0) && (m <= 1023));
			
 
				+
			
 
				+		half_float result = (half_float)((s << 15) | (e << 10) | m);
			
 
				+		return result;
			
 
				+	}
			
 
				+
			
 
				+	float half_to_float(half_float hval)
			
 
				+	{
			
 
				+		union { float f; uint32_t u; } x = { 0 };
			
 
				+
			
 
				+		uint32_t s = ((uint32_t)hval >> 15) & 1;
			
 
				+		uint32_t e = ((uint32_t)hval >> 10) & 0x1F;
			
 
				+		uint32_t m = (uint32_t)hval & 0x3FF;
			
 
				+
			
 
				+		if (!e)
			
 
				+		{
			
 
				+			if (!m)
			
 
				+			{
			
 
				+				// +- 0
			
 
				+				x.u = s << 31;
			
 
				+				return x.f;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// denormalized
			
 
				+				while (!(m & 0x00000400))
			
 
				+				{
			
 
				+					m <<= 1;
			
 
				+					--e;
			
 
				+				}
			
 
				+
			
 
				+				++e;
			
 
				+				m &= ~0x00000400;
			
 
				+			}
			
 
				+		}
			
 
				+		else if (e == 31)
			
 
				+		{
			
 
				+			if (m == 0)
			
 
				+			{
			
 
				+				// +/- INF
			
 
				+				x.u = (s << 31) | 0x7f800000;
			
 
				+				return x.f;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// +/- NaN
			
 
				+				x.u = (s << 31) | 0x7f800000 | (m << 13);
			
 
				+				return x.f;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		e = e + (127 - 15);
			
 
				+		m = m << 13;
			
 
				+
			
 
				+		assert(s <= 1);
			
 
				+		assert(m <= 0x7FFFFF);
			
 
				+		assert(e <= 255);
			
 
				+
			
 
				+		x.u = m | (e << 23) | (s << 31);
			
 
				+		return x.f;
			
 
				+	}
			
 
				+
			
 
				+	static inline half_float qlog16_to_half(int k)
			
 
				+	{
			
 
				+		assert((k >= 0) && (k <= 0xFFFF));
			
 
				+
			
 
				+		int E = (k & 0xF800) >> 11;
			
 
				+		int M = k & 0x7FF;
			
 
				+
			
 
				+		int Mt;
			
 
				+		if (M < 512)
			
 
				+			Mt = 3 * M;
			
 
				+		else if (M >= 1536)
			
 
				+			Mt = 5 * M - 2048;
			
 
				+		else
			
 
				+			Mt = 4 * M - 512;
			
 
				+
			
 
				+		return (half_float)((E << 10) + (Mt >> 3));
			
 
				+	}
			
 
				+
			
 
				// See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
				const int RGB9E5_EXPONENT_BITS = 5, RGB9E5_MANTISSA_BITS = 9, RGB9E5_EXP_BIAS = 15, RGB9E5_MAX_VALID_BIASED_EXP = 31;
				const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS);
				const int RGB9E5_MANTISSA_VALUES = (1 << RGB9E5_MANTISSA_BITS);
				const int MAX_RGB9E5_MANTISSA = (RGB9E5_MANTISSA_VALUES - 1);
				//const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP));
				// (1 / mantissa values) / 2^bias = 2^-24. Kept as a float: converting this to int truncates it to 0.
				const float EPSILON_RGB9E5 = (1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS);
			
 
				+		
			
 
				+	void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b)
			
 
				+	{
			
 
				+		int x = packed & 511;
			
 
				+		int y = (packed >> 9) & 511;
			
 
				+		int z = (packed >> 18) & 511;
			
 
				+		int w = (packed >> 27) & 31;
			
 
				+
			
 
				+		const float scale = powf(2.0f, static_cast<float>(w - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
			
 
				+
			
 
				+		r = x * scale;
			
 
				+		g = y * scale;
			
 
				+		b = z * scale;
			
 
				+	}
			
 
				+			
			
 
				// Returns the unbiased binary exponent of x, read straight from its IEEE-754 bit pattern.
				// floor_log2 is not correct for the denorm and zero values, but we are going to do a max of this value with the minimum rgb9e5 exponent that will hide these problem cases.
				static inline int floor_log2(float x)
				{
					union
					{
						unsigned int raw;
						float value;
					} pun;

					pun.value = x;

					// Bits 23..30 hold the biased exponent.
					const unsigned int biased_exponent = (pun.raw >> 23) & 0xFF;
					return (int)biased_exponent - 127;
				}
			
 
				+
			
 
				// Returns the larger of two ints (b when they are equal).
				static inline int maximumi(int a, int b)
				{
					if (a > b)
						return a;
					return b;
				}
			
 
				// Returns a when a > b, otherwise b.
				static inline float maximumf(float a, float b)
				{
					if (a > b)
						return a;
					return b;
				}
			
 
				+
			
 
				+	uint32_t pack_rgb9e5(float r, float g, float b)
			
 
				+	{
			
 
				+		r = clampf(r, 0.0f, MAX_RGB9E5);
			
 
				+		g = clampf(g, 0.0f, MAX_RGB9E5);
			
 
				+		b = clampf(b, 0.0f, MAX_RGB9E5);
			
 
				+
			
 
				+		float maxrgb = maximumf(maximumf(r, g), b);
			
 
				+		int exp_shared = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
			
 
				+		assert((exp_shared >= 0) && (exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP));
			
 
				+
			
 
				+		float denom = powf(2.0f, (float)(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
			
 
				+
			
 
				+		int maxm = (int)floorf((maxrgb / denom) + 0.5f);
			
 
				+		if (maxm == (MAX_RGB9E5_MANTISSA + 1))
			
 
				+		{
			
 
				+			denom *= 2;
			
 
				+			exp_shared += 1;
			
 
				+			assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
			
 
				+		}
			
 
				+		else 
			
 
				+		{
			
 
				+			assert(maxm <= MAX_RGB9E5_MANTISSA);
			
 
				+		}
			
 
				+
			
 
				+		int rm = (int)floorf((r / denom) + 0.5f);
			
 
				+		int gm = (int)floorf((g / denom) + 0.5f);
			
 
				+		int bm = (int)floorf((b / denom) + 0.5f);
			
 
				+
			
 
				+		assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA));
			
 
				+		assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA));
			
 
				+		assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA));
			
 
				+		
			
 
				+		return rm | (gm << 9) | (bm << 18) | (exp_shared << 27);
			
 
				+	}
			
 
				+
			
 
				// Counts leading zero bits of x treated as a 17-bit value (bit 16 is the most significant).
				// Returns 17 when x is zero. x must be <= 0x1FFFF.
				static inline int clz17(uint32_t x)
				{
					assert(x <= 0x1FFFF);
					x &= 0x1FFFF;

					// Walk a single-bit probe down from bit 16 until it hits a set bit.
					int zeros = 0;
					for (uint32_t probe = 0x10000; probe != 0; probe >>= 1)
					{
						if (x & probe)
							return zeros;
						zeros++;
					}

					return 17;
				}
			
 
				+
			
 
				+	static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb)
			
 
				+	{
			
 
				+		int lz = clz17(Cr | Cg | Cb | 1);
			
 
				+		if (Cr == 65535) { Cr = 65536; lz = 0; }
			
 
				+		if (Cg == 65535) { Cg = 65536; lz = 0; }
			
 
				+		if (Cb == 65535) { Cb = 65536; lz = 0; }
			
 
				+		Cr <<= lz; Cg <<= lz; Cb <<= lz;
			
 
				+		Cr = (Cr >> 8) & 0x1FF;
			
 
				+		Cg = (Cg >> 8) & 0x1FF;
			
 
				+		Cb = (Cb >> 8) & 0x1FF;
			
 
				+		uint32_t exponent = 16 - lz;
			
 
				+		uint32_t texel = (exponent << 27) | (Cb << 18) | (Cg << 9) | Cr;
			
 
				+		return texel;
			
 
				+	}
			
 
				+
			
 
				// Packs three half-float bit patterns into an RGB9E5 texel.
				// Inputs above 0x7c00 are replaced by 0, and 0x7c00 itself by 0x7bff. The shared exponent is
				// taken from the channel with the largest half exponent; the other mantissas are shifted
				// right to match. When all three exponents are zero, mantissa bit 9 selects the exponent/shift.
				static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb)
				{
					int chan[3] = { Cr, Cg, Cb };
					int half_exp[3];   // raw 5-bit exponent field
					int eff_exp[3];    // exponent with 0 treated as 1 (denormals share exponent 1's scale)

					for (int i = 0; i < 3; i++)
					{
						if (chan[i] > 0x7c00)
							chan[i] = 0;
						else if (chan[i] == 0x7c00)
							chan[i] = 0x7bff;

						half_exp[i] = (chan[i] >> 10) & 0x1F;
						eff_exp[i] = (half_exp[i] == 0) ? 1 : half_exp[i];
					}

					uint32_t shift[3], expo;

					if ((half_exp[0] | half_exp[1] | half_exp[2]) == 0)
					{
						// All channels are zero/denormal.
						const int top_mantissa_bit = ((chan[0] | chan[1] | chan[2]) & 0x200) >> 9;
						expo = shift[0] = shift[1] = shift[2] = top_mantissa_bit;
					}
					else
					{
						// The dominant channel is the one with the largest exponent; it gets a shift of 2.
						int dominant = eff_exp[0];
						if (eff_exp[1] > dominant) dominant = eff_exp[1];
						if (eff_exp[2] > dominant) dominant = eff_exp[2];

						expo = dominant + 1;
						for (int i = 0; i < 3; i++)
							shift[i] = dominant - eff_exp[i] + 2;
					}

					uint32_t texel = expo << 27;

					for (int i = 0; i < 3; i++)
					{
						// Restore the implicit leading one for normalized values.
						int mantissa = (chan[i] & 0x3FF) | (half_exp[i] == 0 ? 0 : 0x400);
						mantissa = (mantissa >> shift[i]) & 0x1FF;
						texel |= (mantissa << (9 * i));
					}

					return texel;
				}
			
 
				+		
			
 
				+	// Important: pPixels is either 32-bit/texel or 64-bit/texel.
			
 
				+	bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode)
			
 
				+	{
			
 
				+		assert(is_valid_block_size(blk_width, blk_height));
			
 
				+				
			
 
				+		assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size());
			
 
				+		if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size())
			
 
				+			return false;
			
 
				+
			
 
				+		const uint32_t num_blk_pixels = blk_width * blk_height;
			
 
				+		
			
 
				+		// Write block error color
			
 
				+		if (dec_mode == cDecodeModeHDR16)
			
 
				+		{
			
 
				+			// NaN's
			
 
				+			memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4);
			
 
				+		}
			
 
				+		else if (dec_mode == cDecodeModeRGB9E5)
			
 
				+		{
			
 
				+			const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f);
			
 
				+
			
 
				+			for (uint32_t i = 0; i < num_blk_pixels; i++)
			
 
				+				((uint32_t*)pPixels)[i] = purple_9e5;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			for (uint32_t i = 0; i < num_blk_pixels; i++)
			
 
				+				((uint32_t*)pPixels)[i] = 0xFFFF00FF;
			
 
				+		}
			
 
				+
			
 
				+		if (log_blk.m_error_flag)
			
 
				+		{
			
 
				+			// Should this return false? It's not an invalid logical block config, though.
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		// Handle solid color blocks
			
 
				+		if (log_blk.m_solid_color_flag_ldr)
			
 
				+		{
			
 
				+			// LDR solid block
			
 
				+			if (dec_mode == cDecodeModeHDR16)
			
 
				+			{
			
 
				+				// Convert LDR pixels to half-float
			
 
				+				half_float h[4];
			
 
				+				for (uint32_t c = 0; c < 4; c++)
			
 
				+					h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true);
			
 
				+
			
 
				+				for (uint32_t i = 0; i < num_blk_pixels; i++)
			
 
				+					memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4);
			
 
				+			}
			
 
				+			else if (dec_mode == cDecodeModeRGB9E5)
			
 
				+			{
			
 
				+				float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f));
			
 
				+				float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f));
			
 
				+				float b = (log_blk.m_solid_color[2] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f));
			
 
				+
			
 
				+				const uint32_t packed = pack_rgb9e5(r, g, b);
			
 
				+
			
 
				+				for (uint32_t i = 0; i < num_blk_pixels; i++)
			
 
				+					((uint32_t*)pPixels)[i] = packed;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// Convert LDR pixels to 8-bits
			
 
				+				for (uint32_t i = 0; i < num_blk_pixels; i++)
			
 
				+					for (uint32_t c = 0; c < 4; c++)
			
 
				+						((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8);
			
 
				+			}
			
 
				+
			
 
				+			return true;
			
 
				+		}
			
 
				+		else if (log_blk.m_solid_color_flag_hdr)
			
 
				+		{
			
 
				+			// HDR solid block, decode mode must be half-float or RGB9E5
			
 
				+			if (dec_mode == cDecodeModeHDR16)
			
 
				+			{
			
 
				+				for (uint32_t i = 0; i < num_blk_pixels; i++)
			
 
				+					memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4);
			
 
				+			}
			
 
				+			else if (dec_mode == cDecodeModeRGB9E5)
			
 
				+			{
			
 
				+				float r = half_to_float(log_blk.m_solid_color[0]);
			
 
				+				float g = half_to_float(log_blk.m_solid_color[1]);
			
 
				+				float b = half_to_float(log_blk.m_solid_color[2]);
			
 
				+				
			
 
				+				const uint32_t packed = pack_rgb9e5(r, g, b);
			
 
				+
			
 
				+				for (uint32_t i = 0; i < num_blk_pixels; i++)
			
 
				+					((uint32_t*)pPixels)[i] = packed;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				return false;
			
 
				+			}
			
 
				+
			
 
				+			return true;
			
 
				+		}
			
 
				+						
			
 
				+		// Sanity check block's config
			
 
				+		if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2))
			
 
				+			return false;
			
 
				+		if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
			
 
				+			return false;
			
 
				+
			
 
				+		if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
			
 
				+			return false;
			
 
				+		if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE))
			
 
				+			return false;
			
 
				+		if ((log_blk.m_num_partitions < 1) || (log_blk.m_num_partitions > MAX_PARTITIONS))
			
 
				+			return false;
			
 
				+		if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS))
			
 
				+			return false;
			
 
				+		if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS)
			
 
				+			return false;
			
 
				+		if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0))
			
 
				+			return false;
			
 
				+		if (log_blk.m_color_component_selector > 3)
			
 
				+			return false;
			
 
				+
			
 
				+		const uint32_t total_endpoint_levels = get_ise_levels(log_blk.m_endpoint_ise_range);
			
 
				+		const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range);
			
 
				+				
			
 
				+		bool is_ldr_endpoints[MAX_PARTITIONS];
			
 
				+
			
 
				+		// Check CEM's
			
 
				+		uint32_t total_cem_vals = 0;
			
 
				+		for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
			
 
				+		{
			
 
				+			if (log_blk.m_color_endpoint_modes[i] > 15)
			
 
				+				return false;
			
 
				+
			
 
				+			total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]);
			
 
				+			
			
 
				+			is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]);
			
 
				+		}
			
 
				+
			
 
				+		if (total_cem_vals > MAX_ENDPOINTS)
			
 
				+			return false;
			
 
				+
			
 
				+		const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range);
			
 
				+		const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data();
			
 
				+
			
 
				+		// Dequantized endpoints to [0,255]
			
 
				+		uint8_t dequantized_endpoints[MAX_ENDPOINTS];
			
 
				+		for (uint32_t i = 0; i < total_cem_vals; i++)
			
 
				+		{
			
 
				+			if (log_blk.m_endpoints[i] >= total_endpoint_levels)
			
 
				+				return false;
			
 
				+			dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]];
			
 
				+		}
			
 
				+				
			
 
				+		// Dequantize weights to [0,64]
			
 
				+		uint8_t dequantized_weights[2][12 * 12];
			
 
				+		
			
 
				+		const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range);
			
 
				+		const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data();
			
 
				+		
			
 
				+		const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height;
			
 
				+		for (uint32_t i = 0; i < total_weight_vals; i++)
			
 
				+		{
			
 
				+			if (log_blk.m_weights[i] >= total_weight_levels)
			
 
				+				return false;
			
 
				+
			
 
				+			const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0;
			
 
				+			const uint32_t grid_index = log_blk.m_dual_plane ? (i >> 1) : i;
			
 
				+
			
 
				+			dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]];
			
 
				+		}
			
 
				+
			
 
				+		// Upsample weight grid. [0,64] weights
			
 
				+		uint8_t upsampled_weights[2][12 * 12];
			
 
				+
			
 
				+		upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[0][0], &upsampled_weights[0][0]);
			
 
				+		if (log_blk.m_dual_plane)
			
 
				+			upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[1][0], &upsampled_weights[1][0]);
			
 
				+
			
 
				+		// Decode CEM's
			
 
				+		int endpoints[4][4][2]; // [subset][comp][l/h]
			
 
				+
			
 
				+		uint32_t endpoint_val_index = 0;
			
 
				+		for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++)
			
 
				+		{
			
 
				+			const uint32_t cem_index = log_blk.m_color_endpoint_modes[subset];
			
 
				+
			
 
				+			decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]);
			
 
				+
			
 
				+			endpoint_val_index += get_num_cem_values(cem_index);
			
 
				+		}
			
 
				+
			
 
				+		// Decode texels
			
 
				+		const bool small_block = num_blk_pixels < 31;
			
 
				+		const bool use_precomputed_texel_partitions = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
			
 
				+		const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX;
			
 
				+		
			
 
				+		bool success = true;
			
 
				+
			
 
				+		if (dec_mode == cDecodeModeRGB9E5)
			
 
				+		{
			
 
				+			// returns uint32_t's
			
 
				+			for (uint32_t y = 0; y < blk_height; y++)
			
 
				+			{
			
 
				+				for (uint32_t x = 0; x < blk_width; x++)
			
 
				+				{
			
 
				+					const uint32_t pixel_index = x + y * blk_width;
			
 
				+					const uint32_t subset = (log_blk.m_num_partitions > 1) ? 
			
 
				+						(use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
			
 
				+						: 0;
			
 
				+
			
 
				+					int comp[3];
			
 
				+
			
 
				+					for (uint32_t c = 0; c < 3; c++)
			
 
				+					{
			
 
				+						const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
			
 
				+
			
 
				+						if (is_ldr_endpoints[subset])
			
 
				+						{
			
 
				+							assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
			
 
				+							assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
			
 
				+
			
 
				+							int le = endpoints[subset][c][0];
			
 
				+							int he = endpoints[subset][c][1];
			
 
				+
			
 
				+							le = (le << 8) | le;
			
 
				+							he = (he << 8) | he;
			
 
				+
			
 
				+							int k = weight_interpolate(le, he, w);
			
 
				+							assert((k >= 0) && (k <= 0xFFFF));
			
 
				+
			
 
				+							comp[c] = k; // 1.0
			
 
				+						}
			
 
				+						else
			
 
				+						{
			
 
				+							assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
			
 
				+							assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
			
 
				+
			
 
				+							int le = endpoints[subset][c][0] << 4;
			
 
				+							int he = endpoints[subset][c][1] << 4;
			
 
				+
			
 
				+							int qlog16 = weight_interpolate(le, he, w);
			
 
				+
			
 
				+							comp[c] = qlog16_to_half(qlog16);
			
 
				+
			
 
				+							if (is_half_inf_or_nan((half_float)comp[c]))
			
 
				+								comp[c] = 0x7BFF;
			
 
				+						}
			
 
				+						
			
 
				+					} // c
			
 
				+
			
 
				+					uint32_t packed;
			
 
				+					if (is_ldr_endpoints[subset])
			
 
				+						packed = pack_rgb9e5_ldr_astc(comp[0], comp[1], comp[2]);
			
 
				+					else
			
 
				+						packed = pack_rgb9e5_hdr_astc(comp[0], comp[1], comp[2]);
			
 
				+
			
 
				+					((uint32_t*)pPixels)[pixel_index] = packed;
			
 
				+
			
 
				+				} // x
			
 
				+			} // y
			
 
				+		}
			
 
				+		else if (dec_mode == cDecodeModeHDR16)
			
 
				+		{
			
 
				+			// Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application)
			
 
				+			
			
 
				+			// returns half floats
			
 
				+			for (uint32_t y = 0; y < blk_height; y++)
			
 
				+			{
			
 
				+				for (uint32_t x = 0; x < blk_width; x++)
			
 
				+				{
			
 
				+					const uint32_t pixel_index = x + y * blk_width;
			
 
				+					const uint32_t subset = (log_blk.m_num_partitions > 1) ?
			
 
				+						(use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
			
 
				+						: 0;
			
 
				+
			
 
				+					for (uint32_t c = 0; c < 4; c++)
			
 
				+					{
			
 
				+						const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
			
 
				+
			
 
				+						half_float o;
			
 
				+
			
 
				+						if ( (is_ldr_endpoints[subset]) ||
			
 
				+							 ((log_blk.m_color_endpoint_modes[subset] == CEM_HDR_RGB_LDR_ALPHA) && (c == 3)) )
			
 
				+						{
			
 
				+							assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
			
 
				+							assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
			
 
				+
			
 
				+							int le = endpoints[subset][c][0];
			
 
				+							int he = endpoints[subset][c][1];
			
 
				+
			
 
				+							le = (le << 8) | le;
			
 
				+							he = (he << 8) | he;
			
 
				+
			
 
				+							int k = weight_interpolate(le, he, w);
			
 
				+							assert((k >= 0) && (k <= 0xFFFF));
			
 
				+
			
 
				+							if (k == 0xFFFF)
			
 
				+								o = 0x3C00; // 1.0
			
 
				+							else
			
 
				+								o = float_to_half((float)k * (1.0f / 65536.0f), true);
			
 
				+						}
			
 
				+						else
			
 
				+						{
			
 
				+							assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
			
 
				+							assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
			
 
				+
			
 
				+							int le = endpoints[subset][c][0] << 4;
			
 
				+							int he = endpoints[subset][c][1] << 4;
			
 
				+
			
 
				+							int qlog16 = weight_interpolate(le, he, w);
			
 
				+							
			
 
				+							o = qlog16_to_half(qlog16);
			
 
				+
			
 
				+							if (is_half_inf_or_nan(o))
			
 
				+								o = 0x7BFF;
			
 
				+						}
			
 
				+												
			
 
				+						((half_float*)pPixels)[pixel_index * 4 + c] = o;
			
 
				+					}
			
 
				+
			
 
				+				} // x
			
 
				+			} // y
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			// returns uint8_t's
			
 
				+			for (uint32_t y = 0; y < blk_height; y++)
			
 
				+			{
			
 
				+				for (uint32_t x = 0; x < blk_width; x++)
			
 
				+				{
			
 
				+					const uint32_t pixel_index = x + y * blk_width;
			
 
				+
			
 
				+					const uint32_t subset = (log_blk.m_num_partitions > 1) ?
			
 
				+						(use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
			
 
				+						: 0;
			
 
				+
			
 
				+					if (!is_ldr_endpoints[subset])
			
 
				+					{
			
 
				+						((uint32_t*)pPixels)[pixel_index * 4] = 0xFFFF00FF;
			
 
				+						success = false;
			
 
				+					}
			
 
				+					else
			
 
				+					{
			
 
				+						for (uint32_t c = 0; c < 4; c++)
			
 
				+						{
			
 
				+							const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
			
 
				+
			
 
				+							int le = endpoints[subset][c][0];
			
 
				+							int he = endpoints[subset][c][1];
			
 
				+
			
 
				+							// FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder
			
 
				+							//if ((dec_mode == cDecodeModeSRGB8) && (c <= 2))
			
 
				+							// See https://github.com/ARM-software/astc-encoder/issues/447
			
 
				+							if (dec_mode == cDecodeModeSRGB8)
			
 
				+							{
			
 
				+								le = (le << 8) | 0x80;
			
 
				+								he = (he << 8) | 0x80;
			
 
				+							}
			
 
				+							else
			
 
				+							{
			
 
				+								le = (le << 8) | le;
			
 
				+								he = (he << 8) | he;
			
 
				+							}
			
 
				+
			
 
				+							uint32_t k = weight_interpolate(le, he, w);
			
 
				+
			
 
				+							// FIXME: This is what the spec says to do in LDR mode, but this is not what ARM's decoder does
			
 
				+							// See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16. 
			
 
				+							// It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit.
			
 
				+							((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8);
			
 
				+						}
			
 
				+					}
			
 
				+
			
 
				+				} // x
			
 
				+			} // y
			
 
				+		}
			
 
				+		
			
 
				+		return success;
			
 
				+	}
			
 
				+
			
 
				+	//------------------------------------------------
			
 
				+	// Physical to logical block decoding
			
 
				+
			
 
				+	// Unsigned 128-bit integer stored as two 64-bit halves: m_lo holds bits 0..63, m_hi holds bits 64..127.
				+	// The "signed" helpers reinterpret the same 128 bits as a two's complement value (sign = bit 127).
				+	// A default-constructed uint128 is uninitialized. Multiplication and division are bit-serial and slow.
				+	class uint128
				+	{
				+		uint64_t m_lo, m_hi;
				+
				+	public:
				+		uint128() = default;
				+		// Implicit on purpose: expressions such as "x - 1" rely on integer -> uint128 conversion.
				+		inline uint128(uint64_t lo) : m_lo(lo), m_hi(0) { }
				+		inline uint128(uint64_t lo, uint64_t hi) : m_lo(lo), m_hi(hi) { }
				+		uint128(const uint128& other) = default;
				+
				+		// Sign-extends a 64-bit signed value to 128 bits.
				+		inline uint128& set_signed(int64_t lo) { m_lo = (uint64_t)lo; m_hi = (lo < 0) ? UINT64_MAX : 0; return *this; }
				+		inline uint128& set(uint64_t lo) { m_lo = lo; m_hi = 0; return *this; }
				+
				+		// Narrowing conversions: all of them truncate to the low bits.
				+		inline explicit operator uint8_t () const { return (uint8_t)m_lo; }
				+		inline explicit operator uint16_t () const { return (uint16_t)m_lo; }
				+		inline explicit operator uint32_t () const { return (uint32_t)m_lo; }
				+		inline explicit operator uint64_t () const { return m_lo; }
				+
				+		uint128& operator= (const uint128& rhs) = default;
				+		inline uint128& operator= (const uint64_t val) { m_lo = val; m_hi = 0; return *this; }
				+
				+		inline uint64_t get_low() const { return m_lo; }
				+		inline uint64_t& get_low() { return m_lo; }
				+
				+		inline uint64_t get_high() const { return m_hi; }
				+		inline uint64_t& get_high() { return m_hi; }
				+
				+		inline bool operator== (const uint128& rhs) const { return (m_lo == rhs.m_lo) && (m_hi == rhs.m_hi); }
				+		inline bool operator!= (const uint128& rhs) const { return !(*this == rhs); }
				+
				+		// Unsigned ordering: the high halves decide unless they are equal.
				+		inline bool operator< (const uint128& rhs) const
				+		{
				+			if (m_hi != rhs.m_hi)
				+				return m_hi < rhs.m_hi;
				+
				+			return m_lo < rhs.m_lo;
				+		}
				+
				+		inline bool operator> (const uint128& rhs) const { return (rhs < *this); }
				+
				+		inline bool operator<= (const uint128& rhs) const { return !(rhs < *this); }
				+		inline bool operator>= (const uint128& rhs) const { return !(*this < rhs); }
				+
				+		inline bool is_zero() const { return (m_lo == 0) && (m_hi == 0); }
				+		inline bool is_all_ones() const { return (m_lo == UINT64_MAX) && (m_hi == UINT64_MAX); }
				+		inline bool is_non_zero() const { return (m_lo != 0) || (m_hi != 0); }
				+		inline explicit operator bool() const { return is_non_zero(); }
				+		// True if bit 127 is set, i.e. the value is negative when read as two's complement.
				+		inline bool is_signed() const { return ((int64_t)m_hi) < 0; }
				+
				+		// Two's complement ordering.
				+		inline bool signed_less(const uint128& rhs) const
				+		{
				+			const bool l_signed = is_signed(), r_signed = rhs.is_signed();
				+
				+			// Different signs: the negative operand is the smaller one.
				+			if (l_signed != r_signed)
				+				return l_signed;
				+
				+			// Same sign: two's complement values order the same way as their unsigned bit patterns.
				+			return *this < rhs;
				+		}
				+
				+		inline bool signed_greater(const uint128& rhs) const { return rhs.signed_less(*this); }
				+		inline bool signed_less_equal(const uint128& rhs) const { return !rhs.signed_less(*this); }
				+		inline bool signed_greater_equal(const uint128& rhs) const { return !signed_less(rhs); }
				+
				+		// Nearest-double approximation of the unsigned value.
				+		double get_double() const
				+		{
				+			// 2^64 is exactly representable as a double, so no pow() call is needed.
				+			return ((double)m_hi * 18446744073709551616.0) + (double)m_lo;
				+		}
				+
				+		double get_signed_double() const
				+		{
				+			if (is_signed())
				+				return -(abs().get_double());
				+
				+			return get_double();
				+		}
				+
				+		// Two's complement absolute value.
				+		inline uint128 abs() const
				+		{
				+			return is_signed() ? -(*this) : *this;
				+		}
				+
				+		// Logical shift left. Negative shifts assert (and are ignored in release builds); shifts >= 128 yield 0.
				+		inline uint128& operator<<= (int shift)
				+		{
				+			assert(shift >= 0);
				+			if (shift <= 0)
				+				return *this;
				+
				+			if (shift >= 128)
				+			{
				+				m_hi = 0;
				+				m_lo = 0;
				+			}
				+			else if (shift >= 64)
				+			{
				+				m_hi = m_lo << (shift - 64);
				+				m_lo = 0;
				+			}
				+			else
				+			{
				+				// 0 < shift < 64, so neither 64-bit shift count is out of range.
				+				m_hi = (m_hi << shift) | (m_lo >> (64 - shift));
				+				m_lo <<= shift;
				+			}
				+
				+			return *this;
				+		}
				+
				+		inline uint128 operator<< (int shift) const { uint128 res(*this); res <<= shift; return res; }
				+
				+		// Logical shift right. Negative shifts assert (and are ignored in release builds); shifts >= 128 yield 0.
				+		inline uint128& operator>>= (int shift)
				+		{
				+			assert(shift >= 0);
				+			if (shift <= 0)
				+				return *this;
				+
				+			if (shift >= 128)
				+			{
				+				m_lo = 0;
				+				m_hi = 0;
				+			}
				+			else if (shift >= 64)
				+			{
				+				m_lo = m_hi >> (shift - 64);
				+				m_hi = 0;
				+			}
				+			else
				+			{
				+				// 0 < shift < 64, so neither 64-bit shift count is out of range.
				+				m_lo = (m_lo >> shift) | (m_hi << (64 - shift));
				+				m_hi >>= shift;
				+			}
				+
				+			return *this;
				+		}
				+
				+		inline uint128 operator>> (int shift) const { uint128 res(*this); res >>= shift; return res; }
				+
				+		// Arithmetic shift right: the vacated high bits are filled with copies of the sign bit.
				+		inline uint128 signed_shift_right(int shift) const
				+		{
				+			uint128 res(*this);
				+			res >>= shift;
				+
				+			if (is_signed())
				+			{
				+				// ~(all_ones >> shift) is a mask of exactly the top "shift" bits.
				+				uint128 x(~uint128(0U));
				+				x >>= shift;
				+				res |= (~x);
				+			}
				+
				+			return res;
				+		}
				+
				+		inline uint128& operator |= (const uint128& rhs) { m_lo |= rhs.m_lo; m_hi |= rhs.m_hi; return *this; }
				+		inline uint128 operator | (const uint128& rhs) const { uint128 res(*this); res |= rhs; return res; }
				+
				+		inline uint128& operator &= (const uint128& rhs) { m_lo &= rhs.m_lo; m_hi &= rhs.m_hi; return *this; }
				+		inline uint128 operator & (const uint128& rhs) const { uint128 res(*this); res &= rhs; return res; }
				+
				+		inline uint128& operator ^= (const uint128& rhs) { m_lo ^= rhs.m_lo; m_hi ^= rhs.m_hi; return *this; }
				+		inline uint128 operator ^ (const uint128& rhs) const { uint128 res(*this); res ^= rhs; return res; }
				+
				+		inline uint128 operator ~() const { return uint128(~m_lo, ~m_hi); }
				+
				+		// Two's complement negation: invert all bits, then add one.
				+		inline uint128 operator -() const { uint128 res(~*this); if (++res.m_lo == 0) ++res.m_hi; return res; }
				+
				+		// prefix
				+		inline uint128& operator ++()
				+		{
				+			// The low half wrapping to 0 carries into the high half.
				+			if (++m_lo == 0)
				+				++m_hi;
				+			return *this;
				+		}
				+
				+		// postfix
				+		inline uint128 operator ++(int)
				+		{
				+			const uint128 res(*this);
				+			++(*this);
				+			return res;
				+		}
				+
				+		// prefix
				+		inline uint128& operator --()
				+		{
				+			// The low half wrapping from 0 borrows from the high half.
				+			if (m_lo-- == 0)
				+				--m_hi;
				+			return *this;
				+		}
				+
				+		// postfix
				+		inline uint128 operator --(int)
				+		{
				+			const uint128 res(*this);
				+			--(*this);
				+			return res;
				+		}
				+
				+		inline uint128& operator+= (const uint128& rhs)
				+		{
				+			const uint64_t t = m_lo + rhs.m_lo;
				+			// The low sum wrapped around iff it is smaller than the original low half.
				+			m_hi = m_hi + rhs.m_hi + (t < m_lo);
				+			m_lo = t;
				+			return *this;
				+		}
				+
				+		inline uint128 operator+ (const uint128& rhs) const { uint128 res(*this); res += rhs; return res; }
				+
				+		inline uint128& operator-= (const uint128& rhs)
				+		{
				+			const uint64_t t = m_lo - rhs.m_lo;
				+			// The low difference wrapped around iff it is larger than the original low half.
				+			m_hi = m_hi - rhs.m_hi - (t > m_lo);
				+			m_lo = t;
				+			return *this;
				+		}
				+
				+		inline uint128 operator- (const uint128& rhs) const { uint128 res(*this); res -= rhs; return res; }
				+
				+		// Shift-and-add multiply, computed bit by bit (very slow). The result is truncated to 128 bits.
				+		uint128& operator*=(const uint128& rhs)
				+		{
				+			uint128 temp(*this), result(0U);
				+
				+			for (uint128 bitmask(rhs); bitmask; bitmask >>= 1, temp <<= 1)
				+				if (bitmask.get_low() & 1)
				+					result += temp;
				+
				+			*this = result;
				+			return *this;
				+		}
				+
				+		uint128 operator*(const uint128& rhs) const { uint128 res(*this); res *= rhs; return res; }
				+
				+		// Restoring long division, computed bit by bit (very slow). Returns the quotient and writes the remainder.
				+		// Dividing by zero asserts; in release builds it returns all ones with a zero remainder.
				+		friend uint128 divide(const uint128& dividend, const uint128& divisor, uint128& remainder)
				+		{
				+			remainder = 0;
				+
				+			if (!divisor)
				+			{
				+				assert(0);
				+				return ~uint128(0U);
				+			}
				+
				+			uint128 quotient(0), one(1);
				+
				+			for (int i = 127; i >= 0; i--)
				+			{
				+				// Bring down the next dividend bit, most significant first.
				+				remainder = (remainder << 1) | ((dividend >> i) & one);
				+				if (remainder >= divisor)
				+				{
				+					remainder -= divisor;
				+					quotient |= (one << i);
				+				}
				+			}
				+
				+			return quotient;
				+		}
				+
				+		uint128 operator/(const uint128& rhs) const { uint128 remainder; return divide(*this, rhs, remainder); }
				+		uint128& operator/=(const uint128& rhs) { uint128 remainder; *this = divide(*this, rhs, remainder); return *this; }
				+
				+		uint128 operator%(const uint128& rhs) const { uint128 remainder; divide(*this, rhs, remainder); return remainder; }
				+		uint128& operator%=(const uint128& rhs) { uint128 remainder; divide(*this, rhs, remainder); *this = remainder; return *this; }
				+
				+		void print_hex(FILE* pFile) const
				+		{
				+			fprintf(pFile, "0x%016llx%016llx", (unsigned long long int)m_hi, (unsigned long long int)m_lo);
				+		}
				+
				+		// Appends the unsigned decimal representation to res.
				+		void format_unsigned(std::string& res) const
				+		{
				+			// 2^128 - 1 has 39 decimal digits, so a small stack buffer is enough.
				+			char digits[40];
				+			int num_digits = 0;
				+
				+			uint128 k(*this);
				+			const uint128 ten(10);
				+			do
				+			{
				+				uint128 r;
				+				k = divide(k, ten, r);
				+				digits[num_digits++] = (char)('0' + (uint8_t)r);
				+			} while (k);
				+
				+			// Digits were produced least significant first.
				+			while (num_digits > 0)
				+				res.push_back(digits[--num_digits]);
				+		}
				+
				+		// Appends the two's complement decimal representation (with a leading '-' if negative) to res.
				+		void format_signed(std::string& res) const
				+		{
				+			uint128 val(*this);
				+
				+			if (val.is_signed())
				+			{
				+				res.push_back('-');
				+				val = -val;
				+			}
				+
				+			val.format_unsigned(res);
				+		}
				+
				+		void print_unsigned(FILE* pFile) const
				+		{
				+			std::string str;
				+			format_unsigned(str);
				+			fprintf(pFile, "%s", str.c_str());
				+		}
				+
				+		void print_signed(FILE* pFile) const
				+		{
				+			std::string str;
				+			format_signed(str);
				+			fprintf(pFile, "%s", str.c_str());
				+		}
				+
				+		// Applies rev_dword() to each 32-bit quarter and reverses the order of the quarters.
				+		// Works on values rather than through a uint32_t* view of the object, which avoids
				+		// strict-aliasing problems and gives the same result on any byte order.
				+		uint128 get_reversed_bits() const
				+		{
				+			const uint64_t new_lo = (uint64_t)rev_dword((uint32_t)(m_hi >> 32)) | ((uint64_t)rev_dword((uint32_t)m_hi) << 32);
				+			const uint64_t new_hi = (uint64_t)rev_dword((uint32_t)(m_lo >> 32)) | ((uint64_t)rev_dword((uint32_t)m_lo) << 32);
				+
				+			return uint128(new_lo, new_hi);
				+		}
				+
				+		// Returns the value with its 16 bytes in reverse order.
				+		uint128 get_byteswapped() const
				+		{
				+			uint128 res;
				+
				+			const uint8_t* pSrc = (const uint8_t*)this;
				+			uint8_t* pDst = (uint8_t*)&res;
				+
				+			for (uint32_t i = 0; i < 16; i++)
				+				pDst[i] = pSrc[15 - i];
				+
				+			return res;
				+		}
				+
				+		// Extracts bit_len (1..64) bits starting at bit_ofs; the field must lie inside the 128 bits.
				+		inline uint64_t get_bits64(uint32_t bit_ofs, uint32_t bit_len) const
				+		{
				+			assert(bit_ofs < 128);
				+			assert(bit_len && (bit_len <= 64) && ((bit_ofs + bit_len) <= 128));
				+
				+			uint128 res(*this);
				+			res >>= bit_ofs;
				+
				+			// 1ull << 64 would be undefined, so a full-width mask is special-cased.
				+			const uint64_t bitmask = (bit_len == 64) ? UINT64_MAX : ((1ull << bit_len) - 1);
				+			return res.get_low() & bitmask;
				+		}
				+
				+		// Same as get_bits64(), limited to at most 32 bits.
				+		inline uint32_t get_bits(uint32_t bit_ofs, uint32_t bit_len) const
				+		{
				+			assert(bit_len <= 32);
				+			return (uint32_t)get_bits64(bit_ofs, bit_len);
				+		}
				+
				+		// Reads len (1..32) bits at bit_ofs and advances bit_ofs past them.
				+		inline uint32_t next_bits(uint32_t& bit_ofs, uint32_t len) const
				+		{
				+			assert(len && (len <= 32));
				+			const uint32_t x = get_bits(bit_ofs, len);
				+			bit_ofs += len;
				+			return x;
				+		}
				+
				+		// Overwrites num_bits (1..64) bits starting at bit_ofs with val; val must fit in num_bits bits.
				+		inline uint128& set_bits(uint64_t val, uint32_t bit_ofs, uint32_t num_bits)
				+		{
				+			assert(bit_ofs < 128);
				+			assert(num_bits && (num_bits <= 64) && ((bit_ofs + num_bits) <= 128));
				+
				+			uint128 bitmask(1);
				+			bitmask = (bitmask << num_bits) - 1;
				+			assert(uint128(val) <= bitmask);
				+
				+			// Clear the destination field, then OR in the new value.
				+			bitmask <<= bit_ofs;
				+			*this &= ~bitmask;
				+
				+			*this |= (uint128(val) << bit_ofs);
				+			return *this;
				+		}
				+	};
			
 
				+		
			
 
				+	// Decodes a void-extent (constant color) physical block into log_blk.
				+	// Sets the LDR or HDR solid color flag according to bit 9 and copies the four 16-bit color
				+	// components from bits 64..127. Returns false if bits 10..11 are not both set, if the extent
				+	// coordinates are inconsistent (unless all four are all-ones), or if an HDR color component
				+	// is Inf/NaN. Note log_blk may already have been modified when the Inf/NaN check fails.
				+	static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk)
				+	{
				+		if (bits.get_bits(10, 2) != 0b11)
				+			return false;
				+
				+		// Four consecutive 13-bit extent coordinates occupy bits 12..63: min_s, max_s, min_t, max_t.
				+		uint32_t extents[4];
				+		uint32_t bit_ofs = 12;
				+		for (uint32_t i = 0; i < 4; i++)
				+			extents[i] = bits.next_bits(bit_ofs, 13);
				+		assert(bit_ofs == 64);
				+
				+		const uint32_t min_s = extents[0], max_s = extents[1];
				+		const uint32_t min_t = extents[2], max_t = extents[3];
				+
				+		// All-ones coordinates are accepted as-is; otherwise each min must be strictly below its max.
				+		bool extents_all_ones = true;
				+		for (uint32_t i = 0; i < 4; i++)
				+			extents_all_ones = extents_all_ones && (extents[i] == 0x1FFF);
				+
				+		if (!extents_all_ones)
				+		{
				+			if ((min_s >= max_s) || (min_t >= max_t))
				+				return false;
				+		}
				+
				+		// Bit 9 selects HDR vs. LDR; only the matching flag is set, the other one is left untouched.
				+		if (bits.get_bits(9, 1) != 0)
				+			log_blk.m_solid_color_flag_hdr = true;
				+		else
				+			log_blk.m_solid_color_flag_ldr = true;
				+
				+		for (uint32_t c = 0; c < 4; c++)
				+			log_blk.m_solid_color[c] = (uint16_t)bits.get_bits(64 + c * 16, 16);
				+
				+		// HDR solid colors are rejected if any component is Inf or NaN.
				+		if (log_blk.m_solid_color_flag_hdr)
				+		{
				+			for (uint32_t c = 0; c < 4; c++)
				+			{
				+				if (is_half_inf_or_nan(log_blk.m_solid_color[c]))
				+					return false;
				+			}
				+		}
				+
				+		return true;
				+	}
			
 
				+
			
 
				+	// Describes where decode_config() finds each block-mode field for one layout row.
				+	// "ofs" members are bit positions in the physical block, "size" members are field widths in bits.
				+	struct astc_dec_row
				+	{
				+		int8_t Dp_ofs;	// bit holding the dual-plane flag, or negative if the layout has none
				+		int8_t P_ofs;	// bit holding the P flag, or negative if the layout has none
				+		int8_t W_ofs;	// first bit of the grid width field
				+		int8_t W_size;	// width of the grid width field (0 = grid width is just W_bias)
				+		int8_t H_ofs;	// first bit of the grid height field
				+		int8_t H_size;	// width of the grid height field (0 = grid height is just H_bias)
				+		int8_t W_bias;	// constant added to the grid width field
				+		int8_t H_bias;	// constant added to the grid height field
				+		int8_t p0_ofs;	// bit holding bit 0 of p
				+		int8_t p1_ofs;	// bit holding bit 1 of p
				+		int8_t p2_ofs;	// bit holding bit 2 of p
				+	};
			
 
				+
			
 
				+	// Field layout for each of the 10 block-mode rows selected by decode_config().
				+	// Column order matches astc_dec_row: Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs.
				+	// The trailing comments give the resulting grid width (W) and height (H) for each row.
				+	static const astc_dec_row s_dec_rows[10] =
				+	{
				+		{ 10,  9,  7,  2,  5,  2,  4,  2,  4,  0,  1 }, // row 0: W = 4 + bits 7..8, H = 2 + bits 5..6
				+		{ 10,  9,  7,  2,  5,  2,  8,  2,  4,  0,  1 }, // row 1: W = 8 + bits 7..8, H = 2 + bits 5..6
				+		{ 10,  9,  5,  2,  7,  2,  2,  8,  4,  0,  1 }, // row 2: W = 2 + bits 5..6, H = 8 + bits 7..8
				+		{ 10,  9,  5,  2,  7,  1,  2,  6,  4,  0,  1 }, // row 3: W = 2 + bits 5..6, H = 6 + bit 7
				+
				+		{ 10,  9,  7,  1,  5,  2,  2,  2,  4,  0,  1 }, // row 4: W = 2 + bit 7,     H = 2 + bits 5..6
				+		{ 10,  9,  0,  0,  5,  2, 12,  2,  4,  2,  3 }, // row 5: W = 12,            H = 2 + bits 5..6
				+		{ 10,  9,  5,  2,  0,  0,  2, 12,  4,  2,  3 }, // row 6: W = 2 + bits 5..6, H = 12
				+		{ 10,  9,  0,  0,  0,  0,  6, 10,  4,  2,  3 }, // row 7: W = 6,             H = 10
				+
				+		{ 10,  9,  0,  0,  0,  0, 10,  6,  4,  2,  3 }, // row 8: W = 10,            H = 6
				+		{ -1, -1,  5,  2,  9,  2,  6,  6,  4,  2,  3 }, // row 9: W = 6 + bits 5..6, H = 6 + bits 9..10 (no P or dual-plane bit)
				+	};
			
 
				+
			
 
				+	// Decodes the block mode bits of a physical block.
				+	// Void-extent blocks are forwarded to decode_void_extent(). For every other valid mode this
				+	// fills in log_blk's weight grid width/height, weight ISE range and dual-plane flag.
				+	// Returns false for reserved or invalid encodings, in which case this function itself
				+	// does not write to log_blk.
				+	static bool decode_config(const uint128& bits, log_astc_block& log_blk)
				+	{
				+		const uint32_t low2 = bits.get_bits(0, 2);
				+
				+		// Reserved: low four bits all zero
				+		if (bits.get_bits(0, 4) == 0)
				+			return false;
				+
				+		// Reserved: low two bits zero and bits 6..8 all set, unless bits 2..5 are all set too
				+		if ((low2 == 0) && (bits.get_bits(6, 3) == 0b111) && (bits.get_bits(2, 4) != 0b1111))
				+			return false;
				+
				+		// Void extent
				+		if (bits.get_bits(0, 9) == 0b111111100)
				+			return decode_void_extent(bits, log_blk);
				+
				+		// Pick the s_dec_rows entry describing this block mode's field layout
				+		int row_index = -1;
				+		if (low2 != 0)
				+		{
				+			// Bits 2..3 select rows 0..2 directly; value 3 is split into rows 3 and 4 by bit 8
				+			const uint32_t sel = bits.get_bits(2, 2);
				+			row_index = (sel < 3) ? (int)sel : (3 + (int)bits.get_bits(8, 1));
				+		}
				+		else
				+		{
				+			switch (bits.get_bits(7, 2))
				+			{
				+			case 0b00:
				+				row_index = 5;
				+				break;
				+			case 0b01:
				+				row_index = 6;
				+				break;
				+			case 0b10:
				+				row_index = 9;
				+				break;
				+			default:
				+			{
				+				// Bits 7..8 are both set: bits 5..6 choose between rows 7 and 8, anything else is invalid
				+				const uint32_t sel4 = bits.get_bits(5, 4);
				+				if (sel4 == 0b1100)
				+					row_index = 7;
				+				else if (sel4 == 0b1101)
				+					row_index = 8;
				+				break;
				+			}
				+			}
				+		}
				+
				+		if (row_index < 0)
				+			return false;
				+
				+		const astc_dec_row& r = s_dec_rows[row_index];
				+
				+		// A negative offset means the layout has no such flag, which reads as false
				+		const bool P = (r.P_ofs >= 0) && (bits.get_bits(r.P_ofs, 1) != 0);
				+		const bool Dp = (r.Dp_ofs >= 0) && (bits.get_bits(r.Dp_ofs, 1) != 0);
				+
				+		// Grid dimensions are a per-row bias plus an optional bit field
				+		uint32_t W = r.W_bias;
				+		if (r.W_size)
				+			W += bits.get_bits(r.W_ofs, r.W_size);
				+
				+		uint32_t H = r.H_bias;
				+		if (r.H_size)
				+			H += bits.get_bits(r.H_ofs, r.H_size);
				+
				+		assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM));
				+		assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM));
				+
				+		// Assemble the 3-bit p value from its three scattered bits; values 0 and 1 are invalid
				+		const uint32_t p = bits.get_bits(r.p0_ofs, 1) | (bits.get_bits(r.p1_ofs, 1) << 1) | (bits.get_bits(r.p2_ofs, 1) << 2);
				+		if (p < 2)
				+			return false;
				+
				+		log_blk.m_grid_width = W;
				+		log_blk.m_grid_height = H;
				+
				+		// The P flag offsets the weight ISE range by BISE_10_LEVELS
				+		log_blk.m_weight_ise_range = (p - 2) + (P * BISE_10_LEVELS);
				+		assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
				+
				+		log_blk.m_dual_plane = Dp;
				+
				+		return true;
				+	}
			
 
				+
			
 
				+	static inline uint32_t read_le_dword(const uint8_t* pBytes)
			
 
				+	{
			
 
				+		return (pBytes[0]) | (pBytes[1] << 8U) | (pBytes[2] << 16U) | (pBytes[3] << 24U);
			
 
				+	}
			
 
				+
			
 
				+	// See 18.12.Integer Sequence Encoding - tables computed by executing the decoder functions with all possible 8/7-bit inputs.
			
 
				+	static const uint8_t s_trit_decode[256][5] =
			
 
				+	{
			
 
				+		{0,0,0,0,0},{1,0,0,0,0},{2,0,0,0,0},{0,0,2,0,0},{0,1,0,0,0},{1,1,0,0,0},{2,1,0,0,0},{1,0,2,0,0},
			
 
				+		{0,2,0,0,0},{1,2,0,0,0},{2,2,0,0,0},{2,0,2,0,0},{0,2,2,0,0},{1,2,2,0,0},{2,2,2,0,0},{2,0,2,0,0},
			
 
				+		{0,0,1,0,0},{1,0,1,0,0},{2,0,1,0,0},{0,1,2,0,0},{0,1,1,0,0},{1,1,1,0,0},{2,1,1,0,0},{1,1,2,0,0},
			
 
				+		{0,2,1,0,0},{1,2,1,0,0},{2,2,1,0,0},{2,1,2,0,0},{0,0,0,2,2},{1,0,0,2,2},{2,0,0,2,2},{0,0,2,2,2},
			
 
				+		{0,0,0,1,0},{1,0,0,1,0},{2,0,0,1,0},{0,0,2,1,0},{0,1,0,1,0},{1,1,0,1,0},{2,1,0,1,0},{1,0,2,1,0},
			
 
				+		{0,2,0,1,0},{1,2,0,1,0},{2,2,0,1,0},{2,0,2,1,0},{0,2,2,1,0},{1,2,2,1,0},{2,2,2,1,0},{2,0,2,1,0},
			
 
				+		{0,0,1,1,0},{1,0,1,1,0},{2,0,1,1,0},{0,1,2,1,0},{0,1,1,1,0},{1,1,1,1,0},{2,1,1,1,0},{1,1,2,1,0},
			
 
				+		{0,2,1,1,0},{1,2,1,1,0},{2,2,1,1,0},{2,1,2,1,0},{0,1,0,2,2},{1,1,0,2,2},{2,1,0,2,2},{1,0,2,2,2},
			
 
				+		{0,0,0,2,0},{1,0,0,2,0},{2,0,0,2,0},{0,0,2,2,0},{0,1,0,2,0},{1,1,0,2,0},{2,1,0,2,0},{1,0,2,2,0},
			
 
				+		{0,2,0,2,0},{1,2,0,2,0},{2,2,0,2,0},{2,0,2,2,0},{0,2,2,2,0},{1,2,2,2,0},{2,2,2,2,0},{2,0,2,2,0},
			
 
				+		{0,0,1,2,0},{1,0,1,2,0},{2,0,1,2,0},{0,1,2,2,0},{0,1,1,2,0},{1,1,1,2,0},{2,1,1,2,0},{1,1,2,2,0},
			
 
				+		{0,2,1,2,0},{1,2,1,2,0},{2,2,1,2,0},{2,1,2,2,0},{0,2,0,2,2},{1,2,0,2,2},{2,2,0,2,2},{2,0,2,2,2},
			
 
				+		{0,0,0,0,2},{1,0,0,0,2},{2,0,0,0,2},{0,0,2,0,2},{0,1,0,0,2},{1,1,0,0,2},{2,1,0,0,2},{1,0,2,0,2},
			
 
				+		{0,2,0,0,2},{1,2,0,0,2},{2,2,0,0,2},{2,0,2,0,2},{0,2,2,0,2},{1,2,2,0,2},{2,2,2,0,2},{2,0,2,0,2},
			
 
				+		{0,0,1,0,2},{1,0,1,0,2},{2,0,1,0,2},{0,1,2,0,2},{0,1,1,0,2},{1,1,1,0,2},{2,1,1,0,2},{1,1,2,0,2},
			
 
				+		{0,2,1,0,2},{1,2,1,0,2},{2,2,1,0,2},{2,1,2,0,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,0,2,2,2},
			
 
				+		{0,0,0,0,1},{1,0,0,0,1},{2,0,0,0,1},{0,0,2,0,1},{0,1,0,0,1},{1,1,0,0,1},{2,1,0,0,1},{1,0,2,0,1},
			
 
				+		{0,2,0,0,1},{1,2,0,0,1},{2,2,0,0,1},{2,0,2,0,1},{0,2,2,0,1},{1,2,2,0,1},{2,2,2,0,1},{2,0,2,0,1},
			
 
				+		{0,0,1,0,1},{1,0,1,0,1},{2,0,1,0,1},{0,1,2,0,1},{0,1,1,0,1},{1,1,1,0,1},{2,1,1,0,1},{1,1,2,0,1},
			
 
				+		{0,2,1,0,1},{1,2,1,0,1},{2,2,1,0,1},{2,1,2,0,1},{0,0,1,2,2},{1,0,1,2,2},{2,0,1,2,2},{0,1,2,2,2},
			
 
				+		{0,0,0,1,1},{1,0,0,1,1},{2,0,0,1,1},{0,0,2,1,1},{0,1,0,1,1},{1,1,0,1,1},{2,1,0,1,1},{1,0,2,1,1},
			
 
				+		{0,2,0,1,1},{1,2,0,1,1},{2,2,0,1,1},{2,0,2,1,1},{0,2,2,1,1},{1,2,2,1,1},{2,2,2,1,1},{2,0,2,1,1},
			
 
				+		{0,0,1,1,1},{1,0,1,1,1},{2,0,1,1,1},{0,1,2,1,1},{0,1,1,1,1},{1,1,1,1,1},{2,1,1,1,1},{1,1,2,1,1},
			
 
				+		{0,2,1,1,1},{1,2,1,1,1},{2,2,1,1,1},{2,1,2,1,1},{0,1,1,2,2},{1,1,1,2,2},{2,1,1,2,2},{1,1,2,2,2},
			
 
				+		{0,0,0,2,1},{1,0,0,2,1},{2,0,0,2,1},{0,0,2,2,1},{0,1,0,2,1},{1,1,0,2,1},{2,1,0,2,1},{1,0,2,2,1},
			
 
				+		{0,2,0,2,1},{1,2,0,2,1},{2,2,0,2,1},{2,0,2,2,1},{0,2,2,2,1},{1,2,2,2,1},{2,2,2,2,1},{2,0,2,2,1},
			
 
				+		{0,0,1,2,1},{1,0,1,2,1},{2,0,1,2,1},{0,1,2,2,1},{0,1,1,2,1},{1,1,1,2,1},{2,1,1,2,1},{1,1,2,2,1},
			
 
				+		{0,2,1,2,1},{1,2,1,2,1},{2,2,1,2,1},{2,1,2,2,1},{0,2,1,2,2},{1,2,1,2,2},{2,2,1,2,2},{2,1,2,2,2},
			
 
				+		{0,0,0,1,2},{1,0,0,1,2},{2,0,0,1,2},{0,0,2,1,2},{0,1,0,1,2},{1,1,0,1,2},{2,1,0,1,2},{1,0,2,1,2},
			
 
				+		{0,2,0,1,2},{1,2,0,1,2},{2,2,0,1,2},{2,0,2,1,2},{0,2,2,1,2},{1,2,2,1,2},{2,2,2,1,2},{2,0,2,1,2},
			
 
				+		{0,0,1,1,2},{1,0,1,1,2},{2,0,1,1,2},{0,1,2,1,2},{0,1,1,1,2},{1,1,1,1,2},{2,1,1,1,2},{1,1,2,1,2},
			
 
				+		{0,2,1,1,2},{1,2,1,1,2},{2,2,1,1,2},{2,1,2,1,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,1,2,2,2}
			
 
				+	};
			
 
				+
			
 
				+	static const uint8_t s_quint_decode[128][3] =
			
 
				+	{
			
 
				+		{0,0,0},{1,0,0},{2,0,0},{3,0,0},{4,0,0},{0,4,0},{4,4,0},{4,4,4},
			
 
				+		{0,1,0},{1,1,0},{2,1,0},{3,1,0},{4,1,0},{1,4,0},{4,4,1},{4,4,4},
			
 
				+		{0,2,0},{1,2,0},{2,2,0},{3,2,0},{4,2,0},{2,4,0},{4,4,2},{4,4,4},
			
 
				+		{0,3,0},{1,3,0},{2,3,0},{3,3,0},{4,3,0},{3,4,0},{4,4,3},{4,4,4},
			
 
				+		{0,0,1},{1,0,1},{2,0,1},{3,0,1},{4,0,1},{0,4,1},{4,0,4},{0,4,4},
			
 
				+		{0,1,1},{1,1,1},{2,1,1},{3,1,1},{4,1,1},{1,4,1},{4,1,4},{1,4,4},
			
 
				+		{0,2,1},{1,2,1},{2,2,1},{3,2,1},{4,2,1},{2,4,1},{4,2,4},{2,4,4},
			
 
				+		{0,3,1},{1,3,1},{2,3,1},{3,3,1},{4,3,1},{3,4,1},{4,3,4},{3,4,4},
			
 
				+		{0,0,2},{1,0,2},{2,0,2},{3,0,2},{4,0,2},{0,4,2},{2,0,4},{3,0,4},
			
 
				+		{0,1,2},{1,1,2},{2,1,2},{3,1,2},{4,1,2},{1,4,2},{2,1,4},{3,1,4},
			
 
				+		{0,2,2},{1,2,2},{2,2,2},{3,2,2},{4,2,2},{2,4,2},{2,2,4},{3,2,4},
			
 
				+		{0,3,2},{1,3,2},{2,3,2},{3,3,2},{4,3,2},{3,4,2},{2,3,4},{3,3,4},
			
 
				+		{0,0,3},{1,0,3},{2,0,3},{3,0,3},{4,0,3},{0,4,3},{0,0,4},{1,0,4},
			
 
				+		{0,1,3},{1,1,3},{2,1,3},{3,1,3},{4,1,3},{1,4,3},{0,1,4},{1,1,4},
			
 
				+		{0,2,3},{1,2,3},{2,2,3},{3,2,3},{4,2,3},{2,4,3},{0,2,4},{1,2,4},
			
 
				+		{0,3,3},{1,3,3},{2,3,3},{3,3,3},{4,3,3},{3,4,3},{0,3,4},{1,3,4}
			
 
				+	};
			
 
				+
			
 
				+	static void decode_trit_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
			
 
				+	{
			
 
				+		assert((num_vals >= 1) && (num_vals <= 5));
			
 
				+		uint32_t m[5] = { 0 }, T = 0;
			
 
				+
			
 
				+		static const uint8_t s_t_bits[5] = { 2, 2, 1, 2, 1 };
			
 
				+
			
 
				+		for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
			
 
				+		{
			
 
				+			if (bits_per_val)
			
 
				+				m[c] = bits.next_bits(bit_ofs, bits_per_val);
			
 
				+			T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
			
 
				+			T_ofs += s_t_bits[c];
			
 
				+		}
			
 
				+
			
 
				+		const uint8_t (&p_trits)[5] = s_trit_decode[T];
			
 
				+
			
 
				+		for (uint32_t i = 0; i < num_vals; i++)
			
 
				+			pVals[i] = (uint8_t)((p_trits[i] << bits_per_val) | m[i]);
			
 
				+	}
			
 
				+
			
 
				+	static void decode_quint_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
			
 
				+	{
			
 
				+		assert((num_vals >= 1) && (num_vals <= 3));
			
 
				+		uint32_t m[3] = { 0 }, T = 0;
			
 
				+
			
 
				+		static const uint8_t s_t_bits[3] = { 3, 2, 2 };
			
 
				+
			
 
				+		for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
			
 
				+		{
			
 
				+			if (bits_per_val)
			
 
				+				m[c] = bits.next_bits(bit_ofs, bits_per_val);
			
 
				+			T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
			
 
				+			T_ofs += s_t_bits[c];
			
 
				+		}
			
 
				+
			
 
				+		const uint8_t (&p_quints)[3] = s_quint_decode[T];
			
 
				+
			
 
				+		for (uint32_t i = 0; i < num_vals; i++)
			
 
				+			pVals[i] = (uint8_t)((p_quints[i] << bits_per_val) | m[i]);
			
 
				+	}
			
 
				+
			
 
				+	static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs)
			
 
				+	{
			
 
				+		assert(num_vals && (ise_range < TOTAL_ISE_RANGES));
			
 
				+		
			
 
				+		const uint32_t bits_per_val = g_ise_range_table[ise_range][0];
			
 
				+
			
 
				+		if (g_ise_range_table[ise_range][1])
			
 
				+		{
			
 
				+			// Trits+bits, 5 vals per block, 7 bits extra per block
			
 
				+			const uint32_t total_blocks = (num_vals + 4) / 5;
			
 
				+			for (uint32_t b = 0; b < total_blocks; b++)
			
 
				+			{
			
 
				+				const uint32_t num_vals_in_block = std::min<int>(num_vals - 5 * b, 5);
			
 
				+				decode_trit_block(pVals + 5 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
			
 
				+			}
			
 
				+		}
			
 
				+		else if (g_ise_range_table[ise_range][2])
			
 
				+		{
			
 
				+			// Quints+bits, 3 vals per block, 8 bits extra per block
			
 
				+			const uint32_t total_blocks = (num_vals + 2) / 3;
			
 
				+			for (uint32_t b = 0; b < total_blocks; b++)
			
 
				+			{
			
 
				+				const uint32_t num_vals_in_block = std::min<int>(num_vals - 3 * b, 3);
			
 
				+				decode_quint_block(pVals + 3 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			assert(bits_per_val);
			
 
				+
			
 
				+			// Only bits
			
 
				+			for (uint32_t i = 0; i < num_vals; i++)
			
 
				+				pVals[i] = (uint8_t)bits.next_bits(bit_ofs, bits_per_val);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t* pBits128, uint32_t bit_ofs)
			
 
				+	{
			
 
				+		const uint128 bits(
			
 
				+			(uint64_t)read_le_dword(pBits128) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t))) << 32),
			
 
				+			(uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 3)) << 32));
			
 
				+
			
 
				+		return decode_bise(ise_range, pVals, num_vals, bits, bit_ofs);
			
 
				+	}
			
 
				+		
			
 
				+	// Decodes a physical ASTC block to a logical ASTC block.
			
 
				+	// blk_width/blk_height are only used to validate the weight grid's dimensions.
			
 
				+	bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height)
			
 
				+	{
			
 
				+		assert(is_valid_block_size(blk_width, blk_height));
			
 
				+				
			
 
				+		const uint8_t* pS = (uint8_t*)pASTC_block;
			
 
				+
			
 
				+		log_blk.clear();
			
 
				+		log_blk.m_error_flag = true;
			
 
				+		
			
 
				+		const uint128 bits(
			
 
				+			(uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32),
			
 
				+			(uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32));
			
 
				+		
			
 
				+		const uint128 rev_bits(bits.get_reversed_bits());
			
 
				+				
			
 
				+		if (!decode_config(bits, log_blk))
			
 
				+			return false;
			
 
				+
			
 
				+		if (log_blk.m_solid_color_flag_hdr || log_blk.m_solid_color_flag_ldr)
			
 
				+		{
			
 
				+			// Void extent
			
 
				+			log_blk.m_error_flag = false;
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				+		// Check grid dimensions
			
 
				+		if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
			
 
				+			return false;
			
 
				+		
			
 
				+		// Now we have the grid width/height, dual plane, weight ISE range
			
 
				+		
			
 
				+		const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height);
			
 
				+		const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range);
			
 
				+				
			
 
				+		// 18.24 Illegal Encodings
			
 
				+		if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
			
 
				+			return false;
			
 
				+		
			
 
				+		const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits;
			
 
				+
			
 
				+		uint32_t total_extra_bits = 0;
			
 
				+
			
 
				+		// Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane.
			
 
				+
			
 
				+		log_blk.m_num_partitions = bits.get_bits(11, 2) + 1;
			
 
				+		if (log_blk.m_num_partitions == 1)
			
 
				+			log_blk.m_color_endpoint_modes[0] = bits.get_bits(13, 4); // read CEM bits
			
 
				+		else
			
 
				+		{
			
 
				+			// 2 or more partitions
			
 
				+			if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4))
			
 
				+				return false;
			
 
				+
			
 
				+			log_blk.m_partition_id = bits.get_bits(13, 10);
			
 
				+
			
 
				+			uint32_t cem_bits = bits.get_bits(23, 6);
			
 
				+
			
 
				+			if ((cem_bits & 3) == 0)
			
 
				+			{
			
 
				+				// All CEM's the same
			
 
				+				for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
			
 
				+					log_blk.m_color_endpoint_modes[i] = cem_bits >> 2;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// CEM's different, but within up to 2 adjacent classes
			
 
				+				const uint32_t first_cem_index = ((cem_bits & 3) - 1) * 4;
			
 
				+
			
 
				+				total_extra_bits = 3 * log_blk.m_num_partitions - 4;
			
 
				+
			
 
				+				if ((total_weight_bits + total_extra_bits) > 128)
			
 
				+					return false;
			
 
				+
			
 
				+				uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
			
 
				+				
			
 
				+				uint32_t c[4] = { 0 }, m[4] = { 0 };
			
 
				+				
			
 
				+				cem_bits >>= 2;
			
 
				+				for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1)
			
 
				+					c[i] = cem_bits & 1;
			
 
				+
			
 
				+				switch (log_blk.m_num_partitions)
			
 
				+				{
			
 
				+				case 2:
			
 
				+				{
			
 
				+					m[0] = cem_bits & 3;
			
 
				+					m[1] = bits.next_bits(cem_bit_pos, 2);
			
 
				+					break;
			
 
				+				}
			
 
				+				case 3:
			
 
				+				{
			
 
				+					m[0] = cem_bits & 1;
			
 
				+					m[0] |= (bits.next_bits(cem_bit_pos, 1) << 1);
			
 
				+					m[1] = bits.next_bits(cem_bit_pos, 2);
			
 
				+					m[2] = bits.next_bits(cem_bit_pos, 2);
			
 
				+					break;
			
 
				+				}
			
 
				+				case 4:
			
 
				+				{
			
 
				+					for (uint32_t i = 0; i < 4; i++)
			
 
				+						m[i] = bits.next_bits(cem_bit_pos, 2);
			
 
				+					break;
			
 
				+				}
			
 
				+				default:
			
 
				+				{
			
 
				+					assert(0);
			
 
				+					break;
			
 
				+				}
			
 
				+				}
			
 
				+
			
 
				+				assert(cem_bit_pos == end_of_weight_bit_ofs);
			
 
				+
			
 
				+				for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
			
 
				+				{
			
 
				+					log_blk.m_color_endpoint_modes[i] = first_cem_index + (c[i] * 4) + m[i];
			
 
				+					assert(log_blk.m_color_endpoint_modes[i] <= 15);
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		// Now we have all the CEM indices.
			
 
				+
			
 
				+		if (log_blk.m_dual_plane)
			
 
				+		{
			
 
				+			// Read CCS bits, beneath any CEM bits
			
 
				+			total_extra_bits += 2;
			
 
				+
			
 
				+			if (total_extra_bits > end_of_weight_bit_ofs)
			
 
				+				return false;
			
 
				+
			
 
				+			uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
			
 
				+			log_blk.m_color_component_selector = bits.get_bits(ccs_bit_pos, 2);
			
 
				+		}
			
 
				+
			
 
				+		uint32_t config_bit_pos = 11 + 2; // config+num_parts
			
 
				+		if (log_blk.m_num_partitions == 1)
			
 
				+			config_bit_pos += 4; // CEM bits
			
 
				+		else
			
 
				+			config_bit_pos += 10 + 6; // part_id+CEM bits
			
 
				+
			
 
				+		// config+num_parts+total_extra_bits (CEM extra+CCS)
			
 
				+		uint32_t total_config_bits = config_bit_pos + total_extra_bits;
			
 
				+		
			
 
				+		// Compute number of remaining bits in block
			
 
				+		const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
			
 
				+		if (num_remaining_bits < 0)
			
 
				+			return false;
			
 
				+
			
 
				+		// Compute total number of ISE encoded color endpoint mode values
			
 
				+		uint32_t total_cem_vals = 0;
			
 
				+		for (uint32_t j = 0; j < log_blk.m_num_partitions; j++)
			
 
				+			total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[j]);
			
 
				+
			
 
				+		if (total_cem_vals > MAX_ENDPOINTS)
			
 
				+			return false;
			
 
				+
			
 
				+		// Infer endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block
			
 
				+		int endpoint_ise_range = -1;
			
 
				+		for (int k = 20; k > 0; k--)
			
 
				+		{
			
 
				+			int b = get_ise_sequence_bits(total_cem_vals, k);
			
 
				+			if (b <= num_remaining_bits)
			
 
				+			{
			
 
				+				endpoint_ise_range = k;
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
			
 
				+		if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
			
 
				+			return false;
			
 
				+
			
 
				+		log_blk.m_endpoint_ise_range = endpoint_ise_range;
			
 
				+
			
 
				+		// Decode endpoints forwards in block
			
 
				+		decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos);
			
 
				+
			
 
				+		// Decode grid weights backwards in block
			
 
				+		decode_bise(log_blk.m_weight_ise_range, log_blk.m_weights, total_grid_weights, rev_bits, 0);
			
 
				+
			
 
				+		log_blk.m_error_flag = false;
			
 
				+
			
 
				+		return true;
			
 
				+	}
			
 
				+		
			
 
				+} // namespace astc_helpers
			
 
				+
			
 
				+#endif //BASISU_ASTC_HELPERS_IMPLEMENTATION
			
--- a/thirdparty/basis_universal/transcoder/basisu_containers.h
+++ b/thirdparty/basis_universal/transcoder/basisu_containers.h
@@ -188,8 +188,9 @@ namespace basisu
 
				 
			
 
				 #define BASISU_IS_SCALAR_TYPE(T) (scalar_type<T>::cFlag)
			
 
				 
			
 
				-#if defined(__GNUC__) && __GNUC__<5
			
 
				-   #define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
			
 
				+#if !defined(BASISU_HAVE_STD_TRIVIALLY_COPYABLE) && defined(__GNUC__) && __GNUC__<5
			
 
				+   //#define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
			
 
				+    #define BASISU_IS_TRIVIALLY_COPYABLE(...) __is_trivially_copyable(__VA_ARGS__)
			
 
				 #else
			
 
				    #define BASISU_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
			
 
				 #endif
			
@@ -286,8 +287,19 @@ namespace basisu
 
				 
			
 
				          if (BASISU_IS_BITWISE_COPYABLE(T))
			
 
				          {
			
 
				+#ifndef __EMSCRIPTEN__
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic push
			
 
				+#pragma GCC diagnostic ignored "-Wclass-memaccess"            
			
 
				+#endif                  
			
 
				+#endif
			
 
				              if ((m_p) && (other.m_p))
			
 
				                 memcpy(m_p, other.m_p, m_size * sizeof(T));
			
 
				+#ifndef __EMSCRIPTEN__
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic pop
			
 
				+#endif                
			
 
				+#endif
			
 
				          }
			
 
				          else
			
 
				          {
			
@@ -330,8 +342,19 @@ namespace basisu
 
				 
			
 
				          if (BASISU_IS_BITWISE_COPYABLE(T))
			
 
				          {
			
 
				+#ifndef __EMSCRIPTEN__
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic push
			
 
				+#pragma GCC diagnostic ignored "-Wclass-memaccess"            
			
 
				+#endif         
			
 
				+#endif
			
 
				              if ((m_p) && (other.m_p))
			
 
				                 memcpy(m_p, other.m_p, other.m_size * sizeof(T));
			
 
				+#ifndef __EMSCRIPTEN__          
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic pop
			
 
				+#endif                            
			
 
				+#endif
			
 
				          }
			
 
				          else
			
 
				          {
			
@@ -501,7 +524,7 @@ namespace basisu
 
				 
			
 
				          if (new_capacity > m_capacity)
			
 
				          {
			
 
				-            if (!increase_capacity(new_capacity, false))
			
 
				+            if (!increase_capacity(new_capacity, false, true))
			
 
				                return false;
			
 
				          }
			
 
				          else if (new_capacity < m_capacity)
			
@@ -509,7 +532,8 @@ namespace basisu
 
				             // Must work around the lack of a "decrease_capacity()" method.
			
 
				             // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize.
			
 
				             vector tmp;
			
 
				-            tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false);
			
 
				+            if (!tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false, true))
			
 
				+                return false;
			
 
				             tmp = *this;
			
 
				             swap(tmp);
			
 
				          }
			
@@ -750,7 +774,21 @@ namespace basisu
 
				             }
			
 
				 
			
 
				             // Copy "down" the objects to preserve, filling in the empty slots.
			
 
				+
			
 
				+#ifndef __EMSCRIPTEN__
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic push
			
 
				+#pragma GCC diagnostic ignored "-Wclass-memaccess"            
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				             memmove(pDst, pSrc, num_to_move * sizeof(T));
			
 
				+
			
 
				+#ifndef __EMSCRIPTEN__
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic pop
			
 
				+#endif            
			
 
				+#endif
			
 
				          }
			
 
				          else
			
 
				          {
			
@@ -1003,7 +1041,21 @@ namespace basisu
 
				       inline void set_all(const T& o)
			
 
				       {
			
 
				          if ((sizeof(T) == 1) && (scalar_type<T>::cFlag))
			
 
				+         {
			
 
				+#ifndef __EMSCRIPTEN__
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic push
			
 
				+#pragma GCC diagnostic ignored "-Wclass-memaccess"            
			
 
				+#endif              
			
 
				+#endif
			
 
				             memset(m_p, *reinterpret_cast<const uint8_t*>(&o), m_size);
			
 
				+
			
 
				+#ifndef __EMSCRIPTEN__            
			
 
				+#ifdef __GNUC__
			
 
				+#pragma GCC diagnostic pop
			
 
				+#endif                        
			
 
				+#endif
			
 
				+         }
			
 
				          else
			
 
				          {
			
 
				             T* pDst = m_p;
			
@@ -1029,7 +1081,7 @@ namespace basisu
 
				       // Important: This method is used in Basis Universal. If you change how this container allocates memory, you'll need to change any users of this method.
			
 
				       inline bool grant_ownership(T* p, uint32_t size, uint32_t capacity)
			
 
				       {
			
 
				-         // To to prevent the caller from obviously shooting themselves in the foot.
			
 
				+         // To prevent the caller from obviously shooting themselves in the foot.
			
 
				          if (((p + capacity) > m_p) && (p < (m_p + m_capacity)))
			
 
				          {
			
 
				             // Can grant ownership of a block inside the container itself!
			
--- a/thirdparty/basis_universal/transcoder/basisu_containers_impl.h
+++ b/thirdparty/basis_universal/transcoder/basisu_containers_impl.h
@@ -19,23 +19,30 @@ namespace basisu
 
				       if (m_capacity >= min_new_capacity)
			
 
				          return true;
			
 
				 
			
 
				-      size_t new_capacity = min_new_capacity;
			
 
				-      if ((grow_hint) && (!helpers::is_power_of_2((uint64_t)new_capacity)))
			
 
				-      {
			
 
				-         new_capacity = (size_t)helpers::next_pow2((uint64_t)new_capacity);
			
 
				-
			
 
				-         assert(new_capacity && (new_capacity > m_capacity));
			
 
				+      uint64_t new_capacity_u64 = min_new_capacity;
			
 
				+      if ((grow_hint) && (!helpers::is_power_of_2(new_capacity_u64)))
			
 
				+          new_capacity_u64 = helpers::next_pow2(new_capacity_u64);
			
 
				 
			
 
				-         if (new_capacity < min_new_capacity)
			
 
				-         {
			
 
				-            if (nofail)
			
 
				-               return false;
			
 
				-            fprintf(stderr, "vector too large\n");
			
 
				-            abort();
			
 
				-         }
			
 
				+      size_t new_capacity = (size_t)new_capacity_u64;
			
 
				+      if (new_capacity != new_capacity_u64)
			
 
				+      {
			
 
				+          if (nofail)
			
 
				+              return false;
			
 
				+          fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n");
			
 
				+          abort();
			
 
				       }
			
 
				             
			
 
				-      const size_t desired_size = element_size * new_capacity;
			
 
				+      const uint64_t desired_size_u64 = (uint64_t)element_size * new_capacity;
			
 
				+            
			
 
				+      const size_t desired_size = (size_t)desired_size_u64;
			
 
				+      if (desired_size_u64 != desired_size)
			
 
				+      {
			
 
				+          if (nofail)
			
 
				+              return false;
			
 
				+          fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n");
			
 
				+          abort();
			
 
				+      }
			
 
				+
			
 
				       size_t actual_size = 0;
			
 
				       if (!pMover)
			
 
				       {
			
@@ -46,11 +53,7 @@ namespace basisu
 
				                return false;
			
 
				 
			
 
				             char buf[256];
			
 
				-#ifdef _MSC_VER
			
 
				-            sprintf_s(buf, sizeof(buf), "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size);
			
 
				-#else
			
 
				-            sprintf(buf, "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size);
			
 
				-#endif
			
 
				+            snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: realloc() failed allocating %zu bytes", desired_size);
			
 
				             fprintf(stderr, "%s", buf);
			
 
				             abort();
			
 
				          }
			
@@ -75,11 +78,7 @@ namespace basisu
 
				                return false;
			
 
				 
			
 
				             char buf[256];
			
 
				-#ifdef _MSC_VER
			
 
				-            sprintf_s(buf, sizeof(buf), "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size);
			
 
				-#else
			
 
				-            sprintf(buf, "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size);
			
 
				-#endif
			
 
				+            snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: malloc() failed allocating %zu bytes", desired_size);
			
 
				             fprintf(stderr, "%s", buf);
			
 
				             abort();
			
 
				          }
			
--- a/thirdparty/basis_universal/transcoder/basisu_file_headers.h
+++ b/thirdparty/basis_universal/transcoder/basisu_file_headers.h
@@ -1,5 +1,5 @@
 
				 // basis_file_headers.h
			
 
				-// Copyright (C) 2019-2020 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -89,7 +89,8 @@ namespace basist
 
				 	enum class basis_tex_format
			
 
				 	{
			
 
				 		cETC1S = 0,
			
 
				-		cUASTC4x4 = 1
			
 
				+		cUASTC4x4 = 1,
			
 
				+		cUASTC_HDR_4x4 = 2
			
 
				 	};
			
 
				 
			
 
				 	struct basis_file_header
			
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
@@ -1,5 +1,5 @@
 
				 // basisu_transcoder.cpp
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
@@ -17,6 +17,11 @@
 
				 #include <limits.h>
			
 
				 #include "basisu_containers_impl.h"
			
 
				 
			
 
				+#define BASISU_ASTC_HELPERS_IMPLEMENTATION
			
 
				+#include "basisu_astc_helpers.h"
			
 
				+
			
 
				+#include "basisu_astc_hdr_core.h"
			
 
				+
			
 
				 #ifndef BASISD_IS_BIG_ENDIAN
			
 
				 // TODO: This doesn't work on OSX. How can this be so difficult?
			
 
				 //#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
			
@@ -139,6 +144,10 @@
 
				 	#endif
			
 
				 #endif
			
 
				 
			
 
				+#ifndef BASISD_SUPPORT_UASTC_HDR
			
 
				+	#define BASISD_SUPPORT_UASTC_HDR 1
			
 
				+#endif
			
 
				+
			
 
				 #define BASISD_WRITE_NEW_BC7_MODE5_TABLES			0
			
 
				 #define BASISD_WRITE_NEW_DXT1_TABLES				0
			
 
				 #define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES		0
			
@@ -1908,17 +1917,24 @@ namespace basist
 
				 	void basisu_transcoder_init()
			
 
				 	{
			
 
				 		if (g_transcoder_initialized)
			
 
				-      {
			
 
				-         BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");      
			
 
				+		{
			
 
				+			BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");      
			
 
				 			return;
			
 
				-      }
			
 
				+		}
			
 
				          
			
 
				-     BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");      
			
 
				+		BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");      
			
 
				 
			
 
				 #if BASISD_SUPPORT_UASTC
			
 
				 		uastc_init();
			
 
				 #endif
			
 
				 
			
 
				+#if BASISD_SUPPORT_UASTC_HDR
			
 
				+		// TODO: Examine this, optimize for startup time/mem utilization.
			
 
				+		astc_helpers::init_tables(false);
			
 
				+
			
 
				+		astc_hdr_core_init();
			
 
				+#endif
			
 
				+
			
 
				 #if BASISD_SUPPORT_ASTC
			
 
				 		transcoder_init_astc();
			
 
				 #endif
			
@@ -2027,6 +2043,10 @@ namespace basist
 
				 		transcoder_init_pvrtc2();
			
 
				 #endif
			
 
				 
			
 
				+#if BASISD_SUPPORT_UASTC_HDR
			
 
				+		bc6h_enc_init();
			
 
				+#endif
			
 
				+
			
 
				 		g_transcoder_initialized = true;
			
 
				 	}
			
 
				 
			
@@ -6928,7 +6948,7 @@ namespace basist
 
				 
			
 
				 	static inline int sq(int x) { return x * x; }
			
 
				 						
			
 
				-	// PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. 
			
 
				+	// PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0. 
			
 
				 	// This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! 
			
 
				 	// And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it.
			
 
				 	static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook)
			
@@ -7515,6 +7535,8 @@ namespace basist
 
				 	}
			
 
				 #endif // BASISD_SUPPORT_PVRTC2
			
 
				 
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				+
			
 
				 	basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder() :
			
 
				 		m_pGlobal_codebook(nullptr),
			
 
				 		m_selector_history_buf_size(0)
			
@@ -8620,7 +8642,7 @@ namespace basist
 
				 			// Now make sure the output buffer is large enough, or we'll overwrite memory.
			
 
				 			if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
			
 
				 			{
			
 
				-				BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
			
 
				+				BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
			
 
				 				return false;
			
 
				 			}
			
 
				 		}
			
@@ -8632,7 +8654,7 @@ namespace basist
 
				 
			
 
				 			if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1)
			
 
				 			{
			
 
				-				BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
			
 
				+				BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
			
 
				 				return false;
			
 
				 			}
			
 
				 		}
			
@@ -8640,7 +8662,7 @@ namespace basist
 
				 		{
			
 
				 			if (output_blocks_buf_size_in_blocks_or_pixels < total_slice_blocks)
			
 
				 			{
			
 
				-				BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
			
 
				+				BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
			
 
				 				return false;
			
 
				 			}
			
 
				 		}
			
@@ -9242,13 +9264,17 @@ namespace basist
 
				 
			
 
				 		return status;
			
 
				 	}
			
 
				+
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				 	
			
 
				 	basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder()
			
 
				 	{
			
 
				 	}
			
 
				 
			
 
				-	bool basisu_lowlevel_uastc_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
			
 
				-        uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
			
 
				+	bool basisu_lowlevel_uastc_transcoder::transcode_slice(
			
 
				+		void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
			
 
				+        uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, 
			
 
				+		const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
			
 
				 		basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
			
 
				 	{
			
 
				 		BASISU_NOTE_UNUSED(pState);
			
@@ -9784,6 +9810,317 @@ namespace basist
 
				 
			
 
				 		return status;
			
 
				 	}
			
 
				+
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+	basisu_lowlevel_uastc_hdr_transcoder::basisu_lowlevel_uastc_hdr_transcoder()
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	bool basisu_lowlevel_uastc_hdr_transcoder::transcode_slice(
			
 
				+		void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
			
 
				+		uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, 
			
 
				+		const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
			
 
				+		basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
			
 
				+	{
			
 
				+		BASISU_NOTE_UNUSED(pState);
			
 
				+		BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
			
 
				+		BASISU_NOTE_UNUSED(has_alpha);
			
 
				+		BASISU_NOTE_UNUSED(channel0);
			
 
				+		BASISU_NOTE_UNUSED(channel1);
			
 
				+		BASISU_NOTE_UNUSED(decode_flags);
			
 
				+
			
 
				+		assert(g_transcoder_initialized);
			
 
				+		if (!g_transcoder_initialized)
			
 
				+		{
			
 
				+			BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder not globally initialized.\n");
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+#if BASISD_SUPPORT_UASTC_HDR
			
 
				+		const uint32_t total_blocks = num_blocks_x * num_blocks_y;
			
 
				+
			
 
				+		if (!output_row_pitch_in_blocks_or_pixels)
			
 
				+		{
			
 
				+			if (basis_block_format_is_uncompressed(fmt))
			
 
				+				output_row_pitch_in_blocks_or_pixels = orig_width;
			
 
				+			else
			
 
				+				output_row_pitch_in_blocks_or_pixels = num_blocks_x;
			
 
				+		}
			
 
				+
			
 
				+		if (basis_block_format_is_uncompressed(fmt))
			
 
				+		{
			
 
				+			if (!output_rows_in_pixels)
			
 
				+				output_rows_in_pixels = orig_height;
			
 
				+		}
			
 
				+
			
 
				+		uint32_t total_expected_block_bytes = sizeof(astc_blk) * total_blocks;
			
 
				+		if (image_data_size < total_expected_block_bytes)
			
 
				+		{
			
 
				+			BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		const astc_blk* pSource_block = reinterpret_cast<const astc_blk*>(pImage_data);
			
 
				+
			
 
				+		bool status = false;
			
 
				+
			
 
				+		// TODO: Optimize pure memcpy() case.
			
 
				+			
			
 
				+		for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
			
 
				+		{
			
 
				+			void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
			
 
				+
			
 
				+			for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes)
			
 
				+			{
			
 
				+				switch (fmt)
			
 
				+				{
			
 
				+				case block_format::cUASTC_HDR_4x4:
			
 
				+				case block_format::cASTC_HDR_4x4:
			
 
				+				{
			
 
				+					// Nothing to do, UASTC HDR is just ASTC.
			
 
				+					memcpy(pDst_block, pSource_block, sizeof(uastc_block));
			
 
				+					status = true;
			
 
				+					break;
			
 
				+				}
			
 
				+				case block_format::cBC6H:
			
 
				+				{
			
 
				+					status = astc_hdr_transcode_to_bc6h(*pSource_block, *(bc6h_block *)pDst_block);
			
 
				+					break;
			
 
				+				}
			
 
				+				case block_format::cRGB_9E5:
			
 
				+				{
			
 
				+					astc_helpers::log_astc_block log_blk;
			
 
				+					status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
			
 
				+					if (status)
			
 
				+					{
			
 
				+						uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(
			
 
				+							static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t)
			
 
				+							);
			
 
				+
			
 
				+						uint32_t blk_texels[4][4];
			
 
				+
			
 
				+						status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeRGB9E5);
			
 
				+						
			
 
				+						if (status)
			
 
				+						{
			
 
				+							const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
			
 
				+							const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
			
 
				+
			
 
				+							for (uint32_t y = 0; y < max_y; y++)
			
 
				+							{
			
 
				+								memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x);
			
 
				+
			
 
				+								pDst_pixels += output_row_pitch_in_blocks_or_pixels;
			
 
				+							} // y
			
 
				+						}
			
 
				+					}
			
 
				+					
			
 
				+					break;
			
 
				+				}
			
 
				+				case block_format::cRGBA_HALF:
			
 
				+				{
			
 
				+					astc_helpers::log_astc_block log_blk;
			
 
				+					status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
			
 
				+					if (status)
			
 
				+					{
			
 
				+						half_float* pDst_pixels = reinterpret_cast<half_float*>(
			
 
				+							static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4
			
 
				+							);
			
 
				+												
			
 
				+						half_float blk_texels[4][4][4];
			
 
				+						status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
			
 
				+
			
 
				+						if (status)
			
 
				+						{
			
 
				+							const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
			
 
				+							const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
			
 
				+
			
 
				+							for (uint32_t y = 0; y < max_y; y++)
			
 
				+							{
			
 
				+								for (uint32_t x = 0; x < max_x; x++)
			
 
				+								{
			
 
				+									pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
			
 
				+									pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
			
 
				+									pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
			
 
				+									pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
			
 
				+								} // x
			
 
				+
			
 
				+								pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4;
			
 
				+							} // y
			
 
				+						}
			
 
				+					}
			
 
				+
			
 
				+					break;
			
 
				+				}
			
 
				+				case block_format::cRGB_HALF:
			
 
				+				{
			
 
				+					astc_helpers:: log_astc_block log_blk;
			
 
				+					status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
			
 
				+					if (status)
			
 
				+					{
			
 
				+						half_float* pDst_pixels =
			
 
				+							reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3);
			
 
				+
			
 
				+						half_float blk_texels[4][4][4];
			
 
				+						status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
			
 
				+						if (status)
			
 
				+						{
			
 
				+							const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
			
 
				+							const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
			
 
				+
			
 
				+							for (uint32_t y = 0; y < max_y; y++)
			
 
				+							{
			
 
				+								for (uint32_t x = 0; x < max_x; x++)
			
 
				+								{
			
 
				+									pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
			
 
				+									pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
			
 
				+									pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
			
 
				+								} // x
			
 
				+
			
 
				+								pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3;
			
 
				+							} // y
			
 
				+						}
			
 
				+					}
			
 
				+
			
 
				+					break;
			
 
				+				}
			
 
				+				default:
			
 
				+					assert(0);
			
 
				+					break;
			
 
				+
			
 
				+				}
			
 
				+
			
 
				+				if (!status)
			
 
				+				{
			
 
				+					BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n");					return false;
			
 
				+				}
			
 
				+
			
 
				+			} // block_x
			
 
				+
			
 
				+		} // block_y
			
 
				+
			
 
				+		return true;
			
 
				+#else
			
 
				+		BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: UASTC_HDR is unsupported\n");
			
 
				+
			
 
				+		BASISU_NOTE_UNUSED(decode_flags);
			
 
				+		BASISU_NOTE_UNUSED(channel0);
			
 
				+		BASISU_NOTE_UNUSED(channel1);
			
 
				+		BASISU_NOTE_UNUSED(output_rows_in_pixels);
			
 
				+		BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
			
 
				+		BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
			
 
				+		BASISU_NOTE_UNUSED(fmt);
			
 
				+		BASISU_NOTE_UNUSED(image_data_size);
			
 
				+		BASISU_NOTE_UNUSED(pImage_data);
			
 
				+		BASISU_NOTE_UNUSED(num_blocks_x);
			
 
				+		BASISU_NOTE_UNUSED(num_blocks_y);
			
 
				+		BASISU_NOTE_UNUSED(pDst_blocks);
			
 
				+
			
 
				+		return false;
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	bool basisu_lowlevel_uastc_hdr_transcoder::transcode_image(
			
 
				+		transcoder_texture_format target_format,
			
 
				+		void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
			
 
				+		const uint8_t* pCompressed_data, uint32_t compressed_data_length,
			
 
				+		uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
			
 
				+		uint32_t slice_offset, uint32_t slice_length,
			
 
				+		uint32_t decode_flags,
			
 
				+		bool has_alpha,
			
 
				+		bool is_video,
			
 
				+		uint32_t output_row_pitch_in_blocks_or_pixels,
			
 
				+		basisu_transcoder_state* pState,
			
 
				+		uint32_t output_rows_in_pixels,
			
 
				+		int channel0, int channel1)
			
 
				+	{
			
 
				+		BASISU_NOTE_UNUSED(is_video);
			
 
				+		BASISU_NOTE_UNUSED(level_index);
			
 
				+		BASISU_NOTE_UNUSED(decode_flags);
			
 
				+
			
 
				+		if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
			
 
				+		{
			
 
				+			BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: source data buffer too small\n");
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
			
 
				+		const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
			
 
				+
			
 
				+		if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks))
			
 
				+		{
			
 
				+			BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: output buffer size too small\n");
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		bool status = false;
			
 
				+
			
 
				+		switch (target_format)
			
 
				+		{
			
 
				+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
			
 
				+		{
			
 
				+			status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_4x4,
			
 
				+				bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
			
 
				+
			
 
				+			if (!status)
			
 
				+			{
			
 
				+				BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
			
 
				+			}
			
 
				+			break;
			
 
				+		}
			
 
				+		case transcoder_texture_format::cTFBC6H:
			
 
				+		{
			
 
				+			status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
			
 
				+				bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
			
 
				+			if (!status)
			
 
				+			{
			
 
				+				BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
			
 
				+			}
			
 
				+			break;
			
 
				+		}
			
 
				+		case transcoder_texture_format::cTFRGB_HALF:
			
 
				+		{
			
 
				+			status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
			
 
				+				bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
			
 
				+			if (!status)
			
 
				+			{
			
 
				+				BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
			
 
				+			}
			
 
				+			break;
			
 
				+		}
			
 
				+		case transcoder_texture_format::cTFRGBA_HALF:
			
 
				+		{
			
 
				+			status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
			
 
				+				bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
			
 
				+			if (!status)
			
 
				+			{
			
 
				+				BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
			
 
				+			}
			
 
				+			break;
			
 
				+		}
			
 
				+		case transcoder_texture_format::cTFRGB_9E5:
			
 
				+		{
			
 
				+			status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
			
 
				+				bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
			
 
				+			if (!status)
			
 
				+			{
			
 
				+				BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
			
 
				+			}
			
 
				+			break;
			
 
				+		}
			
 
				+		default:
			
 
				+		{
			
 
				+			assert(0);
			
 
				+			BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: Invalid format\n");
			
 
				+			break;
			
 
				+		}
			
 
				+		}
			
 
				+
			
 
				+		return status;
			
 
				+	}
			
 
				+
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				 	
			
 
				 	basisu_transcoder::basisu_transcoder() :
			
 
				 		m_ready_to_transcode(false)
			
@@ -10390,7 +10727,7 @@ namespace basist
 
				 		}
			
 
				 		else
			
 
				 		{
			
 
				-			// Nothing special to do for UASTC.
			
 
				+			// Nothing special to do for UASTC/UASTC HDR.
			
 
				 			if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
			
 
				 			{
			
 
				 				m_lowlevel_etc1s_decoder.clear();
			
@@ -10510,7 +10847,14 @@ namespace basist
 
				 			return false;
			
 
				 		}
			
 
				 				
			
 
				-		if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
			
 
				+		if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
			
 
				+		{
			
 
				+			return m_lowlevel_uastc_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
			
 
				+				pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
			
 
				+				fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
			
 
				+				output_rows_in_pixels, channel0, channel1, decode_flags);
			
 
				+		}
			
 
				+		else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
			
 
				 		{
			
 
				 			return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
			
 
				 				pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
			
@@ -10742,7 +11086,18 @@ namespace basist
 
				 			memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel);
			
 
				 		}
			
 
				 		
			
 
				-		if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
			
 
				+		if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
			
 
				+		{
			
 
				+			const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
			
 
				+
			
 
				+			// Use the container independent image transcode method.
			
 
				+			status = m_lowlevel_uastc_hdr_decoder.transcode_image(fmt,
			
 
				+				pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
			
 
				+				(const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
			
 
				+				pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
			
 
				+				decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
			
 
				+		}
			
 
				+		else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
			
 
				 		{
			
 
				 			const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
			
 
				 
			
@@ -10808,20 +11163,27 @@ namespace basist
 
				 			return 8;
			
 
				 		case transcoder_texture_format::cTFBC7_RGBA:
			
 
				 		case transcoder_texture_format::cTFBC7_ALT:
			
 
				+		case transcoder_texture_format::cTFBC6H:
			
 
				 		case transcoder_texture_format::cTFETC2_RGBA:
			
 
				 		case transcoder_texture_format::cTFBC3_RGBA:
			
 
				 		case transcoder_texture_format::cTFBC5_RG:
			
 
				 		case transcoder_texture_format::cTFASTC_4x4_RGBA:
			
 
				+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
			
 
				 		case transcoder_texture_format::cTFATC_RGBA:
			
 
				 		case transcoder_texture_format::cTFFXT1_RGB:
			
 
				 		case transcoder_texture_format::cTFETC2_EAC_RG11:
			
 
				 			return 16;
			
 
				 		case transcoder_texture_format::cTFRGBA32:
			
 
				+		case transcoder_texture_format::cTFRGB_9E5:
			
 
				 			return sizeof(uint32_t);
			
 
				 		case transcoder_texture_format::cTFRGB565:
			
 
				 		case transcoder_texture_format::cTFBGR565:
			
 
				 		case transcoder_texture_format::cTFRGBA4444:
			
 
				 			return sizeof(uint16_t);
			
 
				+		case transcoder_texture_format::cTFRGB_HALF:
			
 
				+			return sizeof(half_float) * 3;
			
 
				+		case transcoder_texture_format::cTFRGBA_HALF:
			
 
				+			return sizeof(half_float) * 4;
			
 
				 		default:
			
 
				 			assert(0);
			
 
				 			BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
			
@@ -10845,17 +11207,22 @@ namespace basist
 
				 		case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA";
			
 
				 		case transcoder_texture_format::cTFBC5_RG: return "BC5_RG";
			
 
				 		case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA";
			
 
				+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return "ASTC_HDR_RGBA";
			
 
				 		case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB";
			
 
				 		case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA";
			
 
				 		case transcoder_texture_format::cTFRGBA32: return "RGBA32";
			
 
				 		case transcoder_texture_format::cTFRGB565: return "RGB565";
			
 
				 		case transcoder_texture_format::cTFBGR565: return "BGR565";
			
 
				 		case transcoder_texture_format::cTFRGBA4444: return "RGBA4444";
			
 
				+		case transcoder_texture_format::cTFRGBA_HALF: return "RGBA_HALF";
			
 
				+		case transcoder_texture_format::cTFRGB_9E5: return "RGB_9E5";
			
 
				+		case transcoder_texture_format::cTFRGB_HALF: return "RGB_HALF";
			
 
				 		case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB";
			
 
				 		case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB";
			
 
				 		case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
			
 
				 		case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11";
			
 
				 		case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11";
			
 
				+		case transcoder_texture_format::cTFBC6H: return "BC6H";
			
 
				 		default:
			
 
				 			assert(0);
			
 
				 			BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
			
@@ -10881,7 +11248,13 @@ namespace basist
 
				 		case block_format::cRGB565: return "RGB565";
			
 
				 		case block_format::cBGR565: return "BGR565";
			
 
				 		case block_format::cRGBA4444: return "RGBA4444";
			
 
				+		case block_format::cRGBA_HALF: return "RGBA_HALF";
			
 
				+		case block_format::cRGB_HALF: return "RGB_HALF";
			
 
				+		case block_format::cRGB_9E5: return "RGB_9E5";
			
 
				 		case block_format::cUASTC_4x4: return "UASTC_4x4";
			
 
				+		case block_format::cUASTC_HDR_4x4: return "UASTC_HDR_4x4";
			
 
				+		case block_format::cBC6H: return "BC6H";
			
 
				+		case block_format::cASTC_HDR_4x4: return "ASTC_HDR_4x4";
			
 
				 		case block_format::cFXT1_RGB: return "FXT1_RGB";
			
 
				 		case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB";
			
 
				 		case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
			
@@ -10914,11 +11287,13 @@ namespace basist
 
				 
			
 
				 	bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt)
			
 
				 	{
			
 
				+		// TODO: Technically ASTC_HDR does support alpha, but UASTC_HDR doesn't yet support it. Unsure what to do here.
			
 
				 		switch (fmt)
			
 
				 		{
			
 
				 		case transcoder_texture_format::cTFETC2_RGBA:
			
 
				 		case transcoder_texture_format::cTFBC3_RGBA:
			
 
				 		case transcoder_texture_format::cTFASTC_4x4_RGBA:
			
 
				+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
			
 
				 		case transcoder_texture_format::cTFBC7_RGBA:
			
 
				 		case transcoder_texture_format::cTFBC7_ALT:
			
 
				 		case transcoder_texture_format::cTFPVRTC1_4_RGBA:
			
@@ -10926,6 +11301,23 @@ namespace basist
 
				 		case transcoder_texture_format::cTFATC_RGBA:
			
 
				 		case transcoder_texture_format::cTFRGBA32:
			
 
				 		case transcoder_texture_format::cTFRGBA4444:
			
 
				+		case transcoder_texture_format::cTFRGBA_HALF:
			
 
				+			return true;
			
 
				+		default:
			
 
				+			break;
			
 
				+		}
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt)
			
 
				+	{
			
 
				+		switch (fmt)
			
 
				+		{
			
 
				+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
			
 
				+		case transcoder_texture_format::cTFBC6H:
			
 
				+		case transcoder_texture_format::cTFRGBA_HALF:
			
 
				+		case transcoder_texture_format::cTFRGB_HALF:
			
 
				+		case transcoder_texture_format::cTFRGB_9E5:
			
 
				 			return true;
			
 
				 		default:
			
 
				 			break;
			
@@ -10947,13 +11339,18 @@ namespace basist
 
				 		case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA;
			
 
				 		case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3;
			
 
				 		case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5;
			
 
				-		case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC4x4;
			
 
				+		case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC_LDR_4x4;
			
 
				+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return basisu::texture_format::cASTC_HDR_4x4;
			
 
				+		case transcoder_texture_format::cTFBC6H: return basisu::texture_format::cBC6HUnsigned;
			
 
				 		case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB;
			
 
				 		case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA;
			
 
				 		case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32;
			
 
				 		case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565;
			
 
				 		case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565;
			
 
				 		case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444;
			
 
				+		case transcoder_texture_format::cTFRGBA_HALF: return basisu::texture_format::cRGBA_HALF;
			
 
				+		case transcoder_texture_format::cTFRGB_9E5: return basisu::texture_format::cRGB_9E5;
			
 
				+		case transcoder_texture_format::cTFRGB_HALF: return basisu::texture_format::cRGB_HALF;
			
 
				 		case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB;
			
 
				 		case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA;
			
 
				 		case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA;
			
@@ -10975,6 +11372,9 @@ namespace basist
 
				 		case transcoder_texture_format::cTFRGB565:
			
 
				 		case transcoder_texture_format::cTFBGR565:
			
 
				 		case transcoder_texture_format::cTFRGBA4444:
			
 
				+		case transcoder_texture_format::cTFRGB_HALF:
			
 
				+		case transcoder_texture_format::cTFRGBA_HALF:
			
 
				+		case transcoder_texture_format::cTFRGB_9E5:
			
 
				 			return true;
			
 
				 		default:
			
 
				 			break;
			
@@ -10995,6 +11395,9 @@ namespace basist
 
				 		case block_format::cRGBA4444_COLOR:
			
 
				 		case block_format::cRGBA4444_ALPHA:
			
 
				 		case block_format::cRGBA4444_COLOR_OPAQUE:
			
 
				+		case block_format::cRGBA_HALF:
			
 
				+		case block_format::cRGB_HALF:
			
 
				+		case block_format::cRGB_9E5:
			
 
				 			return true;
			
 
				 		default:
			
 
				 			break;
			
@@ -11007,11 +11410,16 @@ namespace basist
 
				 		switch (fmt)
			
 
				 		{
			
 
				 		case transcoder_texture_format::cTFRGBA32:
			
 
				+		case transcoder_texture_format::cTFRGB_9E5:
			
 
				 			return sizeof(uint32_t); 
			
 
				 		case transcoder_texture_format::cTFRGB565:
			
 
				 		case transcoder_texture_format::cTFBGR565:
			
 
				 		case transcoder_texture_format::cTFRGBA4444:
			
 
				 			return sizeof(uint16_t);
			
 
				+		case transcoder_texture_format::cTFRGB_HALF:
			
 
				+			return sizeof(half_float) * 3;
			
 
				+		case transcoder_texture_format::cTFRGBA_HALF:
			
 
				+			return sizeof(half_float) * 4;
			
 
				 		default:
			
 
				 			break;
			
 
				 		}
			
@@ -11038,8 +11446,26 @@ namespace basist
 
				 	
			
 
				 	bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt)
			
 
				 	{
			
 
				-		if (fmt == basis_tex_format::cUASTC4x4)
			
 
				+		if (fmt == basis_tex_format::cUASTC_HDR_4x4)
			
 
				+		{
			
 
				+			// UASTC HDR
			
 
				+#if BASISD_SUPPORT_UASTC_HDR
			
 
				+			switch (tex_type)
			
 
				+			{
			
 
				+			case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
			
 
				+			case transcoder_texture_format::cTFBC6H:
			
 
				+			case transcoder_texture_format::cTFRGBA_HALF:
			
 
				+			case transcoder_texture_format::cTFRGB_HALF:
			
 
				+			case transcoder_texture_format::cTFRGB_9E5:
			
 
				+				return true;
			
 
				+			default:
			
 
				+				break;
			
 
				+			}
			
 
				+#endif
			
 
				+		}
			
 
				+		else if (fmt == basis_tex_format::cUASTC4x4)
			
 
				 		{
			
 
				+			// UASTC LDR
			
 
				 #if BASISD_SUPPORT_UASTC
			
 
				 			switch (tex_type)
			
 
				 			{
			
@@ -11049,6 +11475,12 @@ namespace basist
 
				 			case transcoder_texture_format::cTFATC_RGB:
			
 
				 			case transcoder_texture_format::cTFATC_RGBA:
			
 
				 			case transcoder_texture_format::cTFFXT1_RGB:
			
 
				+			// UASTC LDR doesn't support transcoding to HDR formats
			
 
				+			case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
			
 
				+			case transcoder_texture_format::cTFBC6H:
			
 
				+			case transcoder_texture_format::cTFRGBA_HALF:
			
 
				+			case transcoder_texture_format::cTFRGB_HALF:
			
 
				+			case transcoder_texture_format::cTFRGB_9E5:
			
 
				 				return false;
			
 
				 			default:
			
 
				 				return true;
			
@@ -11057,6 +11489,7 @@ namespace basist
 
				 		}
			
 
				 		else
			
 
				 		{
			
 
				+			// ETC1S
			
 
				 			switch (tex_type)
			
 
				 			{
			
 
				 				// ETC1 and uncompressed are always supported.
			
@@ -11812,7 +12245,7 @@ namespace basist
 
				 	// Encodes 3 values to output, usable for any range that uses quints and bits
			
 
				 	static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
			
 
				 	{
			
 
				-		// First extract the trits and the bits from the 5 input values
			
 
				+		// First extract the quints and the bits from the 3 input values
			
 
				 		int quints = 0, bits[3];
			
 
				 		const uint32_t bit_mask = (1 << n) - 1;
			
 
				 		for (int i = 0; i < 3; i++)
			
@@ -12131,11 +12564,13 @@ namespace basist
 
				 
			
 
				 			return bits & ((1U << codesize) - 1U);
			
 
				 		}
			
 
				-
			
 
				-		uint32_t byte_bit_offset = bit_offset & 7U;
			
 
				-		const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
			
 
				-		bit_offset += codesize;
			
 
				-		return (w >> byte_bit_offset)& ((1U << codesize) - 1U);
			
 
				+		else
			
 
				+		{
			
 
				+			uint32_t byte_bit_offset = bit_offset & 7U;
			
 
				+			const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
			
 
				+			bit_offset += codesize;
			
 
				+			return (w >> byte_bit_offset) & ((1U << codesize) - 1U);
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints)
			
@@ -12170,6 +12605,7 @@ namespace basist
 
				 			return false;
			
 
				 
			
 
				 		unpacked.m_mode = mode;
			
 
				+		unpacked.m_common_pattern = 0;
			
 
				 
			
 
				 		uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1];
			
 
				 
			
@@ -16663,10 +17099,12 @@ namespace basist
 
				 
			
 
				 		memcpy(&m_header, pData, sizeof(m_header));
			
 
				 
			
 
				-		// We only support UASTC and ETC1S
			
 
				-		if (m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED)
			
 
				+		// We only support UASTC LDR, UASTC HDR and ETC1S.
			
 
				+		// Note the DFD's contents are what we are guided by for decoding the KTX2 file, not this format field (currently).
			
 
				+		if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) && 
			
 
				+			(m_header.m_vk_format != basist::KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK))
			
 
				 		{
			
 
				-			BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC format\n");
			
 
				+			BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC LDR/HDR format\n");
			
 
				 			return false;
			
 
				 		}
			
 
				 
			
@@ -16890,6 +17328,16 @@ namespace basist
 
				 			// We're assuming "DATA" means RGBA so it has alpha.
			
 
				 			m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
			
 
				 		}
			
 
				+		else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_HDR)
			
 
				+		{
			
 
				+			m_format = basist::basis_tex_format::cUASTC_HDR_4x4;
			
 
				+
			
 
				+			m_dfd_samples = 1;
			
 
				+			m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
			
 
				+
			
 
				+			// We're assuming "DATA" means RGBA so it has alpha.
			
 
				+			m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
			
 
				+		}
			
 
				 		else
			
 
				 		{
			
 
				 			// Unsupported DFD color model.
			
@@ -17167,7 +17615,8 @@ namespace basist
 
				 				return false;
			
 
				 			}
			
 
				 		}
			
 
				-		else if (m_format == basist::basis_tex_format::cUASTC4x4)
			
 
				+		else if ((m_format == basist::basis_tex_format::cUASTC4x4) ||
			
 
				+			     (m_format == basist::basis_tex_format::cUASTC_HDR_4x4))
			
 
				 		{
			
 
				 			// Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices.
			
 
				 			assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length);
			
@@ -17188,14 +17637,29 @@ namespace basist
 
				 				return false;
			
 
				 			}
			
 
				 
			
 
				-			if (!m_uastc_transcoder.transcode_image(fmt,
			
 
				-				pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
			
 
				-				(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
			
 
				-				0, (uint32_t)total_2D_image_size,
			
 
				-				decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
			
 
				+			if (m_format == basist::basis_tex_format::cUASTC_HDR_4x4)
			
 
				 			{
			
 
				-				BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
			
 
				-				return false;
			
 
				+				if (!m_uastc_hdr_transcoder.transcode_image(fmt,
			
 
				+					pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
			
 
				+					(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
			
 
				+					0, (uint32_t)total_2D_image_size,
			
 
				+					decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
			
 
				+				{
			
 
				+					BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
			
 
				+					return false;
			
 
				+				}
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				if (!m_uastc_transcoder.transcode_image(fmt,
			
 
				+					pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
			
 
				+					(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
			
 
				+					0, (uint32_t)total_2D_image_size,
			
 
				+					decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
			
 
				+				{
			
 
				+					BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
			
 
				+					return false;
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 		else
			
@@ -17476,4 +17940,1531 @@ namespace basist
 
				 #endif
			
 
				 	}
			
 
				 
			
 
				+	//-------------------------------
			
 
				+
			
 
				+#ifdef BASISD_SUPPORT_UASTC_HDR
			
 
				+	// This float->half conversion matches how "F32TO16" works on Intel GPU's. Returns the 16-bit half pattern (1 sign, 5 exponent, 10 mantissa bits) for val: inf stays inf, NaN becomes a NaN with mantissa 1, exponents above 15 saturate to inf, magnitudes below the normal half range become half denormals, and float zero/denormal inputs become signed zero.
			
 
				+	basist::half_float float_to_half(float val)
			
 
				+	{
			
 
				+		union { float f; int32_t i; uint32_t u; } fi = { val }; // view the float's bit pattern as an integer
			
 
				+		const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1; // 23-bit mantissa, 8-bit biased exponent, sign bit
			
 
				+		int s = flt_s, e = 0, m = 0; // e and m stay 0 (signed zero) unless one of the branches below sets them
			
 
				+
			
 
				+		// inf/NaN
			
 
				+		if (flt_e == 0xff)
			
 
				+		{
			
 
				+			e = 31;
			
 
				+			if (flt_m != 0) // NaN
			
 
				+				m = 1;
			
 
				+		}
			
 
				+		// not zero or denormal
			
 
				+		else if (flt_e != 0)
			
 
				+		{
			
 
				+			int new_exp = flt_e - 127; // remove the float exponent bias
			
 
				+			if (new_exp > 15)
			
 
				+				e = 31; // too large for a half: e=31 with m=0 encodes infinity
			
 
				+			else if (new_exp < -14)
			
 
				+				m = lrintf((1 << 24) * fabsf(fi.f)); // below the smallest normal half: e stays 0 and the denormal mantissa is |val| * 2^24
			
 
				+			else
			
 
				+			{
			
 
				+				e = new_exp + 15; // apply the half exponent bias
			
 
				+				m = lrintf(flt_m * (1.0f / ((float)(1 << 13)))); // reduce the 23-bit mantissa to 10 bits, rounding via lrintf
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		assert((0 <= m) && (m <= 1024));
			
 
				+		if (m == 1024) // rounding overflowed the 10-bit mantissa: carry into the exponent
			
 
				+		{
			
 
				+			e++;
			
 
				+			m = 0;
			
 
				+		}
			
 
				+
			
 
				+		assert((s >= 0) && (s <= 1));
			
 
				+		assert((e >= 0) && (e <= 31));
			
 
				+		assert((m >= 0) && (m <= 1023));
			
 
				+
			
 
				+		basist::half_float result = (basist::half_float)((s << 15) | (e << 10) | m); // pack sign | exponent | mantissa
			
 
				+		return result;
			
 
				+	}
			
 
				+		
			
 
				+	//------------------------------------------------------------------------------------------------
			
 
				+	// HDR support
			
 
				+	// 
			
 
				+	// Originally from bc6h_enc.cpp
			
 
				+	// BC6H decoder fuzzed vs. DirectXTex's for unsigned/signed
			
 
				+
			
 
				+	const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4] = // base bits, r, g, b - indexed by internal mode index; column 0 bounds the first endpoint of every channel, columns 1-3 bound the remaining r/g/b endpoint fields (see the sanity checks in pack_bc6h_block)
			
 
				+	{
			
 
				+		// 2 subsets
			
 
				+		{ 10, 5, 5, 5, },	// 0, mode 1 in MS/D3D docs
			
 
				+		{ 7, 6, 6, 6, },	// 1
			
 
				+		{ 11, 5, 4, 4, },	// 2
			
 
				+		{ 11, 4, 5, 4, },	// 3
			
 
				+		{ 11, 4, 4, 5, },	// 4
			
 
				+		{ 9, 5, 5, 5, },	// 5
			
 
				+		{ 8, 6, 5, 5, },	// 6
			
 
				+		{ 8, 5, 6, 5, },	// 7
			
 
				+		{ 8, 5, 5, 6, },	// 8
			
 
				+		{ 6, 6, 6, 6, },	// 9, endpoints not delta encoded, mode 10 in MS/D3D docs
			
 
				+		// 1 subset
			
 
				+		{ 10, 10, 10, 10, }, // 10, endpoints not delta encoded, mode 11 in MS/D3D docs
			
 
				+		{ 11, 9, 9, 9, },	// 11
			
 
				+		{ 12, 8, 8, 8, },	// 12
			
 
				+		{ 16, 4, 4, 4, }	// 13, also useful for solid blocks
			
 
				+	};
			
 
				+
			
 
				+	const int8_t g_bc6h_mode_lookup[32] = { 0, 1, 2, 10, 0, 1, 3, 11, 0, 1, 4, 12, 0, 1, 5, 13, 0, 1, 6, -1, 0, 1, 7, -1, 0, 1, 8, -1, 0, 1, 9, -1 }; // inverse of s_mode_bits in pack_bc6h_block: a 5-bit mode field value -> internal mode index, -1 for bit patterns no mode uses. NOTE(review): presumably consumed by the BC6H decoder, which is outside this view - confirm
			
 
				+
			
 
				+	const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX] =
			
 
				+	{
			
 
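				+		// comp_index (0-2 = endpoint channel, 3 = partition pattern, -1 = end of list), subset*2+lh_index, last_bit, first_bit (-1 = single bit taken at last_bit; first_bit > last_bit = field is written bit-reversed)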
			
 
				+		//------------------------        mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta            
			
 
				+		{ { 1, 2, 4, -1 }, { 2, 2, 4, -1 }, { 2, 3, 4, -1 }, { 0, 0, 9, 0 }, { 1, 0, 9, 0 }, { 2, 0, 9, 0 }, { 0, 1, 4, 0 },
			
 
				+		{ 1, 3, 4, -1 }, { 1, 2, 3, 0 }, { 1, 1, 4, 0 }, { 2, 3, 0, -1 }, { 1, 3, 3, 0 }, { 2, 1, 4, 0 }, { 2, 3, 1, -1 },
			
 
				+		{ 2, 2, 3, 0 }, { 0, 2, 4, 0 }, { 2, 3, 2, -1 }, { 0, 3, 4, 0 }, { 2, 3, 3, -1 }, { 3, -1, 4, 0 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 1: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (7.666, 7.666, 7.666), delta
			
 
				+		{ { 1, 2, 5, -1 },{ 1, 3, 4, -1 },{ 1, 3, 5, -1 },{ 0, 0, 6, 0 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },
			
 
				+		{ 1, 0, 6, 0 },{ 2, 2, 5, -1 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 6, 0 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },
			
 
				+		{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },
			
 
				+		{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 2: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.555, 11.444, 11.444), delta
			
 
				+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 4, 0 },{ 0, 0, 10, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },{ 1, 0, 10, -1 },
			
 
				+		{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },
			
 
				+		{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 3: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.555, 11.444), delta
			
 
				+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },
			
 
				+		{ 1, 0, 10, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 0, -1 },
			
 
				+		{ 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 1, 2, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 4: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.444, 11.555), delta
			
 
				+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 2, 2, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },
			
 
				+		{ 1, 0, 10, -1 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 0, 10, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 1, -1 },
			
 
				+		{ 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 2, 3, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 5: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (9.555, 9.555, 9.555), delta
			
 
				+		{ { 0, 0, 8, 0 },{ 2, 2, 4, -1 },{ 1, 0, 8, 0 },{ 1, 2, 4, -1 },{ 2, 0, 8, 0 },{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },
			
 
				+		{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },
			
 
				+		{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 6: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.666, 8.555, 8.555), delta
			
 
				+		{ { 0, 0, 7, 0 },{ 1, 3, 4, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 3, -1 },
			
 
				+		{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
			
 
				+		{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 7: 2 subsets, Weight bits: 46 bits, Endpoints bits: 72 bits (8.555, 8.666, 8.555), delta
			
 
				+		{ { 0, 0, 7, 0 },{ 2, 3, 0, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 1, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 1, 3, 5, -1 },
			
 
				+		{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
			
 
				+		{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 8: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.555, 8.666), delta
			
 
				+		{ { 0, 0, 7, 0 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 5, -1 },
			
 
				+		{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },
			
 
				+		{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 9: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (6.6.6.6, 6.6.6.6, 6.6.6.6), NO delta
			
 
				+		{ { 0, 0, 5, 0 },{ 1, 3, 4, -1 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 5, 0 },{ 1, 2, 5, -1 },{ 2, 2, 5, -1 },
			
 
				+		{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 5, 0 },{ 1, 3, 5, -1 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },
			
 
				+		{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 10: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (10.10, 10.10, 10.10), NO delta
			
 
				+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 9, 0 },{ 1, 1, 9, 0 },{ 2, 1, 9, 0 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 11: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (11.9, 11.9, 11.9), delta
			
 
				+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 8, 0 },{ 0, 0, 10, -1 },{ 1, 1, 8, 0 },{ 1, 0, 10, -1 },{ 2, 1, 8, 0 },{ 2, 0, 10, -1 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 12: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (12.8, 12.8, 12.8), delta
			
 
				+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 7, 0 },{ 0, 0, 10, 11 },{ 1, 1, 7, 0 },{ 1, 0, 10, 11 },{ 2, 1, 7, 0 },{ 2, 0, 10, 11 }, {-1, 0, 0, 0} },
			
 
				+		//------------------------        mode 13: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (16.4, 16.4, 16.4), delta
			
 
				+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, 15 },{ 1, 1, 3, 0 },{ 1, 0, 10, 15 },{ 2, 1, 3, 0 },{ 2, 0, 10, 15 }, {-1, 0, 0, 0} }
			
 
				+	};
			
 
				+
			
 
				+	// The same as the first 32 2-subset patterns in BC7. 
			
 
				+	// Bits 0-6 hold the subset index (0 or 1). Bit 7 is a flag indicating that the weight at that position uses 1 less bit than usual.
			
 
				+	const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4] = // [pat][y][x]
			
 
				+	{
			
 
				+		{ {0x80, 0, 1, 1}, { 0, 0, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 1, 0x81 }}, { {0x80, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0x81} },
			
 
				+		{ {0x80, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 0x81} }, { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
			
 
				+		{ {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} },
			
 
				+		{ {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
			
 
				+		{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
			
 
				+		{ {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 0x81} },
			
 
				+		{ {0x80, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
			
 
				+		{ {0x80, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 0x81} },
			
 
				+		{ {0x80, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 0}, {1, 1, 1, 0x81} }, { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} },
			
 
				+		{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 1, 0x81, 1}, {0, 0, 1, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} },
			
 
				+		{ {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 1, 0, 0}, {1, 1, 1, 0} },
			
 
				+		{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, { {0x80, 1, 1, 1}, {0, 0, 1, 1}, {  0, 0, 1, 1}, {0, 0, 0, 0x81} },
			
 
				+		{ {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} },
			
 
				+		{ {0x80, 1, 0x81, 0}, {0, 1, 1, 0}, {0, 1, 1, 0}, {0, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {0, 1, 1, 0}, {0, 1, 1, 0}, {1, 1, 0, 0} },
			
 
				+		{ {0x80, 0, 0, 1}, {0, 1, 1, 1}, {0x81, 1, 1, 0}, {1, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {0x81, 1, 1, 1}, {0, 0, 0, 0} },
			
 
				+		{ {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {1, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {1, 0, 0, 1}, {1, 0, 0, 1}, {1, 1, 0, 0} }
			
 
				+	};
			
 
				+
			
 
				+	const uint8_t g_bc6h_weight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; // NOTE(review): presumably the 0..64 interpolation weights for 3-bit selectors; no use is visible in this part of the file - confirm
			
 
				+	const uint8_t g_bc6h_weight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; // NOTE(review): presumably the 0..64 interpolation weights for 4-bit selectors - confirm
			
 
				+	// Unpacked description of one BC6H block (mode, partition, endpoints, weights); pack_bc6h_block() turns it into the 16-byte bitstream.
			
 
				+	struct bc6h_logical_block
			
 
				+	{
			
 
				+		uint32_t m_mode; // internal mode index, must be < NUM_BC6H_MODES (asserted by pack_bc6h_block)
			
 
				+		uint32_t m_partition_pattern;	// must be 0 if 1 subset
			
 
				+		uint32_t m_endpoints[3][4];		// [comp][subset*2+lh_index] - must be already properly packed
			
 
				+		uint8_t m_weights[16];			// weights must be of the proper size, taking into account skipped MSB's which must be 0
			
 
				+
			
 
				+		void clear() // resets every field through basisu::clear_obj (presumably zero-fills the object - confirm against its definition)
			
 
				+		{
			
 
				+			basisu::clear_obj(*this);
			
 
				+		}
			
 
				+	};
			
 
				+	// ORs the num_bits-wide value val into the 128-bit accumulator made of l (bits 0-63) and h (bits 64-127) starting at bit_pos, then advances bit_pos by num_bits. Existing bits are never cleared, so the destination range is expected to still be zero.
			
 
				+	static inline void write_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
			
 
				+	{
			
 
				+		assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
			
 
				+		assert(val < (1ULL << num_bits)); // val must fit in num_bits
			
 
				+
			
 
				+		if (bit_pos < 64)
			
 
				+		{
			
 
				+			l |= (val << bit_pos);
			
 
				+
			
 
				+			if ((bit_pos + num_bits) > 64) // value straddles the l/h boundary: spill its upper part into h
			
 
				+				h |= (val >> (64 - bit_pos));
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			h |= (val << (bit_pos - 64)); // value lies entirely in the upper 64 bits
			
 
				+		}
			
 
				+
			
 
				+		bit_pos += num_bits;
			
 
				+		assert(bit_pos <= 128);
			
 
				+	}
			
 
				+	// Same contract as write_bits(), but emits the bits of val most-significant first, so the field ends up bit-reversed in the accumulator.
			
 
				+	static inline void write_rev_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
			
 
				+	{
			
 
				+		assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
			
 
				+		assert(val < (1ULL << num_bits));
			
 
				+
			
 
				+		for (uint32_t i = 0; i < num_bits; i++)
			
 
				+			write_bits((val >> (num_bits - 1u - i)) & 1, 1, bit_pos, l, h); // one bit per call, starting from bit num_bits-1 of val
			
 
				+	}
			
 
				+	// Serializes log_blk into the 16-byte dst_blk: the mode bits first, then the endpoint/partition fields in the order listed in g_bc6h_bit_layouts[mode], then the 16 weights. The 128 bits are stored as four little-endian dwords.
			
 
				+	static void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk)
			
 
				+	{
			
 
				+		const uint8_t s_mode_bits[NUM_BC6H_MODES] = { 0b00, 0b01, 0b00010, 0b00110, 0b01010, 0b01110, 0b10010, 0b10110, 0b11010, 0b11110, 0b00011, 0b00111, 0b01011, 0b01111 }; // mode field bit pattern per internal mode index: 2 bits for modes 0-1, 5 bits for the rest
			
 
				+
			
 
				+		const uint32_t mode = log_blk.m_mode;
			
 
				+		assert(mode < NUM_BC6H_MODES);
			
 
				+
			
 
				+		uint64_t l = s_mode_bits[mode], h = 0; // l/h form the 128-bit accumulator; the mode field is placed at bit 0
			
 
				+		uint32_t bit_pos = (mode >= 2) ? 5 : 2; // continue writing right after the mode field
			
 
				+
			
 
				+		const uint32_t num_subsets = (mode >= BC6H_FIRST_1SUBSET_MODE_INDEX) ? 1 : 2;
			
 
				+
			
 
				+		assert(((num_subsets == 2) && (log_blk.m_partition_pattern < TOTAL_BC6H_PARTITION_PATTERNS)) ||
			
 
				+			((num_subsets == 1) && (!log_blk.m_partition_pattern)));
			
 
				+
			
 
				+		// Sanity checks
			
 
				+		for (uint32_t c = 0; c < 3; c++)
			
 
				+		{
			
 
				+			assert(log_blk.m_endpoints[c][0] < (1u << g_bc6h_mode_sig_bits[mode][0]));	   // 1st subset l, base bits
			
 
				+			assert(log_blk.m_endpoints[c][1] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 1st subset h, these are deltas except for modes 9,10
			
 
				+			assert(log_blk.m_endpoints[c][2] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset l
			
 
				+			assert(log_blk.m_endpoints[c][3] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset h
			
 
				+		}
			
 
				+
			
 
				+		const bc6h_bit_layout* pLayout = &g_bc6h_bit_layouts[mode][0];
			
 
				+
			
 
				+		while (pLayout->m_comp != -1) // each layout list ends with an entry whose m_comp is -1
			
 
				+		{
			
 
				+			uint32_t v = (pLayout->m_comp == 3) ? log_blk.m_partition_pattern : log_blk.m_endpoints[pLayout->m_comp][pLayout->m_index]; // comp 3 selects the partition pattern instead of an endpoint
			
 
				+
			
 
				+			if (pLayout->m_first_bit == -1) // single-bit field: emit only bit m_last_bit of v
			
 
				+			{
			
 
				+				write_bits((v >> pLayout->m_last_bit) & 1, 1, bit_pos, l, h);
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				const uint32_t total_bits = basisu::iabs(pLayout->m_last_bit - pLayout->m_first_bit) + 1;
			
 
				+
			
 
				+				v >>= basisu::minimum(pLayout->m_first_bit, pLayout->m_last_bit); // move the lowest bit of the field to bit 0
			
 
				+				v &= ((1 << total_bits) - 1);
			
 
				+
			
 
				+				if (pLayout->m_first_bit > pLayout->m_last_bit) // descending bit order: field is stored bit-reversed
			
 
				+					write_rev_bits(v, total_bits, bit_pos, l, h);
			
 
				+				else
			
 
				+					write_bits(v, total_bits, bit_pos, l, h);
			
 
				+			}
			
 
				+
			
 
				+			pLayout++;
			
 
				+		}
			
 
				+
			
 
				+		const uint32_t num_mode_sel_bits = (num_subsets == 1) ? 4 : 3; // weight width: 4 bits with 1 subset, 3 bits with 2 subsets
			
 
				+		const uint8_t* pPat = &g_bc6h_2subset_patterns[log_blk.m_partition_pattern][0][0]; // only read below when num_subsets == 2
			
 
				+
			
 
				+		for (uint32_t i = 0; i < 16; i++)
			
 
				+		{
			
 
				+			const uint32_t sel = log_blk.m_weights[i];
			
 
				+
			
 
				+			uint32_t num_bits = num_mode_sel_bits;
			
 
				+			if (num_subsets == 2)
			
 
				+			{
			
 
				+				const uint32_t subset_index = pPat[i];
			
 
				+				num_bits -= (subset_index >> 7); // bit 7 of the pattern entry marks a weight stored with one bit less
			
 
				+			}
			
 
				+			else if (!i)
			
 
				+			{
			
 
				+				num_bits--; // with 1 subset only the first weight is stored with one bit less
			
 
				+			}
			
 
				+
			
 
				+			assert(sel < (1u << num_bits));
			
 
				+
			
 
				+			write_bits(sel, num_bits, bit_pos, l, h);
			
 
				+		}
			
 
				+
			
 
				+		assert(bit_pos == 128); // mode bits + fields + weights must fill the block exactly
			
 
				+
			
 
				+		basisu::write_le_dword(&dst_blk.m_bytes[0], (uint32_t)l);
			
 
				+		basisu::write_le_dword(&dst_blk.m_bytes[4], (uint32_t)(l >> 32u));
			
 
				+		basisu::write_le_dword(&dst_blk.m_bytes[8], (uint32_t)h);
			
 
				+		basisu::write_le_dword(&dst_blk.m_bytes[12], (uint32_t)(h >> 32u));
			
 
				+	}
			
 
				+
			
 
				+#if 0
			
 
				+	static inline uint32_t bc6h_blog_dequantize_to_blog16(uint32_t comp, uint32_t bits_per_comp) // compiled out by the surrounding #if 0; expands a bits_per_comp-wide quantized value to 16 bits
			
 
				+	{
			
 
				+		int unq;
			
 
				+
			
 
				+		if (bits_per_comp >= 15)
			
 
				+			unq = comp; // 15 bits or more: passed through unchanged
			
 
				+		else if (comp == 0)
			
 
				+			unq = 0;
			
 
				+		else if (comp == ((1u << bits_per_comp) - 1u))
			
 
				+			unq = 0xFFFFu; // all-ones input maps to the 16-bit maximum
			
 
				+		else
			
 
				+			unq = ((comp << 16u) + 0x8000u) >> bits_per_comp;
			
 
				+
			
 
				+		return unq;
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				+	// Suboptimal, but very close. Quantizes the half-float bit pattern h (must not exceed MAX_BC6H_HALF_FLOAT_AS_UINT) to a num_bits-wide endpoint value using integer arithmetic only.
			
 
				+	static inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits)
			
 
				+	{
			
 
				+		assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
			
 
				+		return (h * 64 + 30) / (31 * (1 << (16 - num_bits))); // scale by 64/31 (with +30 bias before the integer division), then drop 16 - num_bits bits of precision
			
 
				+	}
			
 
				+
			
 
				+	// 6,7,8,9,10,11,12 - the endpoint bit widths half_to_blog_tab() accepts in addition to 16; the two constants are only referenced by its assert
			
 
				+	const uint32_t BC6H_BLOG_TAB_MIN = 6;
			
 
				+	const uint32_t BC6H_BLOG_TAB_MAX = 12;
			
 
				+	//const uint32_t BC6H_BLOG_TAB_NUM = BC6H_BLOG_TAB_MAX - BC6H_BLOG_TAB_MIN + 1;
			
 
				+	
			
 
				+	// Handles 16, or 6-12 bits. Others assert. Both branches return bc6h_half_to_blog(h, num_bits); the split exists only to range-check num_bits.
			
 
				+	static inline uint32_t half_to_blog_tab(half_float h, uint32_t num_bits)
			
 
				+	{
			
 
				+		BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MIN); // presumably silences unused-constant warnings when the assert below is compiled out - confirm against the macro
			
 
				+		BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MAX);
			
 
				+
			
 
				+		assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
			
 
				+
			
 
				+		if (num_bits == 16)
			
 
				+		{
			
 
				+			return bc6h_half_to_blog(h, 16);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			assert((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX));
			
 
				+			
			
 
				+			// Note: This used to be done using a table lookup, but it required ~224KB of tables. This isn't quite as accurate, but the error is very slight (+-1 half values as ints).
			
 
				+			return bc6h_half_to_blog(h, num_bits);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	bool g_bc6h_enc_initialized; // set once by bc6h_enc_init(); the bc6h_enc_block_* functions assert on it
			
 
				+
			
 
				+	void bc6h_enc_init() // marks the BC6H encoder as initialized; no other setup work is performed here
			
 
				+	{
			
 
				+		if (g_bc6h_enc_initialized)
			
 
				+			return;
			
 
				+
			
 
				+		g_bc6h_enc_initialized = true;
			
 
				+	}
			
 
				+
			
 
				+	// mode 10, 4-bit weights. Packs a 1-subset block with directly stored 10-bit endpoints. pEndpoints is [channel][low/high] as half-float bit patterns, pWeights holds 16 values in 0..15. Requires bc6h_enc_init() to have been called.
			
 
				+	void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
			
 
				+	{
			
 
				+		assert(g_bc6h_enc_initialized);
			
 
				+
			
 
				+		for (uint32_t i = 0; i < 16; i++)
			
 
				+		{
			
 
				+			assert(pWeights[i] <= 15);
			
 
				+		}
			
 
				+
			
 
				+		bc6h_logical_block log_blk;
			
 
				+		log_blk.clear();
			
 
				+
			
 
				+		// Convert half endpoints to blog10 (mode 10 doesn't use delta encoding)
			
 
				+		for (uint32_t c = 0; c < 3; c++)
			
 
				+		{
			
 
				+			log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 10);
			
 
				+			log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 10);
			
 
				+		}
			
 
				+
			
 
				+		memcpy(log_blk.m_weights, pWeights, 16);
			
 
				+
			
 
				+		if (log_blk.m_weights[0] & 8) // the first weight is stored with only 3 bits, so its MSB must be 0: invert all weights and swap the endpoints to compensate
			
 
				+		{
			
 
				+			for (uint32_t i = 0; i < 16; i++)
			
 
				+				log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
			
 
				+
			
 
				+			for (uint32_t c = 0; c < 3; c++)
			
 
				+			{
			
 
				+				std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		log_blk.m_mode = BC6H_FIRST_1SUBSET_MODE_INDEX; // the first 1-subset mode, i.e. the "mode 10" this function is named after
			
 
				+		pack_bc6h_block(*pPacked_block, log_blk);
			
 
				+	}
			
 
				+
			
 
				+	// Tries modes 11-13 (delta endpoint) encoding, falling back to mode 10 only when necessary, 4-bit weights. Modes are tried from BC6H_LAST_MODE_INDEX downwards and the first one whose per-channel deltas fit is packed. Arguments are the same as for bc6h_enc_block_mode10().
			
 
				+	void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
			
 
				+	{
			
 
				+		assert(g_bc6h_enc_initialized);
			
 
				+
			
 
				+		for (uint32_t i = 0; i < 16; i++)
			
 
				+		{
			
 
				+			assert(pWeights[i] <= 15);
			
 
				+		}
			
 
				+
			
 
				+		bc6h_logical_block log_blk;
			
 
				+		log_blk.clear();
			
 
				+
			
 
				+		for (uint32_t mode = BC6H_LAST_MODE_INDEX; mode > BC6H_FIRST_1SUBSET_MODE_INDEX; mode--) // the first 1-subset mode itself is excluded here; it is the fallback after the loop
			
 
				+		{
			
 
				+			const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0], num_delta_bits = g_bc6h_mode_sig_bits[mode][1];
			
 
				+			const int base_bitmask = (1 << num_base_bits) - 1;
			
 
				+			const int delta_bitmask = (1 << num_delta_bits) - 1;
			
 
				+			BASISU_NOTE_UNUSED(base_bitmask);
			
 
				+
			
 
				+			assert(num_delta_bits < num_base_bits);
			
 
				+			assert((num_delta_bits == g_bc6h_mode_sig_bits[mode][2]) && (num_delta_bits == g_bc6h_mode_sig_bits[mode][3])); // these modes use one delta width for all three channels
			
 
				+
			
 
				+			uint32_t blog_endpoints[3][2];
			
 
				+
			
 
				+			// Convert half endpoints to blog 16, 12, or 11
			
 
				+			for (uint32_t c = 0; c < 3; c++)
			
 
				+			{
			
 
				+				blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
			
 
				+				assert((int)blog_endpoints[c][0] <= base_bitmask);
			
 
				+
			
 
				+				blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
			
 
				+				assert((int)blog_endpoints[c][1] <= base_bitmask);
			
 
				+			}
			
 
				+
			
 
				+			// Copy weights
			
 
				+			memcpy(log_blk.m_weights, pWeights, 16); // redone on every iteration because the inversion below modifies the copy
			
 
				+
			
 
				+			// Ensure first weight MSB is 0
			
 
				+			if (log_blk.m_weights[0] & 8)
			
 
				+			{
			
 
				+				// Invert weights
			
 
				+				for (uint32_t i = 0; i < 16; i++)
			
 
				+					log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
			
 
				+
			
 
				+				// Swap blog quantized endpoints
			
 
				+				for (uint32_t c = 0; c < 3; c++)
			
 
				+				{
			
 
				+					std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			const int max_delta = (1 << (num_delta_bits - 1)) - 1; // deltas are signed values of num_delta_bits bits
			
 
				+			const int min_delta = -(max_delta + 1);
			
 
				+			assert((max_delta - min_delta) == delta_bitmask);
			
 
				+
			
 
				+			bool failed_flag = false;
			
 
				+			for (uint32_t c = 0; c < 3; c++)
			
 
				+			{
			
 
				+				log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
			
 
				+
			
 
				+				int delta = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
			
 
				+				if ((delta < min_delta) || (delta > max_delta)) // delta is not representable in this mode
			
 
				+				{
			
 
				+					failed_flag = true;
			
 
				+					break;
			
 
				+				}
			
 
				+
			
 
				+				log_blk.m_endpoints[c][1] = delta & delta_bitmask; // keep the low num_delta_bits bits (two's complement for negative deltas)
			
 
				+			}
			
 
				+
			
 
				+			if (failed_flag)
			
 
				+				continue; // try the next lower mode; the endpoints written so far are overwritten there
			
 
				+
			
 
				+			log_blk.m_mode = mode;
			
 
				+			pack_bc6h_block(*pPacked_block, log_blk);
			
 
				+						
			
 
				+			return;
			
 
				+		}
			
 
				+
			
 
				+		// Worst case fall back to mode 10, which can handle any endpoints
			
 
				+		bc6h_enc_block_mode10(pPacked_block, pEndpoints, pWeights);
			
 
				+	}
			
 
				+
			
 
				+	// Mode 9 (direct endpoint encoding), 3-bit weights, but only 1 subset. Mode 9 is a 2-subset mode, so both subsets are given the same endpoint pair and partition pattern 0 is used. pEndpoints is [channel][low/high] as half-float bit patterns, pWeights holds 16 values in 0..7.
			
 
				+	void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
			
 
				+	{
			
 
				+		assert(g_bc6h_enc_initialized);
			
 
				+
			
 
				+		for (uint32_t i = 0; i < 16; i++)
			
 
				+		{
			
 
				+			assert(pWeights[i] <= 7);
			
 
				+		}
			
 
				+
			
 
				+		bc6h_logical_block log_blk;
			
 
				+		log_blk.clear();
			
 
				+
			
 
				+		// Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
			
 
				+		for (uint32_t c = 0; c < 3; c++)
			
 
				+		{
			
 
				+			log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 6);
			
 
				+			log_blk.m_endpoints[c][2] = log_blk.m_endpoints[c][0]; // subset 1 low endpoint = subset 0 low endpoint
			
 
				+
			
 
				+			log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 6);
			
 
				+			log_blk.m_endpoints[c][3] = log_blk.m_endpoints[c][1]; // subset 1 high endpoint = subset 0 high endpoint
			
 
				+		}
			
 
				+
			
 
				+		memcpy(log_blk.m_weights, pWeights, 16);
			
 
				+
			
 
				+		const uint32_t pat_index = 0;
			
 
				+		const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
			
 
				+
			
 
				+		if (log_blk.m_weights[0] & 4) // texel 0 is stored with one bit less (flag 0x80 in pattern 0), so its MSB must be 0: swap subset 0's endpoints and invert that subset's weights
			
 
				+		{
			
 
				+			for (uint32_t c = 0; c < 3; c++)
			
 
				+				std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
			
 
				+
			
 
				+			for (uint32_t i = 0; i < 16; i++)
			
 
				+				if ((pPat[i] & 0x7F) == 0) // low 7 bits of the pattern entry are the subset index
			
 
				+					log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
			
 
				+		}
			
 
				+
			
 
				+		if (log_blk.m_weights[15] & 4) // same fix-up for subset 1, whose reduced-width weight in pattern 0 is texel 15
			
 
				+		{
			
 
				+			for (uint32_t c = 0; c < 3; c++)
			
 
				+				std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]);
			
 
				+
			
 
				+			for (uint32_t i = 0; i < 16; i++)
			
 
				+				if ((pPat[i] & 0x7F) == 1)
			
 
				+					log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
			
 
				+		}
			
 
				+
			
 
				+		log_blk.m_mode = 9;
			
 
				+		log_blk.m_partition_pattern = pat_index;
			
 
				+		pack_bc6h_block(*pPacked_block, log_blk);
			
 
				+	}
			
 
				+
			
 
				+	// Tries modes 0-8, falls back to mode 9.
				+	// Modes are tried in s_mode_order; the first mode whose endpoint deltas all fit in that mode's
				+	// delta bit counts is packed into *pPacked_block. If none fit, the block is packed with
				+	// bc6h_enc_block_1subset_mode9_3bit_weights() instead.
				+	// pEndpoints[comp][lh_index] holds the half-float endpoints; pWeights holds 16 weights, each <= 7.
				+	void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
				+	{
				+		assert(g_bc6h_enc_initialized);
				+
				+		for (uint32_t i = 0; i < 16; i++)
				+		{
				+			assert(pWeights[i] <= 7);
				+		}
				+
				+		bc6h_logical_block log_blk;
				+		log_blk.clear();
				+
				+		const uint32_t pat_index = 0;
				+		const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
				+
				+		// The weight fix-up depends only on the input weights, never on the mode being tried,
				+		// so it is done once here rather than once per mode.
				+		memcpy(log_blk.m_weights, pWeights, 16);
				+
				+		// Part 0: if bit 2 of texel 0's weight is set, invert the part's weights (its endpoints are swapped per mode below).
				+		const bool swap_part0 = (log_blk.m_weights[0] & 4) != 0;
				+		if (swap_part0)
				+		{
				+			for (uint32_t i = 0; i < 16; i++)
				+				if ((pPat[i] & 0x7F) == 0)
				+					log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
				+		}
				+
				+		// Part 1: same rule, keyed on texel 15's weight.
				+		const bool swap_part1 = (log_blk.m_weights[15] & 4) != 0;
				+		if (swap_part1)
				+		{
				+			for (uint32_t i = 0; i < 16; i++)
				+				if ((pPat[i] & 0x7F) == 1)
				+					log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
				+		}
				+
				+		static const int s_mode_order[9] = { 2, 3, 4, 0,  5, 6, 7, 8,  1 }; // ordered from largest base bits to least
				+
				+		for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
				+		{
				+			const uint32_t mode = s_mode_order[mode_iter];
				+
				+			const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
				+			const int base_bitmask = (1 << num_base_bits) - 1;
				+			BASISU_NOTE_UNUSED(base_bitmask);
				+
				+			const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
				+			const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
				+
				+			uint32_t blog_endpoints[3][4];
				+
				+			// Convert half endpoints to blog 7-11, duplicating the pair into both parts
				+			for (uint32_t c = 0; c < 3; c++)
				+			{
				+				blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
				+				blog_endpoints[c][2] = blog_endpoints[c][0];
				+				assert((int)blog_endpoints[c][0] <= base_bitmask);
				+
				+				blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
				+				blog_endpoints[c][3] = blog_endpoints[c][1];
				+				assert((int)blog_endpoints[c][1] <= base_bitmask);
				+			}
				+
				+			// Swap the endpoints of whichever parts had their weights inverted above
				+			if (swap_part0)
				+			{
				+				for (uint32_t c = 0; c < 3; c++)
				+					std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
				+			}
				+
				+			if (swap_part1)
				+			{
				+				for (uint32_t c = 0; c < 3; c++)
				+					std::swap(blog_endpoints[c][2], blog_endpoints[c][3]);
				+			}
				+
				+			// Try packing the endpoints: the first is stored as-is, the other three as signed deltas from it
				+			bool failed_flag = false;
				+
				+			for (uint32_t c = 0; c < 3; c++)
				+			{
				+				const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
				+
				+				const int min_delta = -(max_delta + 1);
				+				assert((max_delta - min_delta) == delta_bitmasks[c]);
				+
				+				log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
				+
				+				int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
				+				int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
				+				int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
				+
				+				if ((delta0 < min_delta) || (delta0 > max_delta) ||
				+					(delta1 < min_delta) || (delta1 > max_delta) ||
				+					(delta2 < min_delta) || (delta2 > max_delta))
				+				{
				+					// This mode can't represent the deltas; try the next one
				+					failed_flag = true;
				+					break;
				+				}
				+
				+				log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
				+				log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
				+				log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
				+			}
				+
				+			if (failed_flag)
				+				continue;
				+
				+			log_blk.m_mode = mode;
				+			log_blk.m_partition_pattern = pat_index;
				+			pack_bc6h_block(*pPacked_block, log_blk);
				+
				+			return;
				+
				+		} // mode_iter
				+
				+		bc6h_enc_block_1subset_mode9_3bit_weights(pPacked_block, pEndpoints, pWeights);
				+	}
			
 
				+
			
 
				+	// Packs a 2-subset BC6H block in mode 9 (endpoints stored directly) with 3-bit weights.
				+	// pEndpoints[subset][comp][lh_index]; pWeights holds 16 weights, each <= 7.
				+	// common_part_index selects an entry of basist::g_astc_bc7_common_partitions2.
				+	void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
				+	{
				+		assert(g_bc6h_enc_initialized);
				+		assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);
				+
				+		for (uint32_t texel = 0; texel < 16; texel++)
				+		{
				+			assert(pWeights[texel] <= 7);
				+		}
				+
				+		const uint32_t pat_index = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
				+		assert(pat_index < 32);
				+		const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
				+
				+		const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
				+
				+		bc6h_logical_block log_blk;
				+		log_blk.clear();
				+
				+		memcpy(log_blk.m_weights, pWeights, 16);
				+
				+		// Convert half endpoints to blog6 (mode 9 doesn't use delta encoding).
				+		// When the table entry is marked inverted, the two source subsets trade places.
				+		for (uint32_t src_subset = 0; src_subset < 2; src_subset++)
				+		{
				+			const uint32_t dst_subset = invert_flag ? (1 - src_subset) : src_subset;
				+
				+			for (uint32_t comp = 0; comp < 3; comp++)
				+			{
				+				log_blk.m_endpoints[comp][dst_subset * 2 + 0] = half_to_blog_tab(pEndpoints[src_subset][comp][0], 6);
				+				log_blk.m_endpoints[comp][dst_subset * 2 + 1] = half_to_blog_tab(pEndpoints[src_subset][comp][1], 6);
				+			}
				+		}
				+
				+		// Pattern entries flagged with 0x80 are inspected: if such a texel's weight has bit 2 set,
				+		// the subset it belongs to must have its endpoints swapped and weights inverted.
				+		bool needs_swap[2] = { false, false };
				+		for (uint32_t texel = 0; texel < 16; texel++)
				+		{
				+			const bool flagged = (pPat[texel] & 0x80) != 0;
				+			if (flagged && (log_blk.m_weights[texel] & 4))
				+				needs_swap[pPat[texel] & 1] = true;
				+		}
				+
				+		for (uint32_t subset = 0; subset < 2; subset++)
				+		{
				+			if (!needs_swap[subset])
				+				continue;
				+
				+			for (uint32_t comp = 0; comp < 3; comp++)
				+				std::swap(log_blk.m_endpoints[comp][subset * 2], log_blk.m_endpoints[comp][subset * 2 + 1]);
				+
				+			for (uint32_t texel = 0; texel < 16; texel++)
				+			{
				+				if ((uint32_t)(pPat[texel] & 0x7F) == subset)
				+					log_blk.m_weights[texel] = 7 - log_blk.m_weights[texel];
				+			}
				+		}
				+
				+		log_blk.m_partition_pattern = pat_index;
				+		log_blk.m_mode = 9;
				+		pack_bc6h_block(*pPacked_block, log_blk);
				+	}
			
 
				+
			
 
				+	// Packs a 2-subset BC6H block with 3-bit weights. Modes 0-8 are tried in s_mode_order and the
				+	// first mode whose endpoint deltas all fit is used; otherwise the block is packed with
				+	// bc6h_enc_block_2subset_mode9_3bit_weights().
				+	// pEndpoints[subset][comp][lh_index]; pWeights holds 16 weights, each <= 7.
				+	// common_part_index selects an entry of basist::g_astc_bc7_common_partitions2.
				+	void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
				+	{
				+		assert(g_bc6h_enc_initialized);
				+		// Same bound the mode 9 variant checks before indexing the common partition table.
				+		assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);
				+
				+		for (uint32_t i = 0; i < 16; i++)
				+		{
				+			assert(pWeights[i] <= 7);
				+		}
				+
				+		bc6h_logical_block log_blk;
				+		log_blk.clear();
				+
				+		// Everything below up to the mode loop depends only on the partition and the input weights,
				+		// so it is computed once rather than once per mode.
				+		const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
				+		const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
				+
				+		const uint32_t pat_index = bc7_pattern;
				+		assert(pat_index < 32);
				+		const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
				+
				+		memcpy(log_blk.m_weights, pWeights, 16);
				+
				+		// For pattern entries flagged with 0x80: if that texel's weight has bit 2 set, its subset needs swapping.
				+		bool swap_flags[2] = { false, false };
				+		for (uint32_t i = 0; i < 16; i++)
				+		{
				+			if ((pPat[i] & 0x80) == 0)
				+				continue;
				+
				+			if (log_blk.m_weights[i] & 4)
				+			{
				+				const uint32_t p = pPat[i] & 1;
				+				swap_flags[p] = true;
				+			}
				+		}
				+
				+		// Invert the weights of each subset being swapped (its endpoints are swapped per mode below).
				+		if (swap_flags[0])
				+		{
				+			for (uint32_t i = 0; i < 16; i++)
				+				if ((pPat[i] & 0x7F) == 0)
				+					log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
				+		}
				+
				+		if (swap_flags[1])
				+		{
				+			for (uint32_t i = 0; i < 16; i++)
				+				if ((pPat[i] & 0x7F) == 1)
				+					log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
				+		}
				+
				+		static const int s_mode_order[9] = { 2, 3, 4, 0,  5, 6, 7, 8,  1 }; // ordered from largest base bits to least
				+
				+		for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
				+		{
				+			const uint32_t mode = s_mode_order[mode_iter];
				+
				+			const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
				+			const int base_bitmask = (1 << num_base_bits) - 1;
				+			BASISU_NOTE_UNUSED(base_bitmask);
				+
				+			const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
				+			const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
				+
				+			uint32_t blog_endpoints[3][4];
				+
				+			// Convert half endpoints to blog 7-11
				+			for (uint32_t s = 0; s < 2; s++)
				+			{
				+				for (uint32_t c = 0; c < 3; c++)
				+				{
				+					blog_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], num_base_bits);
				+					blog_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], num_base_bits);
				+				}
				+			}
				+
				+			// When the table entry is marked inverted, the two subsets trade places
				+			if (invert_flag)
				+			{
				+				for (uint32_t c = 0; c < 3; c++)
				+				{
				+					std::swap(blog_endpoints[c][0], blog_endpoints[c][2]);
				+					std::swap(blog_endpoints[c][1], blog_endpoints[c][3]);
				+				}
				+			}
				+
				+			// Swap the endpoints of whichever subsets had their weights inverted above
				+			if (swap_flags[0])
				+			{
				+				for (uint32_t c = 0; c < 3; c++)
				+					std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
				+			}
				+
				+			if (swap_flags[1])
				+			{
				+				for (uint32_t c = 0; c < 3; c++)
				+					std::swap(blog_endpoints[c][2], blog_endpoints[c][3]);
				+			}
				+
				+			// Try packing the endpoints: the first is stored as-is, the other three as signed deltas from it
				+			bool failed_flag = false;
				+
				+			for (uint32_t c = 0; c < 3; c++)
				+			{
				+				const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
				+
				+				const int min_delta = -(max_delta + 1);
				+				assert((max_delta - min_delta) == delta_bitmasks[c]);
				+
				+				log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
				+
				+				int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
				+				int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
				+				int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
				+
				+				if ((delta0 < min_delta) || (delta0 > max_delta) ||
				+					(delta1 < min_delta) || (delta1 > max_delta) ||
				+					(delta2 < min_delta) || (delta2 > max_delta))
				+				{
				+					// This mode can't represent the deltas; try the next one
				+					failed_flag = true;
				+					break;
				+				}
				+
				+				log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
				+				log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
				+				log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
				+			}
				+
				+			if (failed_flag)
				+				continue;
				+
				+			log_blk.m_mode = mode;
				+			log_blk.m_partition_pattern = pat_index;
				+			pack_bc6h_block(*pPacked_block, log_blk);
				+
				+			return;
				+		}
				+
				+		bc6h_enc_block_2subset_mode9_3bit_weights(pPacked_block, common_part_index, pEndpoints, pWeights);
				+	}
			
 
				+
			
 
				+	// Packs a block whose low and high endpoints are both pColor and whose 16 weights are all zero.
				+	// Returns false without writing the block if any component has bit 15 (0x8000) set, true otherwise.
				+	bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3])
				+	{
				+		assert(g_bc6h_enc_initialized);
				+
				+		const bool any_high_bit_set = ((pColor[0] | pColor[1] | pColor[2]) & 0x8000) != 0;
				+		if (any_high_bit_set)
				+			return false;
				+
				+		// Inf/NaN inputs are deliberately not rejected here: per the original note, the ASTC block
				+		// unpacker won't allow Inf/NaN's to come through.
				+
				+		half_float endpoints[3][2];
				+		for (uint32_t comp = 0; comp < 3; comp++)
				+		{
				+			endpoints[comp][0] = pColor[comp];
				+			endpoints[comp][1] = pColor[comp];
				+		}
				+
				+		uint8_t weights[16];
				+		memset(weights, 0, sizeof(weights));
				+
				+		bc6h_enc_block_1subset_4bit_weights(pPacked_block, endpoints, weights);
				+
				+		return true;
				+	}
			
 
				+
			
 
				+	//--------------------------------------------------------------------------------------------------------------------------
			
 
				+	// basisu_astc_hdr_core.cpp
			
 
				+
			
 
				+	// Set once astc_hdr_core_init() has filled in the lookup table below.
				+	static bool g_astc_hdr_core_initialized;
				+
				+	// Indexed by the m_astc value of a common partition entry; holds that entry's index in
				+	// basist::g_astc_bc7_common_partitions2. Entries never written by init stay at 0xFF bytes.
				+	static int8_t g_astc_partition_id_to_common_bc7_pat_index[1024];
				+
				+	//--------------------------------------------------------------------------------------------------------------------------
				+
				+	// Builds g_astc_partition_id_to_common_bc7_pat_index. Calls after the first are no-ops.
				+	void astc_hdr_core_init()
				+	{
				+		if (!g_astc_hdr_core_initialized)
				+		{
				+			memset(g_astc_partition_id_to_common_bc7_pat_index, 0xFF, sizeof(g_astc_partition_id_to_common_bc7_pat_index));
				+
				+			// Only the first TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 entries of the common partition table are mapped.
				+			for (uint32_t entry = 0; entry < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; entry++)
				+			{
				+				const uint32_t astc_id = basist::g_astc_bc7_common_partitions2[entry].m_astc;
				+
				+				assert(astc_id < 1024);
				+				g_astc_partition_id_to_common_bc7_pat_index[astc_id] = (int8_t)entry;
				+			}
				+
				+			g_astc_hdr_core_initialized = true;
				+		}
				+	}
			
 
				+
			
 
				+	//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+	// Interprets the low num_src_bits bits of src as a two's complement value and returns it as an int.
				+	// If bit (num_src_bits - 1) is set, all higher bits are set in the result; otherwise they are cleared.
				+	static inline int astc_hdr_sign_extend(int src, int num_src_bits)
				+	{
				+		assert(basisu::in_range(num_src_bits, 2, 31));
				+
				+		// Build the masks in unsigned arithmetic: for num_src_bits == 31 (which the assert above
				+		// accepts), computing (1 << 31) - 1 on a signed int overflows. Both results fit in an int.
				+		const int value_mask = (int)((1U << num_src_bits) - 1U);
				+		const int sign_bit = (int)(1U << (num_src_bits - 1));
				+
				+		const bool negative = (src & sign_bit) != 0;
				+		if (negative)
				+			return src | ~value_mask;
				+		else
				+			return src & value_mask;
				+	}
			
 
				+
			
 
				+	// ORs bit src_bit of src_val (as returned by basisu::get_bit) into dst at position dst_bit.
				+	// Bits already set in dst are never cleared.
				+	static inline void astc_hdr_pack_bit(
				+		int& dst, int dst_bit,
				+		int src_val, int src_bit = 0)
				+	{
				+		assert(dst_bit >= 0 && dst_bit <= 31);
				+
				+		const int extracted = basisu::get_bit(src_val, src_bit);
				+		dst = dst | (extracted << dst_bit);
				+	}
			
 
				+
			
 
				+	//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+	// Decodes four 8-bit mode 7 endpoint values (pEndpoints[0..3]) into two RGB triples.
				+	// e[1] receives the decoded color and e[0] the decoded color minus the scale, each component
				+	// clamped to [0, 0xFFF]. If pScale is non-null it receives the decoded (shifted) scale value.
				+	void decode_mode7_to_qlog12_ise20(
				+		const uint8_t* pEndpoints,
				+		int e[2][3],
				+		int* pScale)
				+	{
				+		assert(g_astc_hdr_core_initialized);
				+
				+		for (uint32_t idx = 0; idx < NUM_MODE7_ENDPOINTS; idx++)
				+		{
				+			assert(pEndpoints[idx] <= 255);
				+		}
				+
				+		const int in0 = pEndpoints[0];
				+		const int in1 = pEndpoints[1];
				+		const int in2 = pEndpoints[2];
				+		const int in3 = pEndpoints[3];
				+
				+		// The 4-bit mode value is assembled from the top two bits of in0 and the top bit of in1 and in2.
				+		const int mode_code = ((in0 & 0xC0) >> 6) | ((in1 & 0x80) >> 5) | ((in2 & 0x80) >> 4);
				+
				+		// Defaults cover the mode_code == 0xF case.
				+		int major_comp = 0;
				+		int submode = 5;
				+		if ((mode_code & 0xC) != 0xC)
				+		{
				+			major_comp = mode_code >> 2;
				+			submode = mode_code & 3;
				+		}
				+		else if (mode_code != 0xF)
				+		{
				+			major_comp = mode_code & 3;
				+			submode = 4;
				+		}
				+
				+		// Low-order bits of r, g, b, and s.
				+		int r = in0 & 0x3f;
				+		int g = in1 & 0x1f;
				+		int b = in2 & 0x1f;
				+		int s = in3 & 0x1f;
				+
				+		// Spare bits whose destination depends on the submode.
				+		const int xb0 = (in1 >> 6) & 1;
				+		const int xb1 = (in1 >> 5) & 1;
				+		const int xb2 = (in2 >> 6) & 1;
				+		const int xb3 = (in2 >> 5) & 1;
				+		const int xb4 = (in3 >> 7) & 1;
				+		const int xb5 = (in3 >> 6) & 1;
				+		const int xb6 = (in3 >> 5) & 1;
				+
				+		// One-hot submode, so each placement rule below is a single mask test.
				+		const int submode_bit = 1 << submode;
				+
				+		if (submode_bit & 0x30)
				+		{
				+			g |= xb0 << 6;
				+			b |= xb2 << 6;
				+		}
				+		if (submode_bit & 0x3A)
				+		{
				+			g |= xb1 << 5;
				+			b |= xb3 << 5;
				+		}
				+
				+		if (submode_bit & 0x3D) s |= xb6 << 5;
				+		if (submode_bit & 0x2D) s |= xb5 << 6;
				+		if (submode_bit & 0x04) s |= xb4 << 7;
				+
				+		if (submode_bit & 0x3B) r |= xb4 << 6;
				+		if (submode_bit & 0x04) r |= xb3 << 6;
				+		if (submode_bit & 0x10) r |= xb5 << 7;
				+		if (submode_bit & 0x0F) r |= xb2 << 7;
				+		if (submode_bit & 0x05) r |= xb1 << 8;
				+		if (submode_bit & 0x0A) r |= xb0 << 8;
				+		if (submode_bit & 0x05) r |= xb0 << 9;
				+		if (submode_bit & 0x02) r |= xb6 << 9;
				+		if (submode_bit & 0x01) r |= xb3 << 10;
				+		if (submode_bit & 0x02) r |= xb5 << 10;
				+
				+		// Shift the bits to the top of the 12-bit result.
				+		static const int s_shamts[6] = { 1,1,2,3,4,5 };
				+		const int shift = s_shamts[submode];
				+
				+		r <<= shift;
				+		g <<= shift;
				+		b <<= shift;
				+		s <<= shift;
				+
				+		// Minor components are stored as differences from the major one.
				+		if (submode != 5)
				+		{
				+			g = r - g;
				+			b = r - b;
				+		}
				+
				+		// Swizzle the major component into place.
				+		switch (major_comp)
				+		{
				+		case 1:
				+			std::swap(r, g);
				+			break;
				+		case 2:
				+			std::swap(r, b);
				+			break;
				+		default:
				+			break;
				+		}
				+
				+		// Clamp the outputs.
				+		e[0][0] = basisu::clamp(r - s, 0, 0xFFF);
				+		e[0][1] = basisu::clamp(g - s, 0, 0xFFF);
				+		e[0][2] = basisu::clamp(b - s, 0, 0xFFF);
				+
				+		e[1][0] = basisu::clamp(r, 0, 0xFFF);
				+		e[1][1] = basisu::clamp(g, 0, 0xFFF);
				+		e[1][2] = basisu::clamp(b, 0, 0xFFF);
				+
				+		if (pScale)
				+			*pScale = s;
				+	}
			
 
				+
			
 
				+	//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+	// Decodes mode 7 endpoints stored at the given ISE endpoint range into two RGB triples
				+	// (see decode_mode7_to_qlog12_ise20). For any range other than BISE_256_LEVELS the endpoint
				+	// values are first mapped through that range's m_ISE_to_val table.
				+	// pScale may be null. Returns false if any decoded component exceeds MAX_QLOG12; e is still written.
				+	bool decode_mode7_to_qlog12(
				+		const uint8_t* pEndpoints,
				+		int e[2][3],
				+		int* pScale,
				+		uint32_t ise_endpoint_range)
				+	{
				+		assert(g_astc_hdr_core_initialized);
				+		// Same range check decode_mode11_to_qlog12() performs before touching the dequant tables.
				+		assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
				+
				+		if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
				+		{
				+			decode_mode7_to_qlog12_ise20(pEndpoints, e, pScale);
				+		}
				+		else
				+		{
				+			// Look the table up once instead of once per endpoint.
				+			const auto& endpoint_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range);
				+
				+			uint8_t dequantized_endpoints[NUM_MODE7_ENDPOINTS];
				+
				+			for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
				+				dequantized_endpoints[i] = endpoint_tab.m_ISE_to_val[pEndpoints[i]];
				+
				+			decode_mode7_to_qlog12_ise20(dequantized_endpoints, e, pScale);
				+		}
				+
				+		// Reject results above the largest allowed qlog12 value.
				+		for (uint32_t i = 0; i < 2; i++)
				+		{
				+			for (uint32_t c = 0; c < 3; c++)
				+			{
				+				if (e[i][c] > (int)MAX_QLOG12)
				+					return false;
				+			}
				+		}
				+
				+		return true;
				+	}
			
 
				+
			
 
				+	//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+	// Decodes six 8-bit mode 11 endpoint values (pEndpoints[0..5]) into two RGB triples e[0] and e[1].
				+	// In the non-direct path every component is clamped to [0, 0xFFF].
				+	void decode_mode11_to_qlog12_ise20(
				+		const uint8_t* pEndpoints,
				+		int e[2][3])
				+	{
				+#ifdef _DEBUG
				+		for (uint32_t idx = 0; idx < NUM_MODE11_ENDPOINTS; idx++)
				+		{
				+			assert(pEndpoints[idx] <= 255);
				+		}
				+#endif
				+
				+		// The major component index comes from the top bits of the last two endpoint values.
				+		const uint32_t maj_comp = basisu::get_bit(pEndpoints[4], 7) | (basisu::get_bit(pEndpoints[5], 7) << 1);
				+
				+		if (maj_comp == 3)
				+		{
				+			// Direct, qlog8 and qlog7: red/green use all 8 bits, blue only the low 7.
				+			e[0][0] = pEndpoints[0] << 4;
				+			e[0][1] = pEndpoints[2] << 4;
				+			e[0][2] = (pEndpoints[4] & 127) << 5;
				+
				+			e[1][0] = pEndpoints[1] << 4;
				+			e[1][1] = pEndpoints[3] << 4;
				+			e[1][2] = (pEndpoints[5] & 127) << 5;
				+			return;
				+		}
				+
				+		const int in0 = pEndpoints[0];
				+		const int in1 = pEndpoints[1];
				+		const int in2 = pEndpoints[2];
				+		const int in3 = pEndpoints[3];
				+		const int in4 = pEndpoints[4];
				+		const int in5 = pEndpoints[5];
				+
				+		// 3-bit submode, one bit from the top of each of in1, in2, in3.
				+		int submode = 0;
				+		astc_hdr_pack_bit(submode, 0, in1, 7);
				+		astc_hdr_pack_bit(submode, 1, in2, 7);
				+		astc_hdr_pack_bit(submode, 2, in3, 7);
				+
				+		int a = in0;
				+		astc_hdr_pack_bit(a, 8, in1, 6);
				+
				+		int b0 = in2 & 63;
				+		int b1 = in3 & 63;
				+		int c = in1 & 63;
				+
				+		// d0/d1 start with 7 bits (more than some submodes use) and are sign extended from the
				+		// submode's actual bit count.
				+		static const int8_t dbitstab[8] = { 7,6,7,6,5,6,5,6 };
				+		const int d_bits = dbitstab[submode];
				+		int d0 = astc_hdr_sign_extend(in4 & 0x7F, d_bits);
				+		int d1 = astc_hdr_sign_extend(in5 & 0x7F, d_bits);
				+
				+		// Spare bits whose destination depends on the submode.
				+		const int xb0 = basisu::get_bit(in2, 6);
				+		const int xb1 = basisu::get_bit(in3, 6);
				+		const int xb2 = basisu::get_bit(in4, 6);
				+		const int xb3 = basisu::get_bit(in5, 6);
				+		const int xb4 = basisu::get_bit(in4, 5);
				+		const int xb5 = basisu::get_bit(in5, 5);
				+
				+		// One-hot submode, so each placement rule below is a single mask test.
				+		const uint32_t submode_bit = 1U << submode;
				+
				+		if (submode_bit & 0xA4) a |= (xb0 << 9);
				+		if (submode_bit & 0x08) a |= (xb2 << 9);
				+		if (submode_bit & 0x50)
				+		{
				+			a |= (xb4 << 9);
				+			a |= (xb5 << 10);
				+		}
				+		if (submode_bit & 0xA0) a |= (xb1 << 10);
				+		if (submode_bit & 0xC0) a |= (xb2 << 11);
				+
				+		if (submode_bit & 0x04) c |= (xb1 << 6);
				+		if (submode_bit & 0xE8) c |= (xb3 << 6);
				+		if (submode_bit & 0x20) c |= (xb2 << 7);
				+
				+		if (submode_bit & 0x5B)
				+		{
				+			b0 |= (xb0 << 6);
				+			b1 |= (xb1 << 6);
				+		}
				+		if (submode_bit & 0x12)
				+		{
				+			b0 |= (xb2 << 7);
				+			b1 |= (xb3 << 7);
				+		}
				+
				+		// Shifts are done on the unsigned representation, since d0/d1 may be negative.
				+		const int shift = (submode >> 1) ^ 3;
				+
				+		a  = (uint32_t)a  << shift;
				+		b0 = (uint32_t)b0 << shift;
				+		b1 = (uint32_t)b1 << shift;
				+		c  = (uint32_t)c  << shift;
				+		d0 = (uint32_t)d0 << shift;
				+		d1 = (uint32_t)d1 << shift;
				+
				+		// qlog12
				+		e[0][0] = basisu::clamp<int>(a - c, 0, 0xFFF);
				+		e[0][1] = basisu::clamp<int>(a - b0 - c - d0, 0, 0xFFF);
				+		e[0][2] = basisu::clamp<int>(a - b1 - c - d1, 0, 0xFFF);
				+
				+		e[1][0] = basisu::clamp<int>(a, 0, 0xFFF);
				+		e[1][1] = basisu::clamp<int>(a - b0, 0, 0xFFF);
				+		e[1][2] = basisu::clamp<int>(a - b1, 0, 0xFFF);
				+
				+		// Move the major component into its slot in both colors.
				+		if (maj_comp)
				+		{
				+			for (uint32_t i = 0; i < 2; i++)
				+				std::swap(e[i][0], e[i][maj_comp]);
				+		}
				+	}
			
 
				+
			
 
				+	//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+	// Decodes mode 11 endpoints stored at the given ISE endpoint range into two RGB triples
				+	// (see decode_mode11_to_qlog12_ise20). For any range other than BISE_256_LEVELS the endpoint
				+	// values are first mapped through that range's m_ISE_to_val table.
				+	// Returns false if any decoded component exceeds MAX_QLOG12; e is still written.
				+	bool decode_mode11_to_qlog12(
				+		const uint8_t* pEndpoints,
				+		int e[2][3],
				+		uint32_t ise_endpoint_range)
				+	{
				+		assert(g_astc_hdr_core_initialized);
				+		assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
				+
				+		if (ise_endpoint_range != astc_helpers::BISE_256_LEVELS)
				+		{
				+			uint8_t dequantized_endpoints[NUM_MODE11_ENDPOINTS];
				+
				+			for (uint32_t idx = 0; idx < NUM_MODE11_ENDPOINTS; idx++)
				+			{
				+				const uint8_t ise_val = pEndpoints[idx];
				+				dequantized_endpoints[idx] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[ise_val];
				+			}
				+
				+			decode_mode11_to_qlog12_ise20(dequantized_endpoints, e);
				+		}
				+		else
				+		{
				+			// Endpoints are already full 8-bit values
				+			decode_mode11_to_qlog12_ise20(pEndpoints, e);
				+		}
				+
				+		// Reject results above the largest allowed qlog12 value.
				+		for (uint32_t color = 0; color < 2; color++)
				+		{
				+			for (uint32_t comp = 0; comp < 3; comp++)
				+			{
				+				if (e[color][comp] > (int)MAX_QLOG12)
				+					return false;
				+			}
				+		}
				+
				+		return true;
				+	}
			
 
				+
			
 
				+	//--------------------------------------------------------------------------------------------------------------------------
			
 
				+
			
 
				+	// Packs a 1-subset block into transcoded_bc6h_blk from half-float endpoints h_e[comp][lh_index]
				+	// and the weights in best_blk. Weight ISE range 5 goes through the 3-bit weight encoder unchanged;
				+	// ranges 1-4, 6 and 7 are remapped through a per-range table to 4-bit weights; range 8 is copied as-is.
				+	// Returns false only for a weight ISE range that is none of the above.
				+	bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
				+	{
				+		assert(g_astc_hdr_core_initialized);
				+		assert((best_blk.m_weight_ise_range >= 1) && (best_blk.m_weight_ise_range <= 8));
				+
				+		// Remap tables, indexed by the ASTC weight value.
				+		static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 8, 15 }; // weight ISE 1: 3 levels
				+		static const uint8_t s_astc2_to_bc6h_4[4] = { 0, 5, 10, 15 }; // weight ISE 2: 4 levels
				+		static const uint8_t s_astc3_to_bc6h_4[5] = { 0, 4, 7, 11, 15 }; // weight ISE 3: 5 levels
				+		static const uint8_t s_astc4_to_bc6h_4[6] = { 0, 15, 3, 12, 6, 9 }; // weight ISE 4: 6 levels
				+		static const uint8_t s_astc6_to_bc6h_4[10] = { 0, 15, 2, 13, 3, 12, 5, 10, 6, 9 }; // weight ISE 6: 10 levels
				+		static const uint8_t s_astc7_to_bc6h_4[12] = { 0, 15, 4, 11, 1, 14, 5, 10, 2, 13, 6, 9 }; // weight ISE 7: 12 levels
				+
				+		// Null means "copy the weights unchanged" (weight ISE 8, 16 levels).
				+		const uint8_t* pRemap = nullptr;
				+
				+		switch (best_blk.m_weight_ise_range)
				+		{
				+		case 5:
				+			// Use 3-bit BC6H weights which are a perfect match for 3-bit ASTC weights, but encode 1-subset as 2 equal subsets
				+			bc6h_enc_block_1subset_3bit_weights(&transcoded_bc6h_blk, h_e, best_blk.m_weights);
				+			return true;
				+		case 1:
				+			pRemap = s_astc1_to_bc6h_3;
				+			break;
				+		case 2:
				+			pRemap = s_astc2_to_bc6h_4;
				+			break;
				+		case 3:
				+			pRemap = s_astc3_to_bc6h_4;
				+			break;
				+		case 4:
				+			pRemap = s_astc4_to_bc6h_4;
				+			break;
				+		case 6:
				+			pRemap = s_astc6_to_bc6h_4;
				+			break;
				+		case 7:
				+			pRemap = s_astc7_to_bc6h_4;
				+			break;
				+		case 8:
				+			break;
				+		default:
				+			assert(0);
				+			return false;
				+		}
				+
				+		uint8_t bc6h_weights[16];
				+
				+		if (pRemap)
				+		{
				+			for (uint32_t texel = 0; texel < 16; texel++)
				+				bc6h_weights[texel] = pRemap[best_blk.m_weights[texel]];
				+		}
				+		else
				+		{
				+			memcpy(bc6h_weights, best_blk.m_weights, 16);
				+		}
				+
				+		bc6h_enc_block_1subset_4bit_weights(&transcoded_bc6h_blk, h_e, bc6h_weights);
				+
				+		return true;
				+	}
			
 
				+
			
 
				+	//--------------------------------------------------------------------------------------------------------------------------
			
 
				+	// Transcodes a 2-partition logical ASTC block (best_blk) into transcoded_bc6h_blk using 3-bit BC6H weights. Returns false when a UASTC HDR check fails, an endpoint decode fails, or a decoded endpoint is Inf/NaN. common_part_index is handed unchanged to bc6h_enc_block_2subset_3bit_weights().
			
 
				+	bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
			
 
				+	{
			
 
				+		assert(g_astc_hdr_core_initialized);
			
 
				+		assert(best_blk.m_num_partitions == 2);
			
 
				+		assert(common_part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
			
 
				+		
			
 
				+		half_float bc6h_endpoints[2][3][2]; // [subset][comp][lh_index]
			
 
				+
			
 
				+		// UASTC HDR checks
			
 
				+		// Both CEM's must be equal in 2-subset UASTC HDR.
			
 
				+		if (best_blk.m_color_endpoint_modes[0] != best_blk.m_color_endpoint_modes[1])
			
 
				+			return false;
			
 
				+		if ((best_blk.m_color_endpoint_modes[0] != 7) && (best_blk.m_color_endpoint_modes[0] != 11))
			
 
				+			return false;
			
 
				+				
			
 
				+		if (best_blk.m_color_endpoint_modes[0] == 7)
			
 
				+		{
			
 
				+			if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 20)) ||
			
 
				+		 		  ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 20)) ||
			
 
				+				  ((best_blk.m_weight_ise_range == 3) && (best_blk.m_endpoint_ise_range == 19)) ||
			
 
				+				  ((best_blk.m_weight_ise_range == 4) && (best_blk.m_endpoint_ise_range == 17)) ||
			
 
				+				  ((best_blk.m_weight_ise_range == 5) && (best_blk.m_endpoint_ise_range == 15))))
			
 
				+			{
			
 
				+				return false;
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 14)) ||
			
 
				+				  ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 12))))
			
 
				+			{
			
 
				+				return false;
			
 
				+			}
			
 
				+		}
			
 
				+		// For each subset: decode its endpoints with decode_mode7_to_qlog12()/decode_mode11_to_qlog12(), then convert every component to half float and reject Inf/NaN.
			
 
				+		for (uint32_t s = 0; s < 2; s++)
			
 
				+		{
			
 
				+			int e[2][3];
			
 
				+			if (best_blk.m_color_endpoint_modes[0] == 7)
			
 
				+			{
			
 
				+				bool success = decode_mode7_to_qlog12(best_blk.m_endpoints + s * NUM_MODE7_ENDPOINTS, e, nullptr, best_blk.m_endpoint_ise_range);
			
 
				+				if (!success)
			
 
				+					return false;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				bool success = decode_mode11_to_qlog12(best_blk.m_endpoints + s * NUM_MODE11_ENDPOINTS, e, best_blk.m_endpoint_ise_range);
			
 
				+				if (!success)
			
 
				+					return false;
			
 
				+			}
			
 
				+
			
 
				+			for (uint32_t c = 0; c < 3; c++)
			
 
				+			{
			
 
				+				bc6h_endpoints[s][c][0] = qlog_to_half_slow(e[0][c], 12);
			
 
				+				if (is_half_inf_or_nan(bc6h_endpoints[s][c][0]))
			
 
				+					return false;
			
 
				+
			
 
				+				bc6h_endpoints[s][c][1] = qlog_to_half_slow(e[1][c], 12);
			
 
				+				if (is_half_inf_or_nan(bc6h_endpoints[s][c][1]))
			
 
				+					return false;
			
 
				+			}
			
 
				+		}
			
 
				+		// Remap the 16 ASTC weights to BC6H weight indices in [0, 7]; each lookup table below has one entry per ASTC weight value of that ISE range.
			
 
				+		uint8_t bc6h_weights[16];
			
 
				+		if (best_blk.m_weight_ise_range == 1)
			
 
				+		{
			
 
				+			static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 4, 7 };
			
 
				+
			
 
				+			for (uint32_t i = 0; i < 16; i++)
			
 
				+				bc6h_weights[i] = s_astc1_to_bc6h_3[best_blk.m_weights[i]];
			
 
				+		}
			
 
				+		else if (best_blk.m_weight_ise_range == 2)
			
 
				+		{
			
 
				+			static const uint8_t s_astc2_to_bc6h_3[4] = { 0, 2, 5, 7 };
			
 
				+
			
 
				+			for (uint32_t i = 0; i < 16; i++)
			
 
				+				bc6h_weights[i] = s_astc2_to_bc6h_3[best_blk.m_weights[i]];
			
 
				+		}
			
 
				+		else if (best_blk.m_weight_ise_range == 3)
			
 
				+		{
			
 
				+			static const uint8_t s_astc3_to_bc6h_3[5] = { 0, 2, 4, 5, 7 };
			
 
				+
			
 
				+			for (uint32_t i = 0; i < 16; i++)
			
 
				+				bc6h_weights[i] = s_astc3_to_bc6h_3[best_blk.m_weights[i]];
			
 
				+		}
			
 
				+		else if (best_blk.m_weight_ise_range == 4)
			
 
				+		{
			
 
				+			static const uint8_t s_astc4_to_bc6h_3[6] = { 0, 7, 1, 6, 3, 4 };
			
 
				+
			
 
				+			for (uint32_t i = 0; i < 16; i++)
			
 
				+				bc6h_weights[i] = s_astc4_to_bc6h_3[best_blk.m_weights[i]];
			
 
				+		}
			
 
				+		else if (best_blk.m_weight_ise_range == 5)
			
 
				+		{
			
 
				+			memcpy(bc6h_weights, best_blk.m_weights, 16); // ISE range 5: the weights are copied through unchanged
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			assert(0);
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		bc6h_enc_block_2subset_3bit_weights(&transcoded_bc6h_blk, common_part_index, bc6h_endpoints, bc6h_weights);
			
 
				+
			
 
				+		return true;
			
 
				+	}
			
 
				+
			
 
				+	//--------------------------------------------------------------------------------------------------------------------------
			
 
				+	// Transcodes a packed UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails. Unpacks src_blk into a log_astc_block and forwards to the log_astc_block overload; returns false if the HDR core is not initialized or unpacking fails.
			
 
				+	bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk)
			
 
				+	{
			
 
				+		assert(g_astc_hdr_core_initialized);
			
 
				+		if (!g_astc_hdr_core_initialized)
			
 
				+		{
			
 
				+			assert(0);
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		astc_helpers::log_astc_block log_blk;
			
 
				+		// NOTE(review): the trailing 4, 4 arguments are presumably the block width/height - confirm against astc_helpers::unpack_block().
			
 
				+		if (!astc_helpers::unpack_block(&src_blk, log_blk, 4, 4))
			
 
				+		{
			
 
				+			// Failed unpacking ASTC data
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		return astc_hdr_transcode_to_bc6h(log_blk, dst_blk);
			
 
				+	}
			
 
				+
			
 
				+	//--------------------------------------------------------------------------------------------------------------------------
			
 
				+	// Transcodes an already unpacked (logical) UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails. Returns false for LDR solid colors, weight grids other than 4x4, dual plane blocks, partition counts other than 1 or 2, and any block failing the per-mode checks below.
			
 
				+	bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk)
			
 
				+	{
			
 
				+		assert(g_astc_hdr_core_initialized);
			
 
				+		if (!g_astc_hdr_core_initialized)
			
 
				+		{
			
 
				+			assert(0);
			
 
				+			return false;
			
 
				+		}
			
 
				+				
			
 
				+		if (log_blk.m_solid_color_flag_ldr)
			
 
				+		{
			
 
				+			// Don't support LDR solid colors.
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		if (log_blk.m_solid_color_flag_hdr)
			
 
				+		{
			
 
				+			// Solid color HDR block
			
 
				+			return bc6h_enc_block_solid_color(&dst_blk, log_blk.m_solid_color);
			
 
				+		}
			
 
				+
			
 
				+		// Only support 4x4 grid sizes
			
 
				+		if ((log_blk.m_grid_width != 4) || (log_blk.m_grid_height != 4))
			
 
				+			return false;
			
 
				+				
			
 
				+		// Don't support dual plane encoding
			
 
				+		if (log_blk.m_dual_plane)
			
 
				+			return false;
			
 
				+
			
 
				+		if (log_blk.m_num_partitions == 1)
			
 
				+		{
			
 
				+			// Handle 1 partition (or subset)
			
 
				+			
			
 
				+			// UASTC HDR checks
			
 
				+			if ((log_blk.m_weight_ise_range < 1) || (log_blk.m_weight_ise_range > 8))
			
 
				+				return false;
			
 
				+									
			
 
				+			int e[2][3];
			
 
				+			bool success;
			
 
				+			// Only CEM 7 and CEM 11 are accepted; each one requires a specific endpoint ISE range.
			
 
				+			if (log_blk.m_color_endpoint_modes[0] == 7)
			
 
				+			{
			
 
				+				if (log_blk.m_endpoint_ise_range != 20)
			
 
				+					return false;
			
 
				+
			
 
				+				success = decode_mode7_to_qlog12(log_blk.m_endpoints, e, nullptr, log_blk.m_endpoint_ise_range);
			
 
				+			}
			
 
				+			else if (log_blk.m_color_endpoint_modes[0] == 11)
			
 
				+			{
			
 
				+				// UASTC HDR checks
			
 
				+				if (log_blk.m_weight_ise_range <= 7)
			
 
				+				{
			
 
				+					if (log_blk.m_endpoint_ise_range != 20)
			
 
				+						return false;
			
 
				+				}
			
 
				+				else if (log_blk.m_endpoint_ise_range != 19)
			
 
				+				{
			
 
				+					return false;
			
 
				+				}
			
 
				+
			
 
				+				success = decode_mode11_to_qlog12(log_blk.m_endpoints, e, log_blk.m_endpoint_ise_range);
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				return false;
			
 
				+			}
			
 
				+
			
 
				+			if (!success)
			
 
				+				return false;
			
 
				+
			
 
				+			// Transform endpoints to half float
			
 
				+			half_float h_e[3][2] =
			
 
				+			{
			
 
				+				{ qlog_to_half_slow(e[0][0], 12), qlog_to_half_slow(e[1][0], 12) },
			
 
				+				{ qlog_to_half_slow(e[0][1], 12), qlog_to_half_slow(e[1][1], 12) },
			
 
				+				{ qlog_to_half_slow(e[0][2], 12), qlog_to_half_slow(e[1][2], 12) }
			
 
				+			};
			
 
				+
			
 
				+			// Sanity check for NaN/Inf
			
 
				+			for (uint32_t i = 0; i < 2; i++)
			
 
				+				if (is_half_inf_or_nan(h_e[0][i]) || is_half_inf_or_nan(h_e[1][i]) || is_half_inf_or_nan(h_e[2][i]))
			
 
				+					return false;
			
 
				+			
			
 
				+			// Transcode to bc6h
			
 
				+			if (!transcode_bc6h_1subset(h_e, log_blk, dst_blk))
			
 
				+				return false;
			
 
				+		}
			
 
				+		else if (log_blk.m_num_partitions == 2)
			
 
				+		{
			
 
				+			// Handle 2 partition (or subset)
			
 
				+			int common_bc7_pat_index = g_astc_partition_id_to_common_bc7_pat_index[log_blk.m_partition_id];
			
 
				+			if (common_bc7_pat_index < 0) // a negative table entry rejects this partition id
			
 
				+				return false;
			
 
				+
			
 
				+			assert(common_bc7_pat_index < (int)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
			
 
				+						
			
 
				+			if (!transcode_bc6h_2subsets(common_bc7_pat_index, log_blk, dst_blk))
			
 
				+				return false;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			// Only supports 1 or 2 partitions (or subsets)
			
 
				+			return false;
			
 
				+		}
			
 
				+
			
 
				+		return true;
			
 
				+	}
			
 
				+#endif // BASISD_SUPPORT_UASTC_HDR
			
 
				+
			
 
				 } // namespace basist
			
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder.h
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.h
@@ -1,5 +1,5 @@
 
				 // basisu_transcoder.h
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
@@ -29,6 +29,7 @@
 
				 
			
 
				 // Set BASISU_FORCE_DEVEL_MESSAGES to 1 to enable debug printf()'s whenever an error occurs, for easier debugging during development.
			
 
				 #ifndef BASISU_FORCE_DEVEL_MESSAGES
			
 
				+	// TODO - disable before checking in
			
 
				 	#define BASISU_FORCE_DEVEL_MESSAGES 0
			
 
				 #endif
			
 
				 
			
@@ -55,7 +56,7 @@ namespace basist
 
				 		cTFETC2_RGBA = 1,							// Opaque+alpha, ETC2_EAC_A8 block followed by a ETC1 block, alpha channel will be opaque for opaque .basis files
			
 
				 
			
 
				 		// BC1-5, BC7 (desktop, some mobile devices)
			
 
				-		cTFBC1_RGB = 2,							// Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
			
 
				+		cTFBC1_RGB = 2,								// Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
			
 
				 		cTFBC3_RGBA = 3, 							// Opaque+alpha, BC4 followed by a BC1 block, alpha channel will be opaque for opaque .basis files
			
 
				 		cTFBC4_R = 4,								// Red only, alpha slice is transcoded to output if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
			
 
				 		cTFBC5_RG = 5,								// XY: Two BC4 blocks, X=R and Y=Alpha, .basis file should have alpha data (if not Y will be all 255's)
			
@@ -63,10 +64,11 @@ namespace basist
 
				 
			
 
				 		// PVRTC1 4bpp (mobile, PowerVR devices)
			
 
				 		cTFPVRTC1_4_RGB = 8,						// Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified, nearly lowest quality of any texture format.
			
 
				-		cTFPVRTC1_4_RGBA = 9,					// Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format.
			
 
				+		cTFPVRTC1_4_RGBA = 9,						// Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format.
			
 
				 
			
 
				 		// ASTC (mobile, Intel devices, hopefully all desktop GPU's one day)
			
 
				-		cTFASTC_4x4_RGBA = 10,					// Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions.
			
 
				+		cTFASTC_4x4_RGBA = 10,						// LDR. Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. 
			
 
				+													// LDR: Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions.
			
 
				 
			
 
				 		// ATC (mobile, Adreno devices, this is a niche format)
			
 
				 		cTFATC_RGB = 11,							// Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. ATI ATC (GL_ATC_RGB_AMD)
			
@@ -74,8 +76,8 @@ namespace basist
 
				 
			
 
				 		// FXT1 (desktop, Intel devices, this is a super obscure format)
			
 
				 		cTFFXT1_RGB = 17,							// Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630).
			
 
				-														// Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now.
			
 
				-														// See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h.
			
 
				+													// Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now.
			
 
				+													// See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h.
			
 
				 
			
 
				 		cTFPVRTC2_4_RGB = 18,					// Opaque-only, almost BC1 quality, much faster to transcode and supports arbitrary texture dimensions (unlike PVRTC1 RGB).
			
 
				 		cTFPVRTC2_4_RGBA = 19,					// Opaque+alpha, slower to encode than cTFPVRTC2_4_RGB. Premultiplied alpha is highly recommended, otherwise the color channel can leak into the alpha channel on transparent blocks.
			
@@ -83,13 +85,22 @@ namespace basist
 
				 		cTFETC2_EAC_R11 = 20,					// R only (ETC2 EAC R11 unsigned)
			
 
				 		cTFETC2_EAC_RG11 = 21,					// RG only (ETC2 EAC RG11 unsigned), R=opaque.r, G=alpha - for tangent space normal maps
			
 
				 
			
 
				+		cTFBC6H = 22,							// HDR, RGB only, unsigned
			
 
				+		cTFASTC_HDR_4x4_RGBA = 23,				// HDR, RGBA (currently UASTC HDR is only RGB), unsigned
			
 
				+
			
 
				 		// Uncompressed (raw pixel) formats
			
 
				+		// Note these uncompressed formats (RGBA32, 565, and 4444) can only be transcoded to from LDR input files (ETC1S or UASTC LDR).
			
 
				 		cTFRGBA32 = 13,							// 32bpp RGBA image stored in raster (not block) order in memory, R is first byte, A is last byte.
			
 
				 		cTFRGB565 = 14,							// 16bpp RGB image stored in raster (not block) order in memory, R at bit position 11
			
 
				 		cTFBGR565 = 15,							// 16bpp RGB image stored in raster (not block) order in memory, R at bit position 0
			
 
				-		cTFRGBA4444 = 16,							// 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0
			
 
				+		cTFRGBA4444 = 16,						// 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0
			
 
				+		
			
 
				+		// Note these uncompressed formats (HALF and 9E5) can only be transcoded to from HDR input files (UASTC HDR).
			
 
				+		cTFRGB_HALF = 24,						// 48bpp RGB half (16-bits/component, 3 components)
			
 
				+		cTFRGBA_HALF = 25,						// 64bpp RGBA half (16-bits/component, 4 components) (A is currently always 1.0, UASTC_HDR doesn't support alpha)
			
 
				+		cTFRGB_9E5 = 26,						// 32bpp RGB 9E5 (shared exponent, positive only, see GL_EXT_texture_shared_exponent)
			
 
				 
			
 
				-		cTFTotalTextureFormats = 22,
			
 
				+		cTFTotalTextureFormats = 27,
			
 
				 
			
 
				 		// Old enums for compatibility with code compiled against previous versions
			
 
				 		cTFETC1 = cTFETC1_RGB,
			
@@ -124,6 +135,9 @@ namespace basist
 
				 	// Returns true if the format supports an alpha channel.
			
 
				 	bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt);
			
 
				 
			
 
				+	// Returns true if the format is HDR.
			
 
				+	bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt);
			
 
				+
			
 
				 	// Returns the basisu::texture_format corresponding to the specified transcoder_texture_format.
			
 
				 	basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt);
			
 
				 
			
@@ -142,7 +156,7 @@ namespace basist
 
				 	// Returns the block height for the specified texture format, which is currently always 4.
			
 
				 	uint32_t basis_get_block_height(transcoder_texture_format tex_type);
			
 
				 
			
 
				-	// Returns true if the specified format was enabled at compile time.
			
 
				+	// Returns true if the specified format was enabled at compile time, and is supported for the specific basis/ktx2 texture format (ETC1S, UASTC, or UASTC HDR).
			
 
				 	bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S);
			
 
				 
			
 
				 	// Validates that the output buffer is large enough to hold the entire transcoded texture.
			
@@ -317,6 +331,42 @@ namespace basist
 
				 			int channel0 = -1, int channel1 = -1);
			
 
				 	};
			
 
				 
			
 
				+	class basisu_lowlevel_uastc_hdr_transcoder // Low-level UASTC HDR transcoder interface; only declarations (plus one forwarding overload) are visible here.
			
 
				+	{
			
 
				+		friend class basisu_transcoder;
			
 
				+
			
 
				+	public:
			
 
				+		basisu_lowlevel_uastc_hdr_transcoder();
			
 
				+		// Slice transcode taking has_alpha and the original dimensions explicitly; the implementation is not part of this header.
			
 
				+		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
			
 
				+			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
			
 
				+			basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0);
			
 
				+		// Convenience overload: derives has_alpha from the header's cBASISHeaderFlagHasAlphaSlices flag and the original dimensions from slice_desc, then forwards to the overload above.
			
 
				+		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
			
 
				+			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
			
 
				+			basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0)
			
 
				+		{
			
 
				+			return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt,
			
 
				+				output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels,
			
 
				+				pState, output_rows_in_pixels, channel0, channel1, decode_flags);
			
 
				+		}
			
 
				+
			
 
				+		// Container independent transcoding
			
 
				+		bool transcode_image(
			
 
				+			transcoder_texture_format target_format,
			
 
				+			void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
			
 
				+			const uint8_t* pCompressed_data, uint32_t compressed_data_length,
			
 
				+			uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
			
 
				+			uint32_t slice_offset, uint32_t slice_length,
			
 
				+			uint32_t decode_flags = 0,
			
 
				+			bool has_alpha = false,
			
 
				+			bool is_video = false,
			
 
				+			uint32_t output_row_pitch_in_blocks_or_pixels = 0,
			
 
				+			basisu_transcoder_state* pState = nullptr,
			
 
				+			uint32_t output_rows_in_pixels = 0,
			
 
				+			int channel0 = -1, int channel1 = -1);
			
 
				+	};
			
 
				+
			
 
				 	struct basisu_slice_info
			
 
				 	{
			
 
				 		uint32_t m_orig_width;
			
@@ -530,6 +580,7 @@ namespace basist
 
				 	private:
			
 
				 		mutable basisu_lowlevel_etc1s_transcoder m_lowlevel_etc1s_decoder;
			
 
				 		mutable basisu_lowlevel_uastc_transcoder m_lowlevel_uastc_decoder;
			
 
				+		mutable basisu_lowlevel_uastc_hdr_transcoder m_lowlevel_uastc_hdr_decoder;
			
 
				 
			
 
				 		bool m_ready_to_transcode;
			
 
				 
			
@@ -612,10 +663,12 @@ namespace basist
 
				 #pragma pack(pop)
			
 
				 
			
 
				 	const uint32_t KTX2_VK_FORMAT_UNDEFINED = 0;
			
 
				+	const uint32_t KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK = 1000066000; // TODO, is this correct?
			
 
				 	const uint32_t KTX2_KDF_DF_MODEL_UASTC = 166;
			
 
				+	const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR = 167;
			
 
				 	const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163;
			
 
				 	const uint32_t KTX2_IMAGE_IS_P_FRAME = 2;
			
 
				-	const uint32_t KTX2_UASTC_BLOCK_SIZE = 16;
			
 
				+	const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; // also the block size for UASTC_HDR
			
 
				 	const uint32_t KTX2_MAX_SUPPORTED_LEVEL_COUNT = 16; // this is an implementation specific constraint and can be increased
			
 
				 
			
 
				 	// The KTX2 transfer functions supported by KTX2
			
@@ -800,13 +853,15 @@ namespace basist
 
				 		// Returns 0 or the number of layers in the texture array or texture video. Valid after init().
			
 
				 		uint32_t get_layers() const { return m_header.m_layer_count; }
			
 
				 
			
 
				-		// Returns cETC1S or cUASTC4x4. Valid after init().
			
 
				+		// Returns cETC1S, cUASTC4x4, or cUASTC_HDR_4x4. Valid after init().
			
 
				 		basist::basis_tex_format get_format() const { return m_format; } 
			
 
				-
			
 
				+				
			
 
				 		bool is_etc1s() const { return get_format() == basist::basis_tex_format::cETC1S; }
			
 
				 
			
 
				 		bool is_uastc() const { return get_format() == basist::basis_tex_format::cUASTC4x4; }
			
 
				 
			
 
				+		bool is_hdr() const { return get_format() == basist::basis_tex_format::cUASTC_HDR_4x4; }
			
 
				+
			
 
				 		// Returns true if the ETC1S file has two planes (typically RGBA, or RRRG), or true if the UASTC file has alpha data. Valid after init().
			
 
				 		uint32_t get_has_alpha() const { return m_has_alpha; }
			
 
				 
			
@@ -913,6 +968,7 @@ namespace basist
 
				 								
			
 
				 		basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder;
			
 
				 		basist::basisu_lowlevel_uastc_transcoder m_uastc_transcoder;
			
 
				+		basist::basisu_lowlevel_uastc_hdr_transcoder m_uastc_hdr_transcoder;
			
 
				 				
			
 
				 		ktx2_transcoder_state m_def_transcoder_state;
			
 
				 
			
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
@@ -1,5 +1,5 @@
 
				 // basisu_transcoder_internal.h - Universal texture format transcoder library.
			
 
				-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
			
 
				 //
			
@@ -20,8 +20,9 @@
 
				 #pragma warning (disable: 4127) //  conditional expression is constant
			
 
				 #endif
			
 
				 
			
 
				-#define BASISD_LIB_VERSION 116
			
 
				-#define BASISD_VERSION_STRING "01.16"
			
 
				+// v1.50: Added UASTC HDR support
			
 
				+#define BASISD_LIB_VERSION 150
			
 
				+#define BASISD_VERSION_STRING "01.50"
			
 
				 
			
 
				 #ifdef _DEBUG
			
 
				 #define BASISD_BUILD_DEBUG
			
@@ -82,9 +83,15 @@ namespace basist
 
				 		cRGBA4444_ALPHA,
			
 
				 		cRGBA4444_COLOR_OPAQUE,
			
 
				 		cRGBA4444,
			
 
				-
			
 
				-		cUASTC_4x4,
			
 
				-						
			
 
				+		cRGBA_HALF,
			
 
				+		cRGB_HALF,
			
 
				+		cRGB_9E5,
			
 
				+
			
 
				+		cUASTC_4x4,							// LDR, universal
			
 
				+		cUASTC_HDR_4x4,						// HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed
			
 
				+		cBC6H,
			
 
				+		cASTC_HDR_4x4,
			
 
				+								
			
 
				 		cTotalBlockFormats
			
 
				 	};
			
 
				 
			
@@ -264,8 +271,8 @@ namespace basist
 
				 		}
			
 
				 
			
 
				 		const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; }
			
 
				-		const basisu::int_vec get_lookup() const { return m_lookup; }
			
 
				-		const basisu::int16_vec get_tree() const { return m_tree; }
			
 
				+		const basisu::int_vec &get_lookup() const { return m_lookup; }
			
 
				+		const basisu::int16_vec &get_tree() const { return m_tree; }
			
 
				 
			
 
				 		bool is_valid() const { return m_code_sizes.size() > 0; }
			
 
				 
			
@@ -789,7 +796,198 @@ namespace basist
 
				 	};
			
 
				 
			
 
				 	bool basis_block_format_is_uncompressed(block_format tex_type);
			
 
				-	
			
 
				+
			
 
				+	//------------------------------------
			
 
				+
			
 
				+	typedef uint16_t half_float;
			
 
				+
			
 
				+	const double MIN_DENORM_HALF_FLOAT = 0.000000059604645; // smallest positive subnormal number
			
 
				+	const double MIN_HALF_FLOAT = 0.00006103515625; // smallest positive normal number
			
 
				+	const double MAX_HALF_FLOAT = 65504.0; // largest normal number
			
 
				+	// Returns bits [low, high] (inclusive, bit 0 = least significant) of val, shifted down so the field starts at bit 0.
			
 
				+	inline uint32_t get_bits(uint32_t val, int low, int high)
			
 
				+	{
			
 
				+		const int num_bits = (high - low) + 1;
			
 
				+		assert((num_bits >= 1) && (num_bits <= 32));
			
 
				+
			
 
				+		val >>= low;
			
 
				+		if (num_bits != 32) // a full 32-bit field needs no mask, and 1u << 32 would be undefined
			
 
				+			val &= ((1u << num_bits) - 1);
			
 
				+
			
 
				+		return val;
			
 
				+	}
			
 
				+	// True when the 5-bit exponent field (bits 10-14) of v is all ones (31), i.e. v encodes +/-Inf or NaN.
			
 
				+	inline bool is_half_inf_or_nan(half_float v)
			
 
				+	{
			
 
				+		return get_bits(v, 10, 14) == 31;
			
 
				+	}
			
 
				+	// True when the 5-bit exponent field of v is zero. Note this is also true for +/-0, not only for denormals.
			
 
				+	inline bool is_half_denorm(half_float v)
			
 
				+	{
			
 
				+		int e = (v >> 10) & 31;
			
 
				+		return !e;
			
 
				+	}
			
 
				+	// Returns the exponent field of v minus the bias of 15, or -14 when the exponent field is zero.
			
 
				+	inline int get_half_exp(half_float v)
			
 
				+	{
			
 
				+		int e = ((v >> 10) & 31);
			
 
				+		return e ? (e - 15) : -14;
			
 
				+	}
			
 
				+	// Returns the 10-bit mantissa field of v, with the implicit leading one (0x400) OR'd in when the exponent field is non-zero.
			
 
				+	inline int get_half_mantissa(half_float v)
			
 
				+	{
			
 
				+		if (is_half_denorm(v))
			
 
				+			return v & 0x3FF;
			
 
				+		return (v & 0x3FF) | 0x400;
			
 
				+	}
			
 
				+	// Returns get_half_mantissa(v) divided by 1024, as a float.
			
 
				+	inline float get_half_mantissaf(half_float v)
			
 
				+	{
			
 
				+		return ((float)get_half_mantissa(v)) / 1024.0f;
			
 
				+	}
			
 
				+	// Returns 0 when v has no bits set, -1 when the sign bit (0x8000) is set, and 1 otherwise. Note that 0x8000 (negative zero) therefore yields -1.
			
 
				+	inline int get_half_sign(half_float v)
			
 
				+	{
			
 
				+		return v ? ((v & 0x8000) ? -1 : 1) : 0;
			
 
				+	}
			
 
				+	// True when the sign bit (0x8000) of v is set.
			
 
				+	inline bool half_is_signed(half_float v)
			
 
				+	{
			
 
				+		return (v & 0x8000) != 0;
			
 
				+	}
			
 
				+
			
 
				+#if 0
			
 
				+	int hexp = get_half_exp(Cf);
			
 
				+	float hman = get_half_mantissaf(Cf);
			
 
				+	int hsign = get_half_sign(Cf);
			
 
				+	float k = powf(2.0f, hexp) * hman * hsign;
			
 
				+	if (is_half_inf_or_nan(Cf))
			
 
				+		k = std::numeric_limits<float>::quiet_NaN();
			
 
				+#endif
			
 
				+
			
 
				+	half_float float_to_half(float val);
			
 
				+	// Converts a 16-bit half float bit pattern to a 32-bit float. Zeros, denormals, infinities and NaNs each take their own path below; the sign bit is carried over in every case.
			
 
				+	inline float half_to_float(half_float hval)
			
 
				+	{
			
 
				+		union { float f; uint32_t u; } x = { 0 };
			
 
				+
			
 
				+		uint32_t s = ((uint32_t)hval >> 15) & 1;
			
 
				+		uint32_t e = ((uint32_t)hval >> 10) & 0x1F;
			
 
				+		uint32_t m = (uint32_t)hval & 0x3FF;
			
 
				+
			
 
				+		if (!e)
			
 
				+		{
			
 
				+			if (!m)
			
 
				+			{
			
 
				+				// +- 0
			
 
				+				x.u = s << 31;
			
 
				+				return x.f;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// denormalized: shift the mantissa up until bit 0x400 is set, decrementing e once per shift
			
 
				+				while (!(m & 0x00000400))
			
 
				+				{
			
 
				+					m <<= 1;
			
 
				+					--e; // e is unsigned, so this wraps; adding (127 - 15) below brings it back into range
			
 
				+				}
			
 
				+
			
 
				+				++e;
			
 
				+				m &= ~0x00000400;
			
 
				+			}
			
 
				+		}
			
 
				+		else if (e == 31)
			
 
				+		{
			
 
				+			if (m == 0)
			
 
				+			{
			
 
				+				// +/- INF
			
 
				+				x.u = (s << 31) | 0x7f800000;
			
 
				+				return x.f;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// +/- NaN
			
 
				+				x.u = (s << 31) | 0x7f800000 | (m << 13);
			
 
				+				return x.f;
			
 
				+			}
			
 
				+		}
			
 
				+		// Re-bias the exponent from 15 to 127 and move the 10-bit mantissa to the top of the 23-bit float mantissa.
			
 
				+		e = e + (127 - 15);
			
 
				+		m = m << 13;
			
 
				+
			
 
				+		assert(s <= 1);
			
 
				+		assert(m <= 0x7FFFFF);
			
 
				+		assert(e <= 255);
			
 
				+
			
 
				+		x.u = m | (e << 23) | (s << 31);
			
 
				+		return x.f;
			
 
				+	}
			
 
				+
			
 
				+	// Originally from bc6h_enc.h
			
 
				+
			
 
				+	void bc6h_enc_init();
			
 
				+
			
 
				+	const uint32_t MAX_BLOG16_VAL = 0xFFFF;
			
 
				+
			
 
				+	// BC6H internals
			
 
				+	const uint32_t NUM_BC6H_MODES = 14;
			
 
				+	const uint32_t BC6H_LAST_MODE_INDEX = 13;
			
 
				+	const uint32_t BC6H_FIRST_1SUBSET_MODE_INDEX = 10; // in the MS docs, this is "mode 11" (where the first mode is 1), 60 bits for endpoints (10.10, 10.10, 10.10), 63 bits for weights
			
 
				+	const uint32_t TOTAL_BC6H_PARTITION_PATTERNS = 32;
			
 
				+
			
 
				+	extern const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4]; // base, r, g, b
			
 
				+	// One entry of the per-mode BC6H bit layout table; this is the element type of g_bc6h_bit_layouts.
			
 
				+	struct bc6h_bit_layout
			
 
				+	{
			
 
				+		int8_t m_comp; // R=0,G=1,B=2,D=3 (D=partition index)
			
 
				+		int8_t m_index; // 0-3, 0-1 Low/High subset 1, 2-3 Low/High subset 2, -1=partition index (d)
			
 
				+		int8_t m_last_bit;
			
 
				+		int8_t m_first_bit; // may be -1 if a single bit, may be >m_last_bit if reversed
			
 
				+	};
			
 
				+
			
 
				+	const uint32_t MAX_BC6H_LAYOUT_INDEX = 25;
			
 
				+	extern const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX];
			
 
				+
			
 
				+	extern const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4]; // [y][x]
			
 
				+
			
 
				+	extern const uint8_t g_bc6h_weight3[8];
			
 
				+	extern const uint8_t g_bc6h_weight4[16];
			
 
				+
			
 
				+	extern const int8_t g_bc6h_mode_lookup[32];
			
 
				+		
			
 
				+	// Converts a blog16 value (comp must be <= 0xFFFF) to a half float bit pattern by scaling it by 31/64.
			
 
				+	inline half_float bc6h_blog16_to_half(uint32_t comp)
			
 
				+	{
			
 
				+		assert(comp <= 0xFFFF);
			
 
				+
			
 
				+		// scale the magnitude by 31/64
			
 
				+		comp = (comp * 31u) >> 6u;
			
 
				+		return (half_float)comp;
			
 
				+	}
			
 
				+
			
 
				+	const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF;
			
 
				+
			
 
				+	// Inverts bc6h_blog16_to_half().
			
 
				+	// Returns the nearest blog16 given a half value. Computed as (h * 64 + 30) / 31 using integer division; h must not exceed MAX_BC6H_HALF_FLOAT_AS_UINT.
			
 
				+	inline uint32_t bc6h_half_to_blog16(half_float h)
			
 
				+	{
			
 
				+		assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
			
 
				+		return (h * 64 + 30) / 31;
			
 
				+	}
			
 
				+	// Raw 16-byte storage for one BC6H block; the bc6h_enc_block_* functions declared below write into it.
			
 
				+	struct bc6h_block
			
 
				+	{
			
 
				+		uint8_t m_bytes[16];
			
 
				+	};
			
 
				+
			
 
				+	void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
			
 
				+	void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
			
 
				+	void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
			
 
				+	void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
			
 
				+	void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
			
 
				+	void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
			
 
				+	bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]);
			
 
				+		
			
 
				 } // namespace basist
			
 
				 
			
 
				 
			
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc
@@ -1,4 +1,4 @@
 
				-// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc
@@ -1,4 +1,4 @@
 
				-// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
			
 
				+// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved.
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 // you may not use this file except in compliance with the License.
			
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h
@@ -13,6 +13,7 @@ namespace basist
 
				 	const uint32_t UASTC_MODE_INDEX_SOLID_COLOR = 8;
			
 
				 
			
 
				 	const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS2 = 30;
			
 
				+	const uint32_t TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 = 27; // BC6H supports only 5-bit pattern indices, BC7 supports 4-bit or 6-bit
			
 
				 	const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS3 = 11;
			
 
				 	const uint32_t TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS = 19;