2 tahun lalu · 5a3f955e05
--- a/modules/astcenc/SCsub
+++ b/modules/astcenc/SCsub
@@ -29,7 +29,6 @@ thirdparty_sources = [
 
				     "astcenc_partition_tables.cpp",
			
 
				     "astcenc_percentile_tables.cpp",
			
 
				     "astcenc_pick_best_endpoint_format.cpp",
			
 
				-    "astcenc_platform_isa_detection.cpp",
			
 
				     "astcenc_quantization.cpp",
			
 
				     "astcenc_symbolic_physical.cpp",
			
 
				     "astcenc_weight_align.cpp",
			
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -20,7 +20,7 @@ Files extracted from upstream source:
 
				 ## astcenc
			
 
				 
			
 
				 - Upstream: https://github.com/ARM-software/astc-encoder
			
 
				-- Version: 4.3.0 (ec83dda79fcefe07f69cdae7ed980d169bf2c4d4, 2023)
			
 
				+- Version: 4.4.0 (5a5b5a1ef60dd47c27c28c66c118d22c40e3197e, 2023)
			
 
				 - License: Apache 2.0
			
 
				 
			
 
				 Files extracted from upstream source:
			
--- a/thirdparty/astcenc/astcenc.h
+++ b/thirdparty/astcenc/astcenc.h
@@ -43,6 +43,14 @@
 
				  *       for faster processing. The caller is responsible for creating the worker threads, and
			
 
				  *       synchronizing between images.
			
 
				  *
			
 
				+ * Extended instruction set support
			
 
				+ * ================================
			
 
				+ *
			
 
				+ * This library supports use of extended instruction sets, such as SSE4.1 and AVX2. These are
			
 
				+ * enabled at compile time when building the library. There is no runtime checking in the core
			
 
				+ * library that the instruction sets used are actually available. Checking compatibility is the
			
 
				+ * responsibility of the calling code.
			
 
				+ *
			
 
				  * Threading
			
 
				  * =========
			
 
				  *
			
@@ -191,8 +199,6 @@ enum astcenc_error {
 
				 	ASTCENC_ERR_OUT_OF_MEM,
			
 
				 	/** @brief The call failed due to the build using fast math. */
			
 
				 	ASTCENC_ERR_BAD_CPU_FLOAT,
			
 
				-	/** @brief The call failed due to the build using an unsupported ISA. */
			
 
				-	ASTCENC_ERR_BAD_CPU_ISA,
			
 
				 	/** @brief The call failed due to an out-of-spec parameter. */
			
 
				 	ASTCENC_ERR_BAD_PARAM,
			
 
				 	/** @brief The call failed due to an out-of-spec block size. */
			
@@ -472,7 +478,7 @@ struct astcenc_config
 
				 	/**
			
 
				 	 * @brief The number of trial candidates per mode search (-candidatelimit).
			
 
				 	 *
			
 
				-	 * Valid values are between 1 and TUNE_MAX_TRIAL_CANDIDATES (default 4).
			
 
				+	 * Valid values are between 1 and TUNE_MAX_TRIAL_CANDIDATES.
			
 
				 	 */
			
 
				 	unsigned int tune_candidate_limit;
			
 
				 
			
@@ -520,21 +526,21 @@ struct astcenc_config
 
				 	 *
			
 
				 	 * This option is further scaled for normal maps, so it skips less often.
			
 
				 	 */
			
 
				-	float tune_2_partition_early_out_limit_factor;
			
 
				+	float tune_2partition_early_out_limit_factor;
			
 
				 
			
 
				 	/**
			
 
				 	 * @brief The threshold for skipping 4.1 trials (-3partitionlimitfactor).
			
 
				 	 *
			
 
				 	 * This option is further scaled for normal maps, so it skips less often.
			
 
				 	 */
			
 
				-	float tune_3_partition_early_out_limit_factor;
			
 
				+	float tune_3partition_early_out_limit_factor;
			
 
				 
			
 
				 	/**
			
 
				 	 * @brief The threshold for skipping two weight planes (-2planelimitcorrelation).
			
 
				 	 *
			
 
				 	 * This option is ineffective for normal maps.
			
 
				 	 */
			
 
				-	float tune_2_plane_early_out_limit_correlation;
			
 
				+	float tune_2plane_early_out_limit_correlation;
			
 
				 
			
 
				 #if defined(ASTCENC_DIAGNOSTICS)
			
 
				 	/**
			
--- a/thirdparty/astcenc/astcenc_averages_and_directions.cpp
+++ b/thirdparty/astcenc/astcenc_averages_and_directions.cpp
@@ -1,6 +1,6 @@
 
				 // SPDX-License-Identifier: Apache-2.0
			
 
				 // ----------------------------------------------------------------------------
			
 
				-// Copyright 2011-2022 Arm Limited
			
 
				+// Copyright 2011-2023 Arm Limited
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
			
 
				 // use this file except in compliance with the License. You may obtain a copy
			
@@ -725,8 +725,7 @@ void compute_error_squared_rgba(
 
				 	const image_block& blk,
			
 
				 	const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS],
			
 
				 	const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS],
			
 
				-	float uncor_lengths[BLOCK_MAX_PARTITIONS],
			
 
				-	float samec_lengths[BLOCK_MAX_PARTITIONS],
			
 
				+	float line_lengths[BLOCK_MAX_PARTITIONS],
			
 
				 	float& uncor_error,
			
 
				 	float& samec_error
			
 
				 ) {
			
@@ -740,12 +739,6 @@ void compute_error_squared_rgba(
 
				 	{
			
 
				 		const uint8_t *texel_indexes = pi.texels_of_partition[partition];
			
 
				 
			
 
				-		float uncor_loparam = 1e10f;
			
 
				-		float uncor_hiparam = -1e10f;
			
 
				-
			
 
				-		float samec_loparam = 1e10f;
			
 
				-		float samec_hiparam = -1e10f;
			
 
				-
			
 
				 		processed_line4 l_uncor = uncor_plines[partition];
			
 
				 		processed_line4 l_samec = samec_plines[partition];
			
 
				 
			
@@ -773,9 +766,6 @@ void compute_error_squared_rgba(
 
				 		vfloat uncor_loparamv(1e10f);
			
 
				 		vfloat uncor_hiparamv(-1e10f);
			
 
				 
			
 
				-		vfloat samec_loparamv(1e10f);
			
 
				-		vfloat samec_hiparamv(-1e10f);
			
 
				-
			
 
				 		vfloat ew_r(blk.channel_weight.lane<0>());
			
 
				 		vfloat ew_g(blk.channel_weight.lane<1>());
			
 
				 		vfloat ew_b(blk.channel_weight.lane<2>());
			
@@ -825,9 +815,6 @@ void compute_error_squared_rgba(
 
				 			                   + (data_b * l_samec_bs2)
			
 
				 			                   + (data_a * l_samec_bs3);
			
 
				 
			
 
				-			samec_loparamv = min(samec_param, samec_loparamv);
			
 
				-			samec_hiparamv = max(samec_param, samec_hiparamv);
			
 
				-
			
 
				 			vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r;
			
 
				 			vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g;
			
 
				 			vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b;
			
@@ -843,18 +830,9 @@ void compute_error_squared_rgba(
 
				 			lane_ids += vint(ASTCENC_SIMD_WIDTH);
			
 
				 		}
			
 
				 
			
 
				-		uncor_loparam = hmin_s(uncor_loparamv);
			
 
				-		uncor_hiparam = hmax_s(uncor_hiparamv);
			
 
				-
			
 
				-		samec_loparam = hmin_s(samec_loparamv);
			
 
				-		samec_hiparam = hmax_s(samec_hiparamv);
			
 
				-
			
 
				-		float uncor_linelen = uncor_hiparam - uncor_loparam;
			
 
				-		float samec_linelen = samec_hiparam - samec_loparam;
			
 
				-
			
 
				 		// Turn very small numbers and NaNs into a small number
			
 
				-		uncor_lengths[partition] = astc::max(uncor_linelen, 1e-7f);
			
 
				-		samec_lengths[partition] = astc::max(samec_linelen, 1e-7f);
			
 
				+		float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv);
			
 
				+		line_lengths[partition] = astc::max(uncor_linelen, 1e-7f);
			
 
				 	}
			
 
				 
			
 
				 	uncor_error = hadd_s(uncor_errorsumv);
			
@@ -882,19 +860,9 @@ void compute_error_squared_rgb(
 
				 		unsigned int texel_count = pi.partition_texel_count[partition];
			
 
				 		promise(texel_count > 0);
			
 
				 
			
 
				-		float uncor_loparam = 1e10f;
			
 
				-		float uncor_hiparam = -1e10f;
			
 
				-
			
 
				-		float samec_loparam = 1e10f;
			
 
				-		float samec_hiparam = -1e10f;
			
 
				-
			
 
				 		processed_line3 l_uncor = pl.uncor_pline;
			
 
				 		processed_line3 l_samec = pl.samec_pline;
			
 
				 
			
 
				-		// This implementation is an example vectorization of this function.
			
 
				-		// It works for - the codec is a 2-4% faster than not vectorizing - but
			
 
				-		// the benefit is limited by the use of gathers and register pressure
			
 
				-
			
 
				 		// Vectorize some useful scalar inputs
			
 
				 		vfloat l_uncor_bs0(l_uncor.bs.lane<0>());
			
 
				 		vfloat l_uncor_bs1(l_uncor.bs.lane<1>());
			
@@ -913,9 +881,6 @@ void compute_error_squared_rgb(
 
				 		vfloat uncor_loparamv(1e10f);
			
 
				 		vfloat uncor_hiparamv(-1e10f);
			
 
				 
			
 
				-		vfloat samec_loparamv(1e10f);
			
 
				-		vfloat samec_hiparamv(-1e10f);
			
 
				-
			
 
				 		vfloat ew_r(blk.channel_weight.lane<0>());
			
 
				 		vfloat ew_g(blk.channel_weight.lane<1>());
			
 
				 		vfloat ew_b(blk.channel_weight.lane<2>());
			
@@ -958,9 +923,6 @@ void compute_error_squared_rgb(
 
				 			                   + (data_g * l_samec_bs1)
			
 
				 			                   + (data_b * l_samec_bs2);
			
 
				 
			
 
				-			samec_loparamv = min(samec_param, samec_loparamv);
			
 
				-			samec_hiparamv = max(samec_param, samec_hiparamv);
			
 
				-
			
 
				 			vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r;
			
 
				 			vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g;
			
 
				 			vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b;
			
@@ -974,18 +936,9 @@ void compute_error_squared_rgb(
 
				 			lane_ids += vint(ASTCENC_SIMD_WIDTH);
			
 
				 		}
			
 
				 
			
 
				-		uncor_loparam = hmin_s(uncor_loparamv);
			
 
				-		uncor_hiparam = hmax_s(uncor_hiparamv);
			
 
				-
			
 
				-		samec_loparam = hmin_s(samec_loparamv);
			
 
				-		samec_hiparam = hmax_s(samec_hiparamv);
			
 
				-
			
 
				-		float uncor_linelen = uncor_hiparam - uncor_loparam;
			
 
				-		float samec_linelen = samec_hiparam - samec_loparam;
			
 
				-
			
 
				 		// Turn very small numbers and NaNs into a small number
			
 
				-		pl.uncor_line_len = astc::max(uncor_linelen, 1e-7f);
			
 
				-		pl.samec_line_len = astc::max(samec_linelen, 1e-7f);
			
 
				+		float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv);
			
 
				+		pl.line_length = astc::max(uncor_linelen, 1e-7f);
			
 
				 	}
			
 
				 
			
 
				 	uncor_error = hadd_s(uncor_errorsumv);
			
--- a/thirdparty/astcenc/astcenc_block_sizes.cpp
+++ b/thirdparty/astcenc/astcenc_block_sizes.cpp
@@ -776,8 +776,8 @@ static void construct_dt_entry_2d(
 
				 	assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
			
 
				 	bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
			
 
				 	bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
			
 
				-	bsd.decimation_modes[index].refprec_1_plane = 0;
			
 
				-	bsd.decimation_modes[index].refprec_2_planes = 0;
			
 
				+	bsd.decimation_modes[index].refprec_1plane = 0;
			
 
				+	bsd.decimation_modes[index].refprec_2planes = 0;
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -934,11 +934,11 @@ static void construct_block_size_descriptor_2d(
 
				 
			
 
				 			if (is_dual_plane)
			
 
				 			{
			
 
				-				dm.set_ref_2_plane(bm.get_weight_quant_mode());
			
 
				+				dm.set_ref_2plane(bm.get_weight_quant_mode());
			
 
				 			}
			
 
				 			else
			
 
				 			{
			
 
				-				dm.set_ref_1_plane(bm.get_weight_quant_mode());
			
 
				+				dm.set_ref_1plane(bm.get_weight_quant_mode());
			
 
				 			}
			
 
				 
			
 
				 			bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);
			
@@ -969,8 +969,8 @@ static void construct_block_size_descriptor_2d(
 
				 	{
			
 
				 		bsd.decimation_modes[i].maxprec_1plane = -1;
			
 
				 		bsd.decimation_modes[i].maxprec_2planes = -1;
			
 
				-		bsd.decimation_modes[i].refprec_1_plane = 0;
			
 
				-		bsd.decimation_modes[i].refprec_2_planes = 0;
			
 
				+		bsd.decimation_modes[i].refprec_1plane = 0;
			
 
				+		bsd.decimation_modes[i].refprec_2planes = 0;
			
 
				 	}
			
 
				 
			
 
				 	// Determine the texels to use for kmeans clustering.
			
@@ -1055,8 +1055,8 @@ static void construct_block_size_descriptor_3d(
 
				 
			
 
				 				bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
			
 
				 				bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
			
 
				-				bsd.decimation_modes[decimation_mode_count].refprec_1_plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
			
 
				-				bsd.decimation_modes[decimation_mode_count].refprec_2_planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
			
 
				+				bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
			
 
				+				bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
			
 
				 				decimation_mode_count++;
			
 
				 			}
			
 
				 		}
			
@@ -1067,8 +1067,8 @@ static void construct_block_size_descriptor_3d(
 
				 	{
			
 
				 		bsd.decimation_modes[i].maxprec_1plane = -1;
			
 
				 		bsd.decimation_modes[i].maxprec_2planes = -1;
			
 
				-		bsd.decimation_modes[i].refprec_1_plane = 0;
			
 
				-		bsd.decimation_modes[i].refprec_2_planes = 0;
			
 
				+		bsd.decimation_modes[i].refprec_1plane = 0;
			
 
				+		bsd.decimation_modes[i].refprec_2planes = 0;
			
 
				 	}
			
 
				 
			
 
				 	bsd.decimation_mode_count_always = 0; // Skipped for 3D modes
			
--- a/thirdparty/astcenc/astcenc_color_quantize.cpp
+++ b/thirdparty/astcenc/astcenc_color_quantize.cpp
@@ -1,6 +1,6 @@
 
				 // SPDX-License-Identifier: Apache-2.0
			
 
				 // ----------------------------------------------------------------------------
			
 
				-// Copyright 2011-2021 Arm Limited
			
 
				+// Copyright 2011-2023 Arm Limited
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
			
 
				 // use this file except in compliance with the License. You may obtain a copy
			
@@ -44,17 +44,43 @@
 
				  * @brief Determine the quantized value given a quantization level.
			
 
				  *
			
 
				  * @param quant_level   The quantization level to use.
			
 
				- * @param value         The value to convert. This may be outside of the 0-255 range and will be
			
 
				- *                      clamped before the value is looked up.
			
 
				+ * @param value         The value to convert. This must be in the 0-255 range.
			
 
				  *
			
 
				- * @return The encoded quantized value. These are not necessarily in order; the compressor
			
 
				- *         scrambles the values slightly to make hardware implementation easier.
			
 
				+ * @return The unpacked quantized value, returned in 0-255 range.
			
 
				  */
			
 
				 static inline uint8_t quant_color(
			
 
				 	quant_method quant_level,
			
 
				 	int value
			
 
				 ) {
			
 
				-	return color_unquant_to_uquant_tables[quant_level - QUANT_6][value];
			
 
				+	int index = value * 2 + 1;
			
 
				+	return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * @brief Determine the quantized value given a quantization level and residual.
			
 
				+ *
			
 
				+ * @param quant_level   The quantization level to use.
			
 
				+ * @param value         The value to convert. This must be in the 0-255 range.
			
 
				+ * @param valuef        The original value before rounding, used to compute a residual.
			
 
				+ *
			
 
				+ * @return The unpacked quantized value, returned in 0-255 range.
			
 
				+ */
			
 
				+static inline uint8_t quant_color(
			
 
				+	quant_method quant_level,
			
 
				+	int value,
			
 
				+	float valuef
			
 
				+) {
			
 
				+	int index = value * 2;
			
 
				+
			
 
				+	// Compute the residual to determine if we should round down or up ties.
			
 
				+	// Test should be residual >= 0, but empirical testing shows small bias helps.
			
 
				+	float residual = valuef - static_cast<float>(value);
			
 
				+	if (residual >= -0.1f)
			
 
				+	{
			
 
				+		index++;
			
 
				+	}
			
 
				+
			
 
				+	return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -86,16 +112,16 @@ static void quantize_rgb(
 
				 	float b1 = astc::clamp255f(color1.lane<2>() * scale);
			
 
				 
			
 
				 	int ri0, gi0, bi0, ri1, gi1, bi1;
			
 
				-	float rgb0_addon = 0.5f;
			
 
				-	float rgb1_addon = 0.5f;
			
 
				+	float rgb0_addon = 0.0f;
			
 
				+	float rgb1_addon = 0.0f;
			
 
				 	do
			
 
				 	{
			
 
				-		ri0 = quant_color(quant_level, astc::max(astc::flt2int_rd(r0 + rgb0_addon), 0));
			
 
				-		gi0 = quant_color(quant_level, astc::max(astc::flt2int_rd(g0 + rgb0_addon), 0));
			
 
				-		bi0 = quant_color(quant_level, astc::max(astc::flt2int_rd(b0 + rgb0_addon), 0));
			
 
				-		ri1 = quant_color(quant_level, astc::min(astc::flt2int_rd(r1 + rgb1_addon), 255));
			
 
				-		gi1 = quant_color(quant_level, astc::min(astc::flt2int_rd(g1 + rgb1_addon), 255));
			
 
				-		bi1 = quant_color(quant_level, astc::min(astc::flt2int_rd(b1 + rgb1_addon), 255));
			
 
				+		ri0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(r0 + rgb0_addon), 0), r0 + rgb0_addon);
			
 
				+		gi0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(g0 + rgb0_addon), 0), g0 + rgb0_addon);
			
 
				+		bi0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(b0 + rgb0_addon), 0), b0 + rgb0_addon);
			
 
				+		ri1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(r1 + rgb1_addon), 255), r1 + rgb1_addon);
			
 
				+		gi1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(g1 + rgb1_addon), 255), g1 + rgb1_addon);
			
 
				+		bi1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(b1 + rgb1_addon), 255), b1 + rgb1_addon);
			
 
				 
			
 
				 		rgb0_addon -= 0.2f;
			
 
				 		rgb1_addon += 0.2f;
			
@@ -133,8 +159,8 @@ static void quantize_rgba(
 
				 	float a0 = astc::clamp255f(color0.lane<3>() * scale);
			
 
				 	float a1 = astc::clamp255f(color1.lane<3>() * scale);
			
 
				 
			
 
				-	output[6] = quant_color(quant_level, astc::flt2int_rtn(a0));
			
 
				-	output[7] = quant_color(quant_level, astc::flt2int_rtn(a1));
			
 
				+	output[6] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
			
 
				+	output[7] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
			
 
				 
			
 
				 	quantize_rgb(color0, color1, output, quant_level);
			
 
				 }
			
@@ -180,13 +206,13 @@ static bool try_quantize_rgb_blue_contract(
 
				 	}
			
 
				 
			
 
				 	// Quantize the inverse-blue-contracted color
			
 
				-	int ri0 = quant_color(quant_level, astc::flt2int_rtn(r0));
			
 
				-	int gi0 = quant_color(quant_level, astc::flt2int_rtn(g0));
			
 
				-	int bi0 = quant_color(quant_level, astc::flt2int_rtn(b0));
			
 
				+	int ri0 = quant_color(quant_level, astc::flt2int_rtn(r0), r0);
			
 
				+	int gi0 = quant_color(quant_level, astc::flt2int_rtn(g0), g0);
			
 
				+	int bi0 = quant_color(quant_level, astc::flt2int_rtn(b0), b0);
			
 
				 
			
 
				-	int ri1 = quant_color(quant_level, astc::flt2int_rtn(r1));
			
 
				-	int gi1 = quant_color(quant_level, astc::flt2int_rtn(g1));
			
 
				-	int bi1 = quant_color(quant_level, astc::flt2int_rtn(b1));
			
 
				+	int ri1 = quant_color(quant_level, astc::flt2int_rtn(r1), r1);
			
 
				+	int gi1 = quant_color(quant_level, astc::flt2int_rtn(g1), g1);
			
 
				+	int bi1 = quant_color(quant_level, astc::flt2int_rtn(b1), b1);
			
 
				 
			
 
				 	// If color #1 is not larger than color #0 then blue-contraction cannot be used. Note that
			
 
				 	// blue-contraction and quantization change this order, which is why we must test afterwards.
			
@@ -217,7 +243,7 @@ static bool try_quantize_rgb_blue_contract(
 
				  *
			
 
				  * @return Returns @c false on failure, @c true on success.
			
 
				  */
			
 
				-static int try_quantize_rgba_blue_contract(
			
 
				+static bool try_quantize_rgba_blue_contract(
			
 
				 	vfloat4 color0,
			
 
				 	vfloat4 color1,
			
 
				 	uint8_t output[8],
			
@@ -228,8 +254,8 @@ static int try_quantize_rgba_blue_contract(
 
				 	float a0 = astc::clamp255f(color0.lane<3>() * scale);
			
 
				 	float a1 = astc::clamp255f(color1.lane<3>() * scale);
			
 
				 
			
 
				-	output[6] = quant_color(quant_level, astc::flt2int_rtn(a1));
			
 
				-	output[7] = quant_color(quant_level, astc::flt2int_rtn(a0));
			
 
				+	output[6] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
			
 
				+	output[7] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
			
 
				 
			
 
				 	return try_quantize_rgb_blue_contract(color0, color1, output, quant_level);
			
 
				 }
			
@@ -433,7 +459,7 @@ static bool try_quantize_rgb_delta_blue_contract(
 
				 	g1d |= (g0b & 0x100) >> 1;
			
 
				 	b1d |= (b0b & 0x100) >> 1;
			
 
				 
			
 
				-	// Then quantize and  unquantize; if this causes any of the top two bits to flip,
			
 
				+	// Then quantize and unquantize; if this causes any of the top two bits to flip,
			
 
				 	// then encoding fails, since we have then corrupted either the top bit of the base
			
 
				 	// or the sign bit of the offset.
			
 
				 	int r1de = quant_color(quant_level, r1d);
			
@@ -728,9 +754,9 @@ static void quantize_rgbs(
 
				 	float g = astc::clamp255f(color.lane<1>() * scale);
			
 
				 	float b = astc::clamp255f(color.lane<2>() * scale);
			
 
				 
			
 
				-	int ri = quant_color(quant_level, astc::flt2int_rtn(r));
			
 
				-	int gi = quant_color(quant_level, astc::flt2int_rtn(g));
			
 
				-	int bi = quant_color(quant_level, astc::flt2int_rtn(b));
			
 
				+	int ri = quant_color(quant_level, astc::flt2int_rtn(r), r);
			
 
				+	int gi = quant_color(quant_level, astc::flt2int_rtn(g), g);
			
 
				+	int bi = quant_color(quant_level, astc::flt2int_rtn(b), b);
			
 
				 
			
 
				 	float oldcolorsum = hadd_rgb_s(color) * scale;
			
 
				 	float newcolorsum = static_cast<float>(ri + gi + bi);
			
@@ -764,8 +790,8 @@ static void quantize_rgbs_alpha(
 
				 	float a0 = astc::clamp255f(color0.lane<3>() * scale);
			
 
				 	float a1 = astc::clamp255f(color1.lane<3>() * scale);
			
 
				 
			
 
				-	output[4] = quant_color(quant_level, astc::flt2int_rtn(a0));
			
 
				-	output[5] = quant_color(quant_level, astc::flt2int_rtn(a1));
			
 
				+	output[4] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
			
 
				+	output[5] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
			
 
				 
			
 
				 	quantize_rgbs(color, output, quant_level);
			
 
				 }
			
@@ -799,8 +825,8 @@ static void quantize_luminance(
 
				 		lum1 = avg;
			
 
				 	}
			
 
				 
			
 
				-	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0));
			
 
				-	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1));
			
 
				+	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
			
 
				+	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -828,48 +854,10 @@ static void quantize_luminance_alpha(
 
				 	float a0 = astc::clamp255f(color0.lane<3>());
			
 
				 	float a1 = astc::clamp255f(color1.lane<3>());
			
 
				 
			
 
				-	// If endpoints are close then pull apart slightly; this gives > 8 bit normal map precision.
			
 
				-	if (quant_level > 18)
			
 
				-	{
			
 
				-		if (fabsf(lum0 - lum1) < 3.0f)
			
 
				-		{
			
 
				-			if (lum0 < lum1)
			
 
				-			{
			
 
				-				lum0 -= 0.5f;
			
 
				-				lum1 += 0.5f;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				lum0 += 0.5f;
			
 
				-				lum1 -= 0.5f;
			
 
				-			}
			
 
				-
			
 
				-			lum0 = astc::clamp255f(lum0);
			
 
				-			lum1 = astc::clamp255f(lum1);
			
 
				-		}
			
 
				-
			
 
				-		if (fabsf(a0 - a1) < 3.0f)
			
 
				-		{
			
 
				-			if (a0 < a1)
			
 
				-			{
			
 
				-				a0 -= 0.5f;
			
 
				-				a1 += 0.5f;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				a0 += 0.5f;
			
 
				-				a1 -= 0.5f;
			
 
				-			}
			
 
				-
			
 
				-			a0 = astc::clamp255f(a0);
			
 
				-			a1 = astc::clamp255f(a1);
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0));
			
 
				-	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1));
			
 
				-	output[2] = quant_color(quant_level, astc::flt2int_rtn(a0));
			
 
				-	output[3] = quant_color(quant_level, astc::flt2int_rtn(a1));
			
 
				+	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
			
 
				+	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
			
 
				+	output[2] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
			
 
				+	output[3] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -1661,8 +1649,8 @@ static void quantize_hdr_rgb_ldr_alpha(
 
				 	float a0 = astc::clamp255f(color0.lane<3>() * scale);
			
 
				 	float a1 = astc::clamp255f(color1.lane<3>() * scale);
			
 
				 
			
 
				-	output[6] = quant_color(quant_level, astc::flt2int_rtn(a0));
			
 
				-	output[7] = quant_color(quant_level, astc::flt2int_rtn(a1));
			
 
				+	output[6] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
			
 
				+	output[7] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
			
 
				 
			
 
				 	quantize_hdr_rgb(color0, color1, output, quant_level);
			
 
				 }
			
--- a/thirdparty/astcenc/astcenc_compress_symbolic.cpp
+++ b/thirdparty/astcenc/astcenc_compress_symbolic.cpp
@@ -391,7 +391,7 @@ static float compress_symbolic_block_for_partition_1plane(
 
				 	for (unsigned int i = 0; i < max_decimation_modes; i++)
			
 
				 	{
			
 
				 		const auto& dm = bsd.get_decimation_mode(i);
			
 
				-		if (!dm.is_ref_1_plane(static_cast<quant_method>(max_weight_quant)))
			
 
				+		if (!dm.is_ref_1plane(static_cast<quant_method>(max_weight_quant)))
			
 
				 		{
			
 
				 			continue;
			
 
				 		}
			
@@ -561,7 +561,7 @@ static float compress_symbolic_block_for_partition_1plane(
 
				 			workscb.color_formats_matched = 0;
			
 
				 			if (partition_count >= 2 && all_same)
			
 
				 			{
			
 
				-				uint8_t colorvals[BLOCK_MAX_PARTITIONS][12];
			
 
				+				uint8_t colorvals[BLOCK_MAX_PARTITIONS][8];
			
 
				 				uint8_t color_formats_mod[BLOCK_MAX_PARTITIONS] { 0 };
			
 
				 				bool all_same_mod = true;
			
 
				 				for (unsigned int j = 0; j < partition_count; j++)
			
@@ -743,7 +743,7 @@ static float compress_symbolic_block_for_partition_2planes(
 
				 	for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++)
			
 
				 	{
			
 
				 		const auto& dm = bsd.get_decimation_mode(i);
			
 
				-		if (!dm.is_ref_2_plane(static_cast<quant_method>(max_weight_quant)))
			
 
				+		if (!dm.is_ref_2plane(static_cast<quant_method>(max_weight_quant)))
			
 
				 		{
			
 
				 			continue;
			
 
				 		}
			
@@ -1263,8 +1263,8 @@ void compress_block(
 
				 
			
 
				 	float exit_thresholds_for_pcount[BLOCK_MAX_PARTITIONS] {
			
 
				 		0.0f,
			
 
				-		ctx.config.tune_2_partition_early_out_limit_factor,
			
 
				-		ctx.config.tune_3_partition_early_out_limit_factor,
			
 
				+		ctx.config.tune_2partition_early_out_limit_factor,
			
 
				+		ctx.config.tune_3partition_early_out_limit_factor,
			
 
				 		0.0f
			
 
				 	};
			
 
				 
			
@@ -1318,7 +1318,7 @@ void compress_block(
 
				 	lowest_correl = prepare_block_statistics(bsd.texel_count, blk);
			
 
				 #endif
			
 
				 
			
 
				-	block_skip_two_plane = lowest_correl > ctx.config.tune_2_plane_early_out_limit_correlation;
			
 
				+	block_skip_two_plane = lowest_correl > ctx.config.tune_2plane_early_out_limit_correlation;
			
 
				 
			
 
				 	// Test the four possible 1-partition, 2-planes modes. Do this in reverse, as
			
 
				 	// alpha is the most likely to be non-correlated if it is present in the data.
			
@@ -1331,7 +1331,7 @@ void compress_block(
 
				 
			
 
				 		if (block_skip_two_plane)
			
 
				 		{
			
 
				-			trace_add_data("skip", "tune_2_plane_early_out_limit_correlation");
			
 
				+			trace_add_data("skip", "tune_2plane_early_out_limit_correlation");
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
--- a/thirdparty/astcenc/astcenc_diagnostic_trace.cpp
+++ b/thirdparty/astcenc/astcenc_diagnostic_trace.cpp
@@ -1,6 +1,6 @@
 
				 // SPDX-License-Identifier: Apache-2.0
			
 
				 // ----------------------------------------------------------------------------
			
 
				-// Copyright 2021-2022 Arm Limited
			
 
				+// Copyright 2021-2023 Arm Limited
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
			
 
				 // use this file except in compliance with the License. You may obtain a copy
			
@@ -24,6 +24,8 @@
 
				 #include <cassert>
			
 
				 #include <cstdarg>
			
 
				 #include <cstdio>
			
 
				+#include <cmath>
			
 
				+#include <limits>
			
 
				 #include <string>
			
 
				 
			
 
				 #include "astcenc_diagnostic_trace.h"
			
@@ -203,7 +205,20 @@ void trace_add_data(
 
				 	const char* key,
			
 
				 	float value
			
 
				 ) {
			
 
				-  	char buffer[256];
			
 
				+	// Turn infinities into parseable values
			
 
				+	if (std::isinf(value))
			
 
				+	{
			
 
				+		if (value > 0.0f)
			
 
				+		{
			
 
				+			value = std::numeric_limits<float>::max();
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			value = -std::numeric_limits<float>::max();
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	char buffer[256];
			
 
				 	sprintf(buffer, "%.20g", (double)value);
			
 
				 	TraceNode* node = g_TraceLog->get_current_leaf();
			
 
				 	node->add_attrib("float", key, buffer);
			
--- a/thirdparty/astcenc/astcenc_entry.cpp
+++ b/thirdparty/astcenc/astcenc_entry.cpp
@@ -52,9 +52,9 @@ struct astcenc_preset_config
 
				 	float tune_db_limit_a_base;
			
 
				 	float tune_db_limit_b_base;
			
 
				 	float tune_mse_overshoot;
			
 
				-	float tune_2_partition_early_out_limit_factor;
			
 
				-	float tune_3_partition_early_out_limit_factor;
			
 
				-	float tune_2_plane_early_out_limit_correlation;
			
 
				+	float tune_2partition_early_out_limit_factor;
			
 
				+	float tune_3partition_early_out_limit_factor;
			
 
				+	float tune_2plane_early_out_limit_correlation;
			
 
				 };
			
 
				 
			
 
				 /**
			
@@ -157,48 +157,6 @@ static astcenc_error validate_cpu_float()
 
				 	return ASTCENC_SUCCESS;
			
 
				 }
			
 
				 
			
 
				-/**
			
 
				- * @brief Validate CPU ISA support meets the requirements of this build of the library.
			
 
				- *
			
 
				- * Each library build is statically compiled for a particular set of CPU ISA features, such as the
			
 
				- * SIMD support or other ISA extensions such as POPCNT. This function checks that the host CPU
			
 
				- * actually supports everything this build needs.
			
 
				- *
			
 
				- * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
			
 
				- */
			
 
				-static astcenc_error validate_cpu_isa()
			
 
				-{
			
 
				-	#if ASTCENC_SSE >= 41
			
 
				-		if (!cpu_supports_sse41())
			
 
				-		{
			
 
				-			return ASTCENC_ERR_BAD_CPU_ISA;
			
 
				-		}
			
 
				-	#endif
			
 
				-
			
 
				-	#if ASTCENC_POPCNT >= 1
			
 
				-		if (!cpu_supports_popcnt())
			
 
				-		{
			
 
				-			return ASTCENC_ERR_BAD_CPU_ISA;
			
 
				-		}
			
 
				-	#endif
			
 
				-
			
 
				-	#if ASTCENC_F16C >= 1
			
 
				-		if (!cpu_supports_f16c())
			
 
				-		{
			
 
				-			return ASTCENC_ERR_BAD_CPU_ISA;
			
 
				-		}
			
 
				-	#endif
			
 
				-
			
 
				-	#if ASTCENC_AVX >= 2
			
 
				-		if (!cpu_supports_avx2())
			
 
				-		{
			
 
				-			return ASTCENC_ERR_BAD_CPU_ISA;
			
 
				-		}
			
 
				-	#endif
			
 
				-
			
 
				-	return ASTCENC_SUCCESS;
			
 
				-}
			
 
				-
			
 
				 /**
			
 
				  * @brief Validate config profile.
			
 
				  *
			
@@ -439,9 +397,9 @@ static astcenc_error validate_config(
 
				 	config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
			
 
				 	config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f);
			
 
				 	config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f);
			
 
				-	config.tune_2_partition_early_out_limit_factor = astc::max(config.tune_2_partition_early_out_limit_factor, 0.0f);
			
 
				-	config.tune_3_partition_early_out_limit_factor = astc::max(config.tune_3_partition_early_out_limit_factor, 0.0f);
			
 
				-	config.tune_2_plane_early_out_limit_correlation = astc::max(config.tune_2_plane_early_out_limit_correlation, 0.0f);
			
 
				+	config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f);
			
 
				+	config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f);
			
 
				+	config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f);
			
 
				 
			
 
				 	// Specifying a zero weight color component is not allowed; force to small value
			
 
				 	float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight),
			
@@ -475,14 +433,6 @@ astcenc_error astcenc_config_init(
 
				 ) {
			
 
				 	astcenc_error status;
			
 
				 
			
 
				-	// Check basic library compatibility options here so they are checked early. Note, these checks
			
 
				-	// are repeated in context_alloc for cases where callers use a manually defined config struct
			
 
				-	status = validate_cpu_isa();
			
 
				-	if (status != ASTCENC_SUCCESS)
			
 
				-	{
			
 
				-		return status;
			
 
				-	}
			
 
				-
			
 
				 	status = validate_cpu_float();
			
 
				 	if (status != ASTCENC_SUCCESS)
			
 
				 	{
			
@@ -563,9 +513,9 @@ astcenc_error astcenc_config_init(
 
				 
			
 
				 		config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot;
			
 
				 
			
 
				-		config.tune_2_partition_early_out_limit_factor = (*preset_configs)[start].tune_2_partition_early_out_limit_factor;
			
 
				-		config.tune_3_partition_early_out_limit_factor =(*preset_configs)[start].tune_3_partition_early_out_limit_factor;
			
 
				-		config.tune_2_plane_early_out_limit_correlation = (*preset_configs)[start].tune_2_plane_early_out_limit_correlation;
			
 
				+		config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor;
			
 
				+		config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor;
			
 
				+		config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation;
			
 
				 	}
			
 
				 	// Start and end node are not the same - so interpolate between them
			
 
				 	else
			
@@ -605,9 +555,9 @@ astcenc_error astcenc_config_init(
 
				 
			
 
				 		config.tune_mse_overshoot = LERP(tune_mse_overshoot);
			
 
				 
			
 
				-		config.tune_2_partition_early_out_limit_factor = LERP(tune_2_partition_early_out_limit_factor);
			
 
				-		config.tune_3_partition_early_out_limit_factor = LERP(tune_3_partition_early_out_limit_factor);
			
 
				-		config.tune_2_plane_early_out_limit_correlation = LERP(tune_2_plane_early_out_limit_correlation);
			
 
				+		config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor);
			
 
				+		config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor);
			
 
				+		config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation);
			
 
				 		#undef LERP
			
 
				 		#undef LERPI
			
 
				 		#undef LERPUI
			
@@ -656,9 +606,9 @@ astcenc_error astcenc_config_init(
 
				 
			
 
				 		config.cw_g_weight = 0.0f;
			
 
				 		config.cw_b_weight = 0.0f;
			
 
				-		config.tune_2_partition_early_out_limit_factor *= 1.5f;
			
 
				-		config.tune_3_partition_early_out_limit_factor *= 1.5f;
			
 
				-		config.tune_2_plane_early_out_limit_correlation = 0.99f;
			
 
				+		config.tune_2partition_early_out_limit_factor *= 1.5f;
			
 
				+		config.tune_3partition_early_out_limit_factor *= 1.5f;
			
 
				+		config.tune_2plane_early_out_limit_correlation = 0.99f;
			
 
				 
			
 
				 		// Normals are prone to blocking artifacts on smooth curves
			
 
				 		// so force compressor to try harder here ...
			
@@ -702,12 +652,6 @@ astcenc_error astcenc_context_alloc(
 
				 	astcenc_error status;
			
 
				 	const astcenc_config& config = *configp;
			
 
				 
			
 
				-	status = validate_cpu_isa();
			
 
				-	if (status != ASTCENC_SUCCESS)
			
 
				-	{
			
 
				-		return status;
			
 
				-	}
			
 
				-
			
 
				 	status = validate_cpu_float();
			
 
				 	if (status != ASTCENC_SUCCESS)
			
 
				 	{
			
@@ -1399,8 +1343,6 @@ const char* astcenc_get_error_string(
 
				 		return "ASTCENC_ERR_OUT_OF_MEM";
			
 
				 	case ASTCENC_ERR_BAD_CPU_FLOAT:
			
 
				 		return "ASTCENC_ERR_BAD_CPU_FLOAT";
			
 
				-	case ASTCENC_ERR_BAD_CPU_ISA:
			
 
				-		return "ASTCENC_ERR_BAD_CPU_ISA";
			
 
				 	case ASTCENC_ERR_BAD_PARAM:
			
 
				 		return "ASTCENC_ERR_BAD_PARAM";
			
 
				 	case ASTCENC_ERR_BAD_BLOCK_SIZE:
			
--- a/thirdparty/astcenc/astcenc_find_best_partitioning.cpp
+++ b/thirdparty/astcenc/astcenc_find_best_partitioning.cpp
@@ -604,8 +604,7 @@ unsigned int find_best_partition_candidates(
 
				 			processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS];
			
 
				 			processed_line4 samec_plines[BLOCK_MAX_PARTITIONS];
			
 
				 
			
 
				-			float uncor_line_lens[BLOCK_MAX_PARTITIONS];
			
 
				-			float samec_line_lens[BLOCK_MAX_PARTITIONS];
			
 
				+			float line_lengths[BLOCK_MAX_PARTITIONS];
			
 
				 
			
 
				 			for (unsigned int j = 0; j < partition_count; j++)
			
 
				 			{
			
@@ -631,8 +630,7 @@ unsigned int find_best_partition_candidates(
 
				 			                           blk,
			
 
				 			                           uncor_plines,
			
 
				 			                           samec_plines,
			
 
				-			                           uncor_line_lens,
			
 
				-			                           samec_line_lens,
			
 
				+			                           line_lengths,
			
 
				 			                           uncor_error,
			
 
				 			                           samec_error);
			
 
				 
			
@@ -651,8 +649,8 @@ unsigned int find_best_partition_candidates(
 
				 				float tpp = static_cast<float>(pi.partition_texel_count[j]);
			
 
				 				vfloat4 error_weights(tpp * weight_imprecision_estim);
			
 
				 
			
 
				-				vfloat4 uncor_vector = uncor_lines[j].b * uncor_line_lens[j];
			
 
				-				vfloat4 samec_vector = samec_lines[j].b * samec_line_lens[j];
			
 
				+				vfloat4 uncor_vector = uncor_lines[j].b * line_lengths[j];
			
 
				+				vfloat4 samec_vector = samec_lines[j].b * line_lengths[j];
			
 
				 
			
 
				 				uncor_error += dot_s(uncor_vector * uncor_vector, error_weights);
			
 
				 				samec_error += dot_s(samec_vector * samec_vector, error_weights);
			
@@ -719,8 +717,8 @@ unsigned int find_best_partition_candidates(
 
				 				float tpp = static_cast<float>(pi.partition_texel_count[j]);
			
 
				 				vfloat4 error_weights(tpp * weight_imprecision_estim);
			
 
				 
			
 
				-				vfloat4 uncor_vector = pl.uncor_line.b * pl.uncor_line_len;
			
 
				-				vfloat4 samec_vector = pl.samec_line.b * pl.samec_line_len;
			
 
				+				vfloat4 uncor_vector = pl.uncor_line.b * pl.line_length;
			
 
				+				vfloat4 samec_vector = pl.samec_line.b * pl.line_length;
			
 
				 
			
 
				 				uncor_error += dot3_s(uncor_vector * uncor_vector, error_weights);
			
 
				 				samec_error += dot3_s(samec_vector * samec_vector, error_weights);
			
@@ -731,21 +729,11 @@ unsigned int find_best_partition_candidates(
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	bool best_is_uncor = uncor_best_partitions[0] > samec_best_partitions[0];
			
 
				-
			
 
				 	unsigned int interleave[2 * TUNE_MAX_PARTITIONING_CANDIDATES];
			
 
				 	for (unsigned int i = 0; i < requested_candidates; i++)
			
 
				 	{
			
 
				-		if (best_is_uncor)
			
 
				-		{
			
 
				-			interleave[2 * i] = bsd.get_raw_partition_info(partition_count, uncor_best_partitions[i]).partition_index;
			
 
				-			interleave[2 * i + 1] = bsd.get_raw_partition_info(partition_count, samec_best_partitions[i]).partition_index;
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			interleave[2 * i] = bsd.get_raw_partition_info(partition_count, samec_best_partitions[i]).partition_index;
			
 
				-			interleave[2 * i + 1] = bsd.get_raw_partition_info(partition_count, uncor_best_partitions[i]).partition_index;
			
 
				-		}
			
 
				+		interleave[2 * i] = bsd.get_raw_partition_info(partition_count, uncor_best_partitions[i]).partition_index;
			
 
				+		interleave[2 * i + 1] = bsd.get_raw_partition_info(partition_count, samec_best_partitions[i]).partition_index;
			
 
				 	}
			
 
				 
			
 
				 	uint64_t bitmasks[1024/64] { 0 };
			
--- a/thirdparty/astcenc/astcenc_internal.h
+++ b/thirdparty/astcenc/astcenc_internal.h
@@ -293,11 +293,13 @@ struct partition_lines3
 
				 	/** @brief Post-processed line for correlated chroma, passing though the origin. */
			
 
				 	processed_line3 samec_pline;
			
 
				 
			
 
				-	/** @brief The length of the line for uncorrelated chroma. */
			
 
				-	float uncor_line_len;
			
 
				-
			
 
				-	/** @brief The length of the line for correlated chroma. */
			
 
				-	float samec_line_len;
			
 
				+	/**
			
 
				+	 * @brief The length of the line for uncorrelated chroma.
			
 
				+	 *
			
 
				+	 * This is used for both the uncorrelated and same chroma lines - they are normally very similar
			
 
				+	 * and only used for the relative ranking of partitionings against one another.
			
 
				+	 */
			
 
				+	float line_length;
			
 
				 };
			
 
				 
			
 
				 /**
			
@@ -319,8 +321,8 @@ struct partition_info
 
				 	/**
			
 
				 	 * @brief The number of texels in each partition.
			
 
				 	 *
			
 
				-	 * Note that some seeds result in zero texels assigned to a partition are valid, but are skipped
			
 
				-	 * by this compressor as there is no point spending bits encoding an unused color endpoint.
			
 
				+	 * Note that some seeds result in zero texels assigned to a partition. These are valid, but are
			
 
				+	 * skipped by this compressor as there is no point spending bits encoding unused endpoints.
			
 
				 	 */
			
 
				 	uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS];
			
 
				 
			
@@ -455,23 +457,23 @@ struct decimation_mode
 
				 	 *
			
 
				 	 * Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
			
 
				 	 */
			
 
				-	uint16_t refprec_1_plane;
			
 
				+	uint16_t refprec_1plane;
			
 
				 
			
 
				 	/**
			
 
				 	 * @brief Bitvector indicating weight quant methods used by active 2 plane block modes.
			
 
				 	 *
			
 
				 	 * Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
			
 
				 	 */
			
 
				-	uint16_t refprec_2_planes;
			
 
				+	uint16_t refprec_2planes;
			
 
				 
			
 
				 	/**
			
 
				 	 * @brief Set a 1 plane weight quant as active.
			
 
				 	 *
			
 
				 	 * @param weight_quant   The quant method to set.
			
 
				 	 */
			
 
				-	void set_ref_1_plane(quant_method weight_quant)
			
 
				+	void set_ref_1plane(quant_method weight_quant)
			
 
				 	{
			
 
				-		refprec_1_plane |= (1 << weight_quant);
			
 
				+		refprec_1plane |= (1 << weight_quant);
			
 
				 	}
			
 
				 
			
 
				 	/**
			
@@ -479,10 +481,10 @@ struct decimation_mode
 
				 	 *
			
 
				 	 * @param max_weight_quant   The max quant method to test.
			
 
				 	 */
			
 
				-	bool is_ref_1_plane(quant_method max_weight_quant) const
			
 
				+	bool is_ref_1plane(quant_method max_weight_quant) const
			
 
				 	{
			
 
				 		uint16_t mask = static_cast<uint16_t>((1 << (max_weight_quant + 1)) - 1);
			
 
				-		return (refprec_1_plane & mask) != 0;
			
 
				+		return (refprec_1plane & mask) != 0;
			
 
				 	}
			
 
				 
			
 
				 	/**
			
@@ -490,9 +492,9 @@ struct decimation_mode
 
				 	 *
			
 
				 	 * @param weight_quant   The quant method to set.
			
 
				 	 */
			
 
				-	void set_ref_2_plane(quant_method weight_quant)
			
 
				+	void set_ref_2plane(quant_method weight_quant)
			
 
				 	{
			
 
				-		refprec_2_planes |= static_cast<uint16_t>(1 << weight_quant);
			
 
				+		refprec_2planes |= static_cast<uint16_t>(1 << weight_quant);
			
 
				 	}
			
 
				 
			
 
				 	/**
			
@@ -500,10 +502,10 @@ struct decimation_mode
 
				 	 *
			
 
				 	 * @param max_weight_quant   The max quant method to test.
			
 
				 	 */
			
 
				-	bool is_ref_2_plane(quant_method max_weight_quant) const
			
 
				+	bool is_ref_2plane(quant_method max_weight_quant) const
			
 
				 	{
			
 
				 		uint16_t mask = static_cast<uint16_t>((1 << (max_weight_quant + 1)) - 1);
			
 
				-		return (refprec_2_planes & mask) != 0;
			
 
				+		return (refprec_2planes & mask) != 0;
			
 
				 	}
			
 
				 };
			
 
				 
			
@@ -1336,9 +1338,14 @@ bool is_legal_3d_block_size(
 
				  * Converts unquant value in 0-255 range into quant value in 0-255 range.
			
 
				  * No BISE scrambling is applied at this stage.
			
 
				  *
			
 
				- * Indexed by [quant_mode - 4][data_value].
			
 
				+ * The BISE encoding results in ties where available quant<256> values are
			
 
				+ * equidistant from the available quant<BISE> values. This table stores two values
			
 
				+ * for each input - one for use with a negative residual, and one for use with
			
 
				+ * a positive residual.
			
 
				+ *
			
 
				+ * Indexed by [quant_mode - 4][data_value * 2 + residual].
			
 
				  */
			
 
				-extern const uint8_t color_unquant_to_uquant_tables[17][256];
			
 
				+extern const uint8_t color_unquant_to_uquant_tables[17][512];
			
 
				 
			
 
				 /**
			
 
				  * @brief The precomputed table for packing quantized color values.
			
@@ -1528,8 +1535,7 @@ void compute_error_squared_rgb(
 
				  * @param      blk             The image block color data to be compressed.
			
 
				  * @param      uncor_plines    Processed uncorrelated partition lines for each partition.
			
 
				  * @param      samec_plines    Processed same chroma partition lines for each partition.
			
 
				- * @param[out] uncor_lengths   The length of each components deviation from the line.
			
 
				- * @param[out] samec_lengths   The length of each components deviation from the line.
			
 
				+ * @param[out] line_lengths    The length of each component's deviation from the line.
			
 
				  * @param[out] uncor_error     The cumulative error for using the uncorrelated line.
			
 
				  * @param[out] samec_error     The cumulative error for using the same chroma line.
			
 
				  */
			
@@ -1538,8 +1544,7 @@ void compute_error_squared_rgba(
 
				 	const image_block& blk,
			
 
				 	const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS],
			
 
				 	const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS],
			
 
				-	float uncor_lengths[BLOCK_MAX_PARTITIONS],
			
 
				-	float samec_lengths[BLOCK_MAX_PARTITIONS],
			
 
				+	float line_lengths[BLOCK_MAX_PARTITIONS],
			
 
				 	float& uncor_error,
			
 
				 	float& samec_error);
			
 
				 
			
@@ -2120,34 +2125,6 @@ void physical_to_symbolic(
 
				 /* ============================================================================
			
 
				 Platform-specific functions.
			
 
				 ============================================================================ */
			
 
				-/**
			
 
				- * @brief Run-time detection if the host CPU supports the POPCNT extension.
			
 
				- *
			
 
				- * @return @c true if supported, @c false if not.
			
 
				- */
			
 
				-bool cpu_supports_popcnt();
			
 
				-
			
 
				-/**
			
 
				- * @brief Run-time detection if the host CPU supports F16C extension.
			
 
				- *
			
 
				- * @return @c true if supported, @c false if not.
			
 
				- */
			
 
				-bool cpu_supports_f16c();
			
 
				-
			
 
				-/**
			
 
				- * @brief Run-time detection if the host CPU supports SSE 4.1 extension.
			
 
				- *
			
 
				- * @return @c true if supported, @c false if not.
			
 
				- */
			
 
				-bool cpu_supports_sse41();
			
 
				-
			
 
				-/**
			
 
				- * @brief Run-time detection if the host CPU supports AVX 2 extension.
			
 
				- *
			
 
				- * @return @c true if supported, @c false if not.
			
 
				- */
			
 
				-bool cpu_supports_avx2();
			
 
				-
			
 
				 /**
			
 
				  * @brief Allocate an aligned memory buffer.
			
 
				  *
			
--- a/thirdparty/astcenc/astcenc_platform_isa_detection.cpp
+++ b/thirdparty/astcenc/astcenc_platform_isa_detection.cpp
@@ -1,166 +0,0 @@
 
				-// SPDX-License-Identifier: Apache-2.0
			
 
				-// ----------------------------------------------------------------------------
			
 
				-// Copyright 2020-2022 Arm Limited
			
 
				-//
			
 
				-// Licensed under the Apache License, Version 2.0 (the "License"); you may not
			
 
				-// use this file except in compliance with the License. You may obtain a copy
			
 
				-// of the License at:
			
 
				-//
			
 
				-//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-//
			
 
				-// Unless required by applicable law or agreed to in writing, software
			
 
				-// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
			
 
				-// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
			
 
				-// License for the specific language governing permissions and limitations
			
 
				-// under the License.
			
 
				-// ----------------------------------------------------------------------------
			
 
				-
			
 
				-/**
			
 
				- * @brief Platform-specific function implementations.
			
 
				- *
			
 
				- * This module contains functions for querying the host extended ISA support.
			
 
				- */
			
 
				-
			
 
				-// Include before the defines below to pick up any auto-setup based on compiler
			
 
				-// built-in config, if not being set explicitly by the build system
			
 
				-#include "astcenc_internal.h"
			
 
				-
			
 
				-#if (ASTCENC_SSE > 0)    || (ASTCENC_AVX > 0) || \
			
 
				-    (ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0)
			
 
				-
			
 
				-static bool g_init { false };
			
 
				-
			
 
				-/** Does this CPU support SSE 4.1? Set to -1 if not yet initialized. */
			
 
				-static bool g_cpu_has_sse41 { false };
			
 
				-
			
 
				-/** Does this CPU support AVX2? Set to -1 if not yet initialized. */
			
 
				-static bool g_cpu_has_avx2 { false };
			
 
				-
			
 
				-/** Does this CPU support POPCNT? Set to -1 if not yet initialized. */
			
 
				-static bool g_cpu_has_popcnt { false };
			
 
				-
			
 
				-/** Does this CPU support F16C? Set to -1 if not yet initialized. */
			
 
				-static bool g_cpu_has_f16c { false };
			
 
				-
			
 
				-/* ============================================================================
			
 
				-   Platform code for Visual Studio
			
 
				-============================================================================ */
			
 
				-#if !defined(__clang__) && defined(_MSC_VER)
			
 
				-#define WIN32_LEAN_AND_MEAN
			
 
				-#include <windows.h>
			
 
				-#include <intrin.h>
			
 
				-
			
 
				-/**
			
 
				- * @brief Detect platform CPU ISA support and update global trackers.
			
 
				- */
			
 
				-static void detect_cpu_isa()
			
 
				-{
			
 
				-	int data[4];
			
 
				-
			
 
				-	__cpuid(data, 0);
			
 
				-	int num_id = data[0];
			
 
				-
			
 
				-	if (num_id >= 1)
			
 
				-	{
			
 
				-		__cpuidex(data, 1, 0);
			
 
				-		// SSE41 = Bank 1, ECX, bit 19
			
 
				-		g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false;
			
 
				-		// POPCNT = Bank 1, ECX, bit 23
			
 
				-		g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false;
			
 
				-		// F16C = Bank 1, ECX, bit 29
			
 
				-		g_cpu_has_f16c = data[2] & (1 << 29) ? true : false;
			
 
				-	}
			
 
				-
			
 
				-	if (num_id >= 7)
			
 
				-	{
			
 
				-		__cpuidex(data, 7, 0);
			
 
				-		// AVX2 = Bank 7, EBX, bit 5
			
 
				-		g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false;
			
 
				-	}
			
 
				-
			
 
				-	// Ensure state bits are updated before init flag is updated
			
 
				-	MemoryBarrier();
			
 
				-	g_init = true;
			
 
				-}
			
 
				-
			
 
				-/* ============================================================================
			
 
				-   Platform code for GCC and Clang
			
 
				-============================================================================ */
			
 
				-#else
			
 
				-#include <cpuid.h>
			
 
				-
			
 
				-/**
			
 
				- * @brief Detect platform CPU ISA support and update global trackers.
			
 
				- */
			
 
				-static void detect_cpu_isa()
			
 
				-{
			
 
				-	unsigned int data[4];
			
 
				-
			
 
				-	if (__get_cpuid_count(1, 0, &data[0], &data[1], &data[2], &data[3]))
			
 
				-	{
			
 
				-		// SSE41 = Bank 1, ECX, bit 19
			
 
				-		g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false;
			
 
				-		// POPCNT = Bank 1, ECX, bit 23
			
 
				-		g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false;
			
 
				-		// F16C = Bank 1, ECX, bit 29
			
 
				-		g_cpu_has_f16c = data[2] & (1 << 29) ? true : false;
			
 
				-	}
			
 
				-
			
 
				-	g_cpu_has_avx2 = 0;
			
 
				-	if (__get_cpuid_count(7, 0, &data[0], &data[1], &data[2], &data[3]))
			
 
				-	{
			
 
				-		// AVX2 = Bank 7, EBX, bit 5
			
 
				-		g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false;
			
 
				-	}
			
 
				-
			
 
				-	// Ensure state bits are updated before init flag is updated
			
 
				-	__sync_synchronize();
			
 
				-	g_init = true;
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				-/* See header for documentation. */
			
 
				-bool cpu_supports_popcnt()
			
 
				-{
			
 
				-	if (!g_init)
			
 
				-	{
			
 
				-		detect_cpu_isa();
			
 
				-	}
			
 
				-
			
 
				-	return g_cpu_has_popcnt;
			
 
				-}
			
 
				-
			
 
				-/* See header for documentation. */
			
 
				-bool cpu_supports_f16c()
			
 
				-{
			
 
				-	if (!g_init)
			
 
				-	{
			
 
				-		detect_cpu_isa();
			
 
				-	}
			
 
				-
			
 
				-	return g_cpu_has_f16c;
			
 
				-}
			
 
				-
			
 
				-/* See header for documentation. */
			
 
				-bool cpu_supports_sse41()
			
 
				-{
			
 
				-	if (!g_init)
			
 
				-	{
			
 
				-		detect_cpu_isa();
			
 
				-	}
			
 
				-
			
 
				-	return g_cpu_has_sse41;
			
 
				-}
			
 
				-
			
 
				-/* See header for documentation. */
			
 
				-bool cpu_supports_avx2()
			
 
				-{
			
 
				-	if (!g_init)
			
 
				-	{
			
 
				-		detect_cpu_isa();
			
 
				-	}
			
 
				-
			
 
				-	return g_cpu_has_avx2;
			
 
				-}
			
 
				-
			
 
				-#endif
			
--- a/thirdparty/astcenc/astcenc_quantization.cpp
+++ b/thirdparty/astcenc/astcenc_quantization.cpp
--- a/thirdparty/astcenc/astcenc_symbolic_physical.cpp
+++ b/thirdparty/astcenc/astcenc_symbolic_physical.cpp
@@ -1,6 +1,6 @@
 
				 // SPDX-License-Identifier: Apache-2.0
			
 
				 // ----------------------------------------------------------------------------
			
 
				-// Copyright 2011-2021 Arm Limited
			
 
				+// Copyright 2011-2023 Arm Limited
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
			
 
				 // use this file except in compliance with the License. You may obtain a copy
			
@@ -24,36 +24,21 @@
 
				 #include <cassert>
			
 
				 
			
 
				 /**
			
 
				- * @brief Write up to 8 bits at an arbitrary bit offset.
			
 
				- *
			
 
				- * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so
			
 
				- * may span two separate bytes in memory.
			
 
				+ * @brief Reverse bits in a byte.
			
 
				  *
			
 
				- * @param         value       The value to write.
			
 
				- * @param         bitcount    The number of bits to write, starting from LSB.
			
 
				- * @param         bitoffset   The bit offset to store at, between 0 and 7.
			
 
				- * @param[in,out] ptr         The data pointer to write to.
			
 
				+ * @param p   The value to reverse.
			
 
				+  *
			
 
				+ * @return The reversed result.
			
 
				  */
			
 
				-static inline void write_bits(
			
 
				-	int value,
			
 
				-	int bitcount,
			
 
				-	int bitoffset,
			
 
				-	uint8_t* ptr
			
 
				-) {
			
 
				-	int mask = (1 << bitcount) - 1;
			
 
				-	value &= mask;
			
 
				-	ptr += bitoffset >> 3;
			
 
				-	bitoffset &= 7;
			
 
				-	value <<= bitoffset;
			
 
				-	mask <<= bitoffset;
			
 
				-	mask = ~mask;
			
 
				-
			
 
				-	ptr[0] &= mask;
			
 
				-	ptr[0] |= value;
			
 
				-	ptr[1] &= mask >> 8;
			
 
				-	ptr[1] |= value >> 8;
			
 
				+static inline int bitrev8(int p)
			
 
				+{
			
 
				+	p = ((p & 0x0F) << 4) | ((p >> 4) & 0x0F);
			
 
				+	p = ((p & 0x33) << 2) | ((p >> 2) & 0x33);
			
 
				+	p = ((p & 0x55) << 1) | ((p >> 1) & 0x55);
			
 
				+	return p;
			
 
				 }
			
 
				 
			
 
				+
			
 
				 /**
			
 
				  * @brief Read up to 8 bits at an arbitrary bit offset.
			
 
				  *
			
@@ -80,19 +65,37 @@ static inline int read_bits(
 
				 	return value;
			
 
				 }
			
 
				 
			
 
				+#if !defined(ASTCENC_DECOMPRESS_ONLY)
			
 
				+
			
 
				 /**
			
 
				- * @brief Reverse bits in a byte.
			
 
				+ * @brief Write up to 8 bits at an arbitrary bit offset.
			
 
				  *
			
 
				- * @param p   The value to reverse.
			
 
				-  *
			
 
				- * @return The reversed result.
			
 
				+ * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so
			
 
				+ * may span two separate bytes in memory.
			
 
				+ *
			
 
				+ * @param         value       The value to write.
			
 
				+ * @param         bitcount    The number of bits to write, starting from LSB.
			
 
				+ * @param         bitoffset   The bit offset to store at, between 0 and 7.
			
 
				+ * @param[in,out] ptr         The data pointer to write to.
			
 
				  */
			
 
				-static inline int bitrev8(int p)
			
 
				-{
			
 
				-	p = ((p & 0x0F) << 4) | ((p >> 4) & 0x0F);
			
 
				-	p = ((p & 0x33) << 2) | ((p >> 2) & 0x33);
			
 
				-	p = ((p & 0x55) << 1) | ((p >> 1) & 0x55);
			
 
				-	return p;
			
 
				+static inline void write_bits(
			
 
				+	int value,
			
 
				+	int bitcount,
			
 
				+	int bitoffset,
			
 
				+	uint8_t* ptr
			
 
				+) {
			
 
				+	int mask = (1 << bitcount) - 1;
			
 
				+	value &= mask;
			
 
				+	ptr += bitoffset >> 3;
			
 
				+	bitoffset &= 7;
			
 
				+	value <<= bitoffset;
			
 
				+	mask <<= bitoffset;
			
 
				+	mask = ~mask;
			
 
				+
			
 
				+	ptr[0] &= mask;
			
 
				+	ptr[0] |= value;
			
 
				+	ptr[1] &= mask >> 8;
			
 
				+	ptr[1] |= value >> 8;
			
 
				 }
			
 
				 
			
 
				 /* See header for documentation. */
			
@@ -282,6 +285,8 @@ void symbolic_to_physical(
 
				 	           scb.partition_count == 1 ? 17 : 19 + PARTITION_INDEX_BITS);
			
 
				 }
			
 
				 
			
 
				+#endif
			
 
				+
			
 
				 /* See header for documentation. */
			
 
				 void physical_to_symbolic(
			
 
				 	const block_size_descriptor& bsd,
			
--- a/thirdparty/astcenc/astcenc_weight_align.cpp
+++ b/thirdparty/astcenc/astcenc_weight_align.cpp
@@ -1,6 +1,6 @@
 
				 // SPDX-License-Identifier: Apache-2.0
			
 
				 // ----------------------------------------------------------------------------
			
 
				-// Copyright 2011-2022 Arm Limited
			
 
				+// Copyright 2011-2023 Arm Limited
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
			
 
				 // use this file except in compliance with the License. You may obtain a copy
			
@@ -353,7 +353,7 @@ void compute_angular_endpoints_1plane(
 
				 	for (unsigned int i = 0; i < max_decimation_modes; i++)
			
 
				 	{
			
 
				 		const decimation_mode& dm = bsd.decimation_modes[i];
			
 
				-		if (!dm.is_ref_1_plane(static_cast<quant_method>(max_weight_quant)))
			
 
				+		if (!dm.is_ref_1plane(static_cast<quant_method>(max_weight_quant)))
			
 
				 		{
			
 
				 			continue;
			
 
				 		}
			
@@ -422,7 +422,7 @@ void compute_angular_endpoints_2planes(
 
				 	for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++)
			
 
				 	{
			
 
				 		const decimation_mode& dm = bsd.decimation_modes[i];
			
 
				-		if (!dm.is_ref_2_plane(static_cast<quant_method>(max_weight_quant)))
			
 
				+		if (!dm.is_ref_2plane(static_cast<quant_method>(max_weight_quant)))
			
 
				 		{
			
 
				 			continue;
			
 
				 		}