3 years ago · 1395f4e969
--- a/3rdparty/astc-encoder/include/astcenc.h
+++ b/3rdparty/astc-encoder/include/astcenc.h
@@ -514,25 +514,15 @@ struct astcenc_config
 
				 	float tune_db_limit;
			
 
				 
			
 
				 	/**
			
 
				-	 * @brief The amount of overshoot needed to early-out mode 0 fast path.
			
 
				+	 * @brief The amount of MSE overshoot needed to early-out trials.
			
 
				 	 *
			
 
				-	 * We have a fast-path for mode 0 (1 partition, 1 plane) which uses only essential block modes
			
 
				-	 * as an initial search. This can short-cut compression for simple blocks, but to avoid
			
 
				-	 * short-cutting too much we force this to overshoot the MSE threshold needed to hit the
			
 
				-	 * block-local db_limit e.g. 1.0 = no overshoot, 2.0 = need half the error to trigger.
			
 
				-	 */
			
 
				-	float tune_mode0_mse_overshoot;
			
 
				-
			
 
				-	/**
			
 
				-	 * @brief The amount of overshoot needed to early-out refinement.
			
 
				+	 * The first early-out is for 1 partition, 1 plane trials, where we try a minimal encode using
			
 
				+	 * the high probability block modes. This can short-cut compression for simple blocks.
			
 
				 	 *
			
 
				-	 * The codec will refine block candidates iteratively to improve the encoding, based on the
			
 
				-	 * @c tune_refinement_limit count. Earlier implementations will use all refinement iterations,
			
 
				-	 * even if the target threshold is reached. This tuning parameter allows an early out, but with
			
 
				-	 * an overshoot MSE threshold. Setting this to 1.0 will early-out as soon as the target is hit,
			
 
				-	 * but does reduce image quality vs the default behavior of over-refinement.
			
 
				+	 * The second early-out is for refinement trials, where we can exit refinement once quality is
			
 
				+	 * reached.
			
 
				 	 */
			
 
				-	float tune_refinement_mse_overshoot;
			
 
				+	float tune_mse_overshoot;
			
 
				 
			
 
				 	/**
			
 
				 	 * @brief The threshold for skipping 3.1/4.1 trials (-2partitionlimitfactor).
			
--- a/3rdparty/astc-encoder/source/astcenc_color_quantize.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_color_quantize.cpp
@@ -1960,7 +1960,7 @@ uint8_t pack_color_endpoints(
 
				 	switch (format)
			
 
				 	{
			
 
				 	case FMT_RGB:
			
 
				-		if (quant_level <= 18)
			
 
				+		if (quant_level <= QUANT_160)
			
 
				 		{
			
 
				 			if (try_quantize_rgb_delta_blue_contract(color0, color1, output, quant_level))
			
 
				 			{
			
@@ -1973,7 +1973,7 @@ uint8_t pack_color_endpoints(
 
				 				break;
			
 
				 			}
			
 
				 		}
			
 
				-		if (try_quantize_rgb_blue_contract(color0, color1, output, quant_level))
			
 
				+		if (quant_level < QUANT_256 && try_quantize_rgb_blue_contract(color0, color1, output, quant_level))
			
 
				 		{
			
 
				 			retval = FMT_RGB;
			
 
				 			break;
			
@@ -1983,7 +1983,7 @@ uint8_t pack_color_endpoints(
 
				 		break;
			
 
				 
			
 
				 	case FMT_RGBA:
			
 
				-		if (quant_level <= 18)
			
 
				+		if (quant_level <= QUANT_160)
			
 
				 		{
			
 
				 			if (try_quantize_rgba_delta_blue_contract(color0, color1, output, quant_level))
			
 
				 			{
			
@@ -1996,7 +1996,7 @@ uint8_t pack_color_endpoints(
 
				 				break;
			
 
				 			}
			
 
				 		}
			
 
				-		if (try_quantize_rgba_blue_contract(color0, color1, output, quant_level))
			
 
				+		if (quant_level < QUANT_256 && try_quantize_rgba_blue_contract(color0, color1, output, quant_level))
			
 
				 		{
			
 
				 			retval = FMT_RGBA;
			
 
				 			break;
			
--- a/3rdparty/astc-encoder/source/astcenc_compress_symbolic.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_compress_symbolic.cpp
@@ -82,7 +82,7 @@ static bool realign_weights_undecimated(
 
				 	const quant_and_transfer_table& qat = quant_and_xfer_tables[weight_quant_level];
			
 
				 
			
 
				 	unsigned int max_plane = bm.is_dual_plane;
			
 
				-	int plane2_component = bm.is_dual_plane ? scb.plane2_component : -1;
			
 
				+	int plane2_component = scb.plane2_component;
			
 
				 	vmask4 plane_mask = vint4::lane_id() == vint4(plane2_component);
			
 
				 
			
 
				 	// Decode the color endpoints
			
@@ -206,7 +206,7 @@ static bool realign_weights_decimated(
 
				 	assert(weight_count != bsd.texel_count);
			
 
				 
			
 
				 	unsigned int max_plane = bm.is_dual_plane;
			
 
				-	int plane2_component = bm.is_dual_plane ? scb.plane2_component : -1;
			
 
				+	int plane2_component = scb.plane2_component;
			
 
				 	vmask4 plane_mask = vint4::lane_id() == vint4(plane2_component);
			
 
				 
			
 
				 	// Decode the color endpoints
			
@@ -1279,13 +1279,13 @@ void compress_block(
 
				 	// compression and slightly reduces image quality.
			
 
				 
			
 
				 	float errorval_mult[2] {
			
 
				-		1.0f / ctx.config.tune_mode0_mse_overshoot,
			
 
				+		1.0f / ctx.config.tune_mse_overshoot,
			
 
				 		1.0f
			
 
				 	};
			
 
				 
			
 
				-	static const float errorval_overshoot = 1.0f / ctx.config.tune_refinement_mse_overshoot;
			
 
				+	const float errorval_overshoot = 1.0f / ctx.config.tune_mse_overshoot; // Per call, not static: a function-local static is initialised once and would reuse the first context's setting for every later context
			
 
				 
			
 
				-	// Only enable MODE0 fast path (trial 0) if 2D and more than 25 texels
			
 
				+	// Only enable MODE0 fast path (trial 0) if 2D, and more than 25 texels
			
 
				 	int start_trial = 1;
			
 
				 	if ((bsd.texel_count >= TUNE_MIN_TEXELS_MODE0_FASTPATH) && (bsd.zdim == 1))
			
 
				 	{
			
--- a/3rdparty/astc-encoder/source/astcenc_decompress_symbolic.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_decompress_symbolic.cpp
@@ -286,7 +286,7 @@ void decompress_symbolic_block(
 
				 	unpack_weights(bsd, scb, di, is_dual_plane, plane1_weights, plane2_weights);
			
 
				 
			
 
				 	// Now that we have endpoint colors and weights, we can unpack texel colors
			
 
				-	int plane2_component = is_dual_plane ? scb.plane2_component : -1;
			
 
				+	int plane2_component = scb.plane2_component;
			
 
				 	vmask4 plane2_mask = vint4::lane_id() == vint4(plane2_component);
			
 
				 
			
 
				 	for (int i = 0; i < partition_count; i++)
			
--- a/3rdparty/astc-encoder/source/astcenc_entry.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_entry.cpp
@@ -51,89 +51,84 @@ struct astcenc_preset_config
 
				 	unsigned int tune_4partitioning_candidate_limit;
			
 
				 	float tune_db_limit_a_base;
			
 
				 	float tune_db_limit_b_base;
			
 
				-	float tune_mode0_mse_overshoot;
			
 
				-	float tune_refinement_mse_overshoot;
			
 
				+	float tune_mse_overshoot;
			
 
				 	float tune_2_partition_early_out_limit_factor;
			
 
				 	float tune_3_partition_early_out_limit_factor;
			
 
				 	float tune_2_plane_early_out_limit_correlation;
			
 
				 };
			
 
				 
			
 
				-
			
 
				 /**
			
 
				- * @brief The static quality presets that are built-in for high bandwidth
			
 
				- * presets (x < 25 texels per block).
			
 
				+ * @brief The static presets for high bandwidth encodings (x < 25 texels per block).
			
 
				  */
			
 
				 static const std::array<astcenc_preset_config, 6> preset_configs_high {{
			
 
				 	{
			
 
				 		ASTCENC_PRE_FASTEST,
			
 
				-		2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.85f
			
 
				+		2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_FAST,
			
 
				-		3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.90f
			
 
				+		3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_MEDIUM,
			
 
				-		4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 2.5f, 1.1f, 1.05f, 0.95f
			
 
				+		4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_THOROUGH,
			
 
				-		4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 10.0f, 1.35f, 1.15f, 0.97f
			
 
				+		4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_VERYTHOROUGH,
			
 
				-		4, 256, 128, 64, 98, 4, 6, 20, 14, 8, 200.0f, 200.0f, 10.0f, 10.0f, 1.6f, 1.4f, 0.98f
			
 
				+		4, 256, 128, 64, 98, 4, 6, 20, 14, 8, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_EXHAUSTIVE,
			
 
				-		4, 512, 512, 512, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 10.0f, 2.0f, 2.0f, 0.99f
			
 
				+		4, 512, 512, 512, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
			
 
				 	}
			
 
				 }};
			
 
				 
			
 
				 /**
			
 
				- * @brief The static quality presets that are built-in for medium bandwidth
			
 
				- * presets (25 <= x < 64 texels per block).
			
 
				+ * @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block).
			
 
				  */
			
 
				 static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
			
 
				 	{
			
 
				 		ASTCENC_PRE_FASTEST,
			
 
				-		2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.80f
			
 
				+		2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_FAST,
			
 
				-		3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.85f
			
 
				+		3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_MEDIUM,
			
 
				-		4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 3.0f, 1.1f, 1.05f, 0.90f
			
 
				+		4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_THOROUGH,
			
 
				-		4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 10.0f, 1.4f, 1.2f, 0.95f
			
 
				+		4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_VERYTHOROUGH,
			
 
				-		4, 256, 128, 64, 98, 4, 6, 12, 8, 3, 200.0f, 200.0f, 10.0f, 10.0f, 1.6f, 1.4f, 0.98f
			
 
				+		4, 256, 128, 64, 98, 4, 6, 12, 8, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_EXHAUSTIVE,
			
 
				-		4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 10.0f, 2.0f, 2.0f, 0.99f
			
 
				+		4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
			
 
				 	}
			
 
				 }};
			
 
				 
			
 
				 /**
			
 
				- * @brief The static quality presets that are built-in for low bandwidth
			
 
				- * presets (64 <= x texels per block).
			
 
				+ * @brief The static presets for low bandwidth encodings (64 <= x texels per block).
			
 
				  */
			
 
				 static const std::array<astcenc_preset_config, 6> preset_configs_low {{
			
 
				 	{
			
 
				 		ASTCENC_PRE_FASTEST,
			
 
				-		2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.0f, 0.80f
			
 
				+		2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_FAST,
			
 
				-		2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.0f, 0.85f
			
 
				+		2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_MEDIUM,
			
 
				-		3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 3.5f, 1.1f, 1.05f, 0.90f
			
 
				+		3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_THOROUGH,
			
 
				-		4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 10.0f, 1.3f, 1.2f, 0.97f
			
 
				+		4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_VERYTHOROUGH,
			
 
				-		4, 256, 128, 64, 98, 4, 6, 9, 5, 2, 200.0f, 200.0f, 10.0f, 10.0f, 1.6f, 1.4f, 0.98f
			
 
				+		4, 256, 128, 64, 98, 4, 6, 9, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
			
 
				 	}, {
			
 
				 		ASTCENC_PRE_EXHAUSTIVE,
			
 
				-		4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 10.0f, 2.0f, 2.0f, 0.99f
			
 
				+		4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
			
 
				 	}
			
 
				 }};
			
 
				 
			
@@ -444,8 +439,7 @@ static astcenc_error validate_config(
 
				 	config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIIONING_CANDIDATES);
			
 
				 	config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIIONING_CANDIDATES);
			
 
				 	config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f);
			
 
				-	config.tune_mode0_mse_overshoot = astc::max(config.tune_mode0_mse_overshoot, 1.0f);
			
 
				-	config.tune_refinement_mse_overshoot = astc::max(config.tune_refinement_mse_overshoot, 1.0f);
			
 
				+	config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f);
			
 
				 	config.tune_2_partition_early_out_limit_factor = astc::max(config.tune_2_partition_early_out_limit_factor, 0.0f);
			
 
				 	config.tune_3_partition_early_out_limit_factor = astc::max(config.tune_3_partition_early_out_limit_factor, 0.0f);
			
 
				 	config.tune_2_plane_early_out_limit_correlation = astc::max(config.tune_2_plane_early_out_limit_correlation, 0.0f);
			
@@ -568,8 +562,7 @@ astcenc_error astcenc_config_init(
 
				 		config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels,
			
 
				 		                                 (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels);
			
 
				 
			
 
				-		config.tune_mode0_mse_overshoot = (*preset_configs)[start].tune_mode0_mse_overshoot;
			
 
				-		config.tune_refinement_mse_overshoot = (*preset_configs)[start].tune_refinement_mse_overshoot;
			
 
				+		config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot;
			
 
				 
			
 
				 		config.tune_2_partition_early_out_limit_factor = (*preset_configs)[start].tune_2_partition_early_out_limit_factor;
			
 
				 		config.tune_3_partition_early_out_limit_factor =(*preset_configs)[start].tune_3_partition_early_out_limit_factor;
			
@@ -611,8 +604,7 @@ astcenc_error astcenc_config_init(
 
				 		config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels,
			
 
				 		                                 LERP(tune_db_limit_b_base) - 19 * ltexels);
			
 
				 
			
 
				-		config.tune_mode0_mse_overshoot = LERP(tune_mode0_mse_overshoot);
			
 
				-		config.tune_refinement_mse_overshoot = LERP(tune_refinement_mse_overshoot);
			
 
				+		config.tune_mse_overshoot = LERP(tune_mse_overshoot);
			
 
				 
			
 
				 		config.tune_2_partition_early_out_limit_factor = LERP(tune_2_partition_early_out_limit_factor);
			
 
				 		config.tune_3_partition_early_out_limit_factor = LERP(tune_3_partition_early_out_limit_factor);
			
--- a/3rdparty/astc-encoder/source/astcenc_find_best_partitioning.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_find_best_partitioning.cpp
@@ -362,6 +362,7 @@ static void count_partition_mismatch_bits(
 
				 	unsigned int mismatch_counts[BLOCK_MAX_PARTITIONINGS]
			
 
				 ) {
			
 
				 	unsigned int active_count = bsd.partitioning_count_selected[partition_count - 1];
			
 
				+	promise(active_count > 0);
			
 
				 
			
 
				 	if (partition_count == 2)
			
 
				 	{
			
@@ -400,6 +401,7 @@ static unsigned int get_partition_ordering_by_mismatch_bits(
 
				 	const unsigned int mismatch_count[BLOCK_MAX_PARTITIONINGS],
			
 
				 	unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS]
			
 
				 ) {
			
 
				+	promise(partitioning_count > 0);
			
 
				 	unsigned int mscount[256] { 0 };
			
 
				 
			
 
				 	// Create the histogram of mismatch counts
			
@@ -488,7 +490,7 @@ static unsigned int compute_kmeans_partition_ordering(
 
				 /**
			
 
				  * @brief Insert a partitioning into an ordered list of results, sorted by error.
			
 
				  *
			
 
				- * @param      max_values      The max number of entries in the best result arrays/
			
 
				+ * @param      max_values      The max number of entries in the best result arrays.
			
 
				  * @param      this_error      The error of the new entry.
			
 
				  * @param      this_partition  The partition ID of the new entry.
			
 
				  * @param[out] best_errors     The array of best error values.
			
@@ -501,6 +503,8 @@ static void insert_result(
 
				 	float* best_errors,
			
 
				 	unsigned int* best_partitions)
			
 
				 {
			
 
				+	promise(max_values > 0);
			
 
				+
			
 
				 	// Don't bother searching if the current worst error beats the new error
			
 
				 	if (this_error >= best_errors[max_values - 1])
			
 
				 	{
			
@@ -508,7 +512,7 @@ static void insert_result(
 
				 	}
			
 
				 
			
 
				 	// Else insert into the list in error-order
			
 
				-	for (unsigned int i = 0; i < max_values;  i++)
			
 
				+	for (unsigned int i = 0; i < max_values; i++)
			
 
				 	{
			
 
				 		// Existing result is better - move on ...
			
 
				 		if (this_error > best_errors[i])
			
--- a/3rdparty/astc-encoder/source/astcenc_ideal_endpoints_and_weights.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_ideal_endpoints_and_weights.cpp
@@ -692,6 +692,7 @@ float compute_error_of_weight_set_1plane(
 
				 ) {
			
 
				 	vfloatacc error_summav = vfloatacc::zero();
			
 
				 	unsigned int texel_count = di.texel_count;
			
 
				+	promise(texel_count > 0);
			
 
				 
			
 
				 	// Process SIMD-width chunks, safe to over-fetch - the extra space is zero initialized
			
 
				 	if (di.max_texel_weight_count > 2)
			
@@ -757,6 +758,7 @@ float compute_error_of_weight_set_2planes(
 
				 ) {
			
 
				 	vfloatacc error_summav = vfloatacc::zero();
			
 
				 	unsigned int texel_count = di.texel_count;
			
 
				+	promise(texel_count > 0);
			
 
				 
			
 
				 	// Process SIMD-width chunks, safe to over-fetch - the extra space is zero initialized
			
 
				 	if (di.max_texel_weight_count > 2)
			
@@ -861,8 +863,7 @@ void compute_ideal_weights_for_decimation(
 
				 	// zero-initialized SIMD over-fetch region
			
 
				 	if (is_direct)
			
 
				 	{
			
 
				-		unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count);
			
 
				-		for (unsigned int i = 0; i < texel_count_simd; i += ASTCENC_SIMD_WIDTH)
			
 
				+		for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
			
 
				 		{
			
 
				 			vfloat weight(ei.weights + i);
			
 
				 			storea(weight, dec_weight_ideal_value + i);
			
@@ -970,7 +971,7 @@ void compute_ideal_weights_for_decimation(
 
				 		vfloat step = (error_change1 * chd_scale) / error_change0;
			
 
				 		step = clamp(-stepsize, stepsize, step);
			
 
				 
			
 
				-		// Update the weight; note this can store negative values.
			
 
				+		// Update the weight; note this can store negative values
			
 
				 		storea(weight_val + step, dec_weight_ideal_value + i);
			
 
				 	}
			
 
				 }
			
@@ -1215,7 +1216,7 @@ void recompute_ideal_colors_1plane(
 
				 		// Only compute a partition mean if more than one partition
			
 
				 		if (partition_count > 1)
			
 
				 		{
			
 
				-			rgba_sum = vfloat4(1e-17f);
			
 
				+			rgba_sum = vfloat4::zero();
			
 
				 			promise(texel_count > 0);
			
 
				 			for (unsigned int j = 0; j < texel_count; j++)
			
 
				 			{
			
@@ -1251,7 +1252,6 @@ void recompute_ideal_colors_1plane(
 
				 		for (unsigned int j = 0; j < texel_count; j++)
			
 
				 		{
			
 
				 			unsigned int tix = texel_indexes[j];
			
 
				-
			
 
				 			vfloat4 rgba = blk.texel(tix);
			
 
				 
			
 
				 			float idx0 = undec_weight_ref[tix];
			
@@ -1284,9 +1284,6 @@ void recompute_ideal_colors_1plane(
 
				 		vfloat4 right_sum  = vfloat4(right_sum_s) * color_weight;
			
 
				 		vfloat4 lmrs_sum   = vfloat3(left_sum_s, middle_sum_s, right_sum_s) * ls_weight;
			
 
				 
			
 
				-		vfloat4 weight_weight_sum = vfloat4(weight_weight_sum_s) * color_weight;
			
 
				-		float psum = right_sum_s * hadd_rgb_s(color_weight);
			
 
				-
			
 
				 		color_vec_x = color_vec_x * color_weight;
			
 
				 		color_vec_y = color_vec_y * color_weight;
			
 
				 
			
@@ -1349,26 +1346,32 @@ void recompute_ideal_colors_1plane(
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		// Calculations specific to mode #7, the HDR RGB-scale mode
			
 
				-		vfloat4 rgbq_sum = color_vec_x + color_vec_y;
			
 
				-		rgbq_sum.set_lane<3>(hadd_rgb_s(color_vec_y));
			
 
				+		// Calculations specific to mode #7, the HDR RGB-scale mode - skip if known LDR
			
 
				+		if (blk.rgb_lns[0] || blk.alpha_lns[0])
			
 
				+		{
			
 
				+			vfloat4 weight_weight_sum = vfloat4(weight_weight_sum_s) * color_weight;
			
 
				+			float psum = right_sum_s * hadd_rgb_s(color_weight);
			
 
				 
			
 
				-		vfloat4 rgbovec = compute_rgbo_vector(rgba_weight_sum, weight_weight_sum, rgbq_sum, psum);
			
 
				-		rgbo_vectors[i] = rgbovec;
			
 
				+			vfloat4 rgbq_sum = color_vec_x + color_vec_y;
			
 
				+			rgbq_sum.set_lane<3>(hadd_rgb_s(color_vec_y));
			
 
				 
			
 
				-		// We can get a failure due to the use of a singular (non-invertible) matrix
			
 
				-		// If it failed, compute rgbo_vectors[] with a different method ...
			
 
				-		if (astc::isnan(dot_s(rgbovec, rgbovec)))
			
 
				-		{
			
 
				-			vfloat4 v0 = ep.endpt0[i];
			
 
				-			vfloat4 v1 = ep.endpt1[i];
			
 
				+			vfloat4 rgbovec = compute_rgbo_vector(rgba_weight_sum, weight_weight_sum, rgbq_sum, psum);
			
 
				+			rgbo_vectors[i] = rgbovec;
			
 
				 
			
 
				-			float avgdif = hadd_rgb_s(v1 - v0) * (1.0f / 3.0f);
			
 
				-			avgdif = astc::max(avgdif, 0.0f);
			
 
				+			// We can get a failure due to the use of a singular (non-invertible) matrix
			
 
				+			// If it failed, compute rgbo_vectors[] with a different method ...
			
 
				+			if (astc::isnan(dot_s(rgbovec, rgbovec)))
			
 
				+			{
			
 
				+				vfloat4 v0 = ep.endpt0[i];
			
 
				+				vfloat4 v1 = ep.endpt1[i];
			
 
				 
			
 
				-			vfloat4 avg = (v0 + v1) * 0.5f;
			
 
				-			vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f;
			
 
				-			rgbo_vectors[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif);
			
 
				+				float avgdif = hadd_rgb_s(v1 - v0) * (1.0f / 3.0f);
			
 
				+				avgdif = astc::max(avgdif, 0.0f);
			
 
				+
			
 
				+				vfloat4 avg = (v0 + v1) * 0.5f;
			
 
				+				vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f;
			
 
				+				rgbo_vectors[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif);
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 }
			
@@ -1516,7 +1519,7 @@ void recompute_ideal_colors_2planes(
 
				 		color_vec_x += cwprod - cwiprod;
			
 
				 
			
 
				 		scale_vec += vfloat2(om_idx0, idx0) * (ls_weight * scale);
			
 
				-		weight_weight_sum += (color_weight * color_idx);
			
 
				+		weight_weight_sum += color_idx;
			
 
				 	}
			
 
				 
			
 
				 	vfloat4 left1_sum   = vfloat4(left1_sum_s) * color_weight;
			
@@ -1528,8 +1531,6 @@ void recompute_ideal_colors_2planes(
 
				 	vfloat4 middle2_sum = vfloat4(middle2_sum_s) * color_weight;
			
 
				 	vfloat4 right2_sum  = vfloat4(right2_sum_s) * color_weight;
			
 
				 
			
 
				-	float psum = dot3_s(select(right1_sum, right2_sum, p2_mask), color_weight);
			
 
				-
			
 
				 	color_vec_x = color_vec_x * color_weight;
			
 
				 	color_vec_y = color_vec_y * color_weight;
			
 
				 
			
@@ -1630,26 +1631,32 @@ void recompute_ideal_colors_2planes(
 
				 		ep.endpt1[0] = select(ep.endpt1[0], ep1, full_mask);
			
 
				 	}
			
 
				 
			
 
				-	// Calculations specific to mode #7, the HDR RGB-scale mode
			
 
				-	vfloat4 rgbq_sum = color_vec_x + color_vec_y;
			
 
				-	rgbq_sum.set_lane<3>(hadd_rgb_s(color_vec_y));
			
 
				+	// Calculations specific to mode #7, the HDR RGB-scale mode - skip if known LDR
			
 
				+	if (blk.rgb_lns[0] || blk.alpha_lns[0])
			
 
				+	{
			
 
				+		weight_weight_sum = weight_weight_sum * color_weight;
			
 
				+		float psum = dot3_s(select(right1_sum, right2_sum, p2_mask), color_weight);
			
 
				+
			
 
				+		vfloat4 rgbq_sum = color_vec_x + color_vec_y;
			
 
				+		rgbq_sum.set_lane<3>(hadd_rgb_s(color_vec_y));
			
 
				 
			
 
				-	rgbo_vector = compute_rgbo_vector(rgba_weight_sum, weight_weight_sum, rgbq_sum, psum);
			
 
				+		rgbo_vector = compute_rgbo_vector(rgba_weight_sum, weight_weight_sum, rgbq_sum, psum);
			
 
				 
			
 
				-	// We can get a failure due to the use of a singular (non-invertible) matrix
			
 
				-	// If it failed, compute rgbo_vectors[] with a different method ...
			
 
				-	if (astc::isnan(dot_s(rgbo_vector, rgbo_vector)))
			
 
				-	{
			
 
				-		vfloat4 v0 = ep.endpt0[0];
			
 
				-		vfloat4 v1 = ep.endpt1[0];
			
 
				+		// We can get a failure due to the use of a singular (non-invertible) matrix
			
 
				+		// If it failed, compute rgbo_vectors[] with a different method ...
			
 
				+		if (astc::isnan(dot_s(rgbo_vector, rgbo_vector)))
			
 
				+		{
			
 
				+			vfloat4 v0 = ep.endpt0[0];
			
 
				+			vfloat4 v1 = ep.endpt1[0];
			
 
				 
			
 
				-		float avgdif = hadd_rgb_s(v1 - v0) * (1.0f / 3.0f);
			
 
				-		avgdif = astc::max(avgdif, 0.0f);
			
 
				+			float avgdif = hadd_rgb_s(v1 - v0) * (1.0f / 3.0f);
			
 
				+			avgdif = astc::max(avgdif, 0.0f);
			
 
				 
			
 
				-		vfloat4 avg = (v0 + v1) * 0.5f;
			
 
				-		vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f;
			
 
				+			vfloat4 avg = (v0 + v1) * 0.5f;
			
 
				+			vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f;
			
 
				 
			
 
				-		rgbo_vector = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif);
			
 
				+			rgbo_vector = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif);
			
 
				+		}
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/3rdparty/astc-encoder/source/astcenc_integer_sequence.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_integer_sequence.cpp
@@ -24,6 +24,7 @@
 
				 #include <array>
			
 
				 
			
 
				 /** @brief Unpacked quint triplets <low,middle,high> for each packed value */
			
 
				+// TODO: Bitpack these into a uint16_t?
			
 
				 static const uint8_t quints_of_integer[128][3] {
			
 
				 	{0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0},
			
 
				 	{4, 0, 0}, {0, 4, 0}, {4, 4, 0}, {4, 4, 4},
			
@@ -99,6 +100,7 @@ static const uint8_t integer_of_quints[5][5][5] {
 
				 };
			
 
				 
			
 
				 /** @brief Unpacked trit quintuplets <low,...,high> for each packed value */
			
 
				+// TODO: Bitpack these into a uint16_t?
			
 
				 static const uint8_t trits_of_integer[256][5] {
			
 
				 	{0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0},
			
 
				 	{0, 1, 0, 0, 0}, {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0},
			
@@ -334,44 +336,41 @@ static const uint8_t integer_of_trits[3][3][3][3][3] {
 
				  */
			
 
				 struct btq_count
			
 
				 {
			
 
				-	/** @brief The quantization level. */
			
 
				-	uint8_t quant;
			
 
				-
			
 
				 	/** @brief The number of bits. */
			
 
				-	uint8_t bits;
			
 
				+	uint8_t bits:6;
			
 
				 
			
 
				 	/** @brief The number of trits. */
			
 
				-	uint8_t trits;
			
 
				+	uint8_t trits:1;
			
 
				 
			
 
				 	/** @brief The number of quints. */
			
 
				-	uint8_t quints;
			
 
				+	uint8_t quints:1;
			
 
				 };
			
 
				 
			
 
				 /**
			
 
				  * @brief The table of bits, trits, and quints needed for a quant encode.
			
 
				  */
			
 
				 static const std::array<btq_count, 21> btq_counts {{
			
 
				-	{   QUANT_2, 1, 0, 0 },
			
 
				-	{   QUANT_3, 0, 1, 0 },
			
 
				-	{   QUANT_4, 2, 0, 0 },
			
 
				-	{   QUANT_5, 0, 0, 1 },
			
 
				-	{   QUANT_6, 1, 1, 0 },
			
 
				-	{   QUANT_8, 3, 0, 0 },
			
 
				-	{  QUANT_10, 1, 0, 1 },
			
 
				-	{  QUANT_12, 2, 1, 0 },
			
 
				-	{  QUANT_16, 4, 0, 0 },
			
 
				-	{  QUANT_20, 2, 0, 1 },
			
 
				-	{  QUANT_24, 3, 1, 0 },
			
 
				-	{  QUANT_32, 5, 0, 0 },
			
 
				-	{  QUANT_40, 3, 0, 1 },
			
 
				-	{  QUANT_48, 4, 1, 0 },
			
 
				-	{  QUANT_64, 6, 0, 0 },
			
 
				-	{  QUANT_80, 4, 0, 1 },
			
 
				-	{  QUANT_96, 5, 1, 0 },
			
 
				-	{ QUANT_128, 7, 0, 0 },
			
 
				-	{ QUANT_160, 5, 0, 1 },
			
 
				-	{ QUANT_192, 6, 1, 0 },
			
 
				-	{ QUANT_256, 8, 0, 0 }
			
 
				+	{ 1, 0, 0 }, // QUANT_2
			
 
				+	{ 0, 1, 0 }, // QUANT_3
			
 
				+	{ 2, 0, 0 }, // QUANT_4
			
 
				+	{ 0, 0, 1 }, // QUANT_5
			
 
				+	{ 1, 1, 0 }, // QUANT_6
			
 
				+	{ 3, 0, 0 }, // QUANT_8
			
 
				+	{ 1, 0, 1 }, // QUANT_10
			
 
				+	{ 2, 1, 0 }, // QUANT_12
			
 
				+	{ 4, 0, 0 }, // QUANT_16
			
 
				+	{ 2, 0, 1 }, // QUANT_20
			
 
				+	{ 3, 1, 0 }, // QUANT_24
			
 
				+	{ 5, 0, 0 }, // QUANT_32
			
 
				+	{ 3, 0, 1 }, // QUANT_40
			
 
				+	{ 4, 1, 0 }, // QUANT_48
			
 
				+	{ 6, 0, 0 }, // QUANT_64
			
 
				+	{ 4, 0, 1 }, // QUANT_80
			
 
				+	{ 5, 1, 0 }, // QUANT_96
			
 
				+	{ 7, 0, 0 }, // QUANT_128
			
 
				+	{ 5, 0, 1 }, // QUANT_160
			
 
				+	{ 6, 1, 0 }, // QUANT_192
			
 
				+	{ 8, 0, 0 }  // QUANT_256
			
 
				 }};
			
 
				 
			
 
				 /**
			
@@ -382,44 +381,38 @@ static const std::array<btq_count, 21> btq_counts {{
 
				  */
			
 
				 struct ise_size
			
 
				 {
			
 
				-	/** @brief The quantization level. */
			
 
				-	uint8_t quant;
			
 
				-
			
 
				 	/** @brief The scaling parameter. */
			
 
				-	uint8_t scale;
			
 
				-
			
 
				-	/** @brief The rounding parameter. */
			
 
				-	uint8_t round;
			
 
				+	uint8_t scale:6;
			
 
				 
			
 
				 	/** @brief The divisor parameter. */
			
 
				-	uint8_t divisor;
			
 
				+	uint8_t divisor:2; // Stored as an encoded index d; the sizing code uses (d << 1) + 1 as the actual divisor
			
 
				 };
			
 
				 
			
 
				 /**
			
 
				  * @brief The table of scale and divisor parameters needed for quant sizing (rounding is derived from the divisor).
			
 
				  */
			
 
				 static const std::array<ise_size, 21> ise_sizes {{
			
 
				-	{   QUANT_2,  1, 0, 1 },
			
 
				-	{   QUANT_3,  8, 4, 5 },
			
 
				-	{   QUANT_4,  2, 0, 1 },
			
 
				-	{   QUANT_5,  7, 2, 3 },
			
 
				-	{   QUANT_6, 13, 4, 5 },
			
 
				-	{   QUANT_8,  3, 0, 1 },
			
 
				-	{  QUANT_10, 10, 2, 3 },
			
 
				-	{  QUANT_12, 18, 4, 5 },
			
 
				-	{  QUANT_16,  4, 0, 1 },
			
 
				-	{  QUANT_20, 13, 2, 3 },
			
 
				-	{  QUANT_24, 23, 4, 5 },
			
 
				-	{  QUANT_32,  5, 0, 1 },
			
 
				-	{  QUANT_40, 16, 2, 3 },
			
 
				-	{  QUANT_48, 28, 4, 5 },
			
 
				-	{  QUANT_64,  6, 0, 1 },
			
 
				-	{  QUANT_80, 19, 2, 3 },
			
 
				-	{  QUANT_96, 33, 4, 5 },
			
 
				-	{ QUANT_128,  7, 0, 1 },
			
 
				-	{ QUANT_160, 22, 2, 3 },
			
 
				-	{ QUANT_192, 38, 4, 5 },
			
 
				-	{ QUANT_256,  8, 0, 1 }
			
 
				+	{  1, 0 }, // QUANT_2
			
 
				+	{  8, 2 }, // QUANT_3
			
 
				+	{  2, 0 }, // QUANT_4
			
 
				+	{  7, 1 }, // QUANT_5
			
 
				+	{ 13, 2 }, // QUANT_6
			
 
				+	{  3, 0 }, // QUANT_8
			
 
				+	{ 10, 1 }, // QUANT_10
			
 
				+	{ 18, 2 }, // QUANT_12
			
 
				+	{  4, 0 }, // QUANT_16
			
 
				+	{ 13, 1 }, // QUANT_20
			
 
				+	{ 23, 2 }, // QUANT_24
			
 
				+	{  5, 0 }, // QUANT_32
			
 
				+	{ 16, 1 }, // QUANT_40
			
 
				+	{ 28, 2 }, // QUANT_48
			
 
				+	{  6, 0 }, // QUANT_64
			
 
				+	{ 19, 1 }, // QUANT_80
			
 
				+	{ 33, 2 }, // QUANT_96
			
 
				+	{  7, 0 }, // QUANT_128
			
 
				+	{ 22, 1 }, // QUANT_160
			
 
				+	{ 38, 2 }, // QUANT_192
			
 
				+	{  8, 0 }  // QUANT_256
			
 
				 }};
			
 
				 
			
 
				 /* See header for documentation. */
			
@@ -435,7 +428,8 @@ unsigned int get_ise_sequence_bitcount(
 
				 	}
			
 
				 
			
 
				 	auto& entry = ise_sizes[quant_level];
			
 
				-	return (entry.scale * character_count + entry.round) / entry.divisor;
			
 
				+	unsigned int divisor = (entry.divisor << 1) + 1;
			
 
				+	return (entry.scale * character_count + divisor - 1) / divisor;
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -645,7 +639,6 @@ void encode_ise(
 
				 	// Write out just bits
			
 
				 	else
			
 
				 	{
			
 
				-		promise(character_count > 0);
			
 
				 		for (unsigned int i = 0; i < character_count; i++)
			
 
				 		{
			
 
				 			write_bits(input_data[i], bits, bit_offset, output_data);
			
@@ -685,10 +678,10 @@ void decode_ise(
 
				 
			
 
				 		if (trits)
			
 
				 		{
			
 
				-			static const unsigned int bits_to_read[5]  { 2, 2, 1, 2, 1 };
			
 
				-			static const unsigned int block_shift[5]   { 0, 2, 4, 5, 7 };
			
 
				-			static const unsigned int next_lcounter[5] { 1, 2, 3, 4, 0 };
			
 
				-			static const unsigned int hcounter_incr[5] { 0, 0, 0, 0, 1 };
			
 
				+			static const uint8_t bits_to_read[5]  { 2, 2, 1, 2, 1 };
			
 
				+			static const uint8_t block_shift[5]   { 0, 2, 4, 5, 7 };
			
 
				+			static const uint8_t next_lcounter[5] { 1, 2, 3, 4, 0 };
			
 
				+			static const uint8_t hcounter_incr[5] { 0, 0, 0, 0, 1 };
			
 
				 			unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
			
 
				 			bit_offset += bits_to_read[lcounter];
			
 
				 			tq_blocks[hcounter] |= tdata << block_shift[lcounter];
			
@@ -698,10 +691,10 @@ void decode_ise(
 
				 
			
 
				 		if (quints)
			
 
				 		{
			
 
				-			static const unsigned int bits_to_read[3]  { 3, 2, 2 };
			
 
				-			static const unsigned int block_shift[3]   { 0, 3, 5 };
			
 
				-			static const unsigned int next_lcounter[3] { 1, 2, 0 };
			
 
				-			static const unsigned int hcounter_incr[3] { 0, 0, 1 };
			
 
				+			static const uint8_t bits_to_read[3]  { 3, 2, 2 };
			
 
				+			static const uint8_t block_shift[3]   { 0, 3, 5 };
			
 
				+			static const uint8_t next_lcounter[3] { 1, 2, 0 };
			
 
				+			static const uint8_t hcounter_incr[3] { 0, 0, 1 };
			
 
				 			unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
			
 
				 			bit_offset += bits_to_read[lcounter];
			
 
				 			tq_blocks[hcounter] |= tdata << block_shift[lcounter];
			
@@ -714,6 +707,7 @@ void decode_ise(
 
				 	if (trits)
			
 
				 	{
			
 
				 		unsigned int trit_blocks = (character_count + 4) / 5;
			
 
				+		promise(trit_blocks > 0);
			
 
				 		for (unsigned int i = 0; i < trit_blocks; i++)
			
 
				 		{
			
 
				 			const uint8_t *tritptr = trits_of_integer[tq_blocks[i]];
			
@@ -728,6 +722,7 @@ void decode_ise(
 
				 	if (quints)
			
 
				 	{
			
 
				 		unsigned int quint_blocks = (character_count + 2) / 3;
			
 
				+		promise(quint_blocks > 0);
			
 
				 		for (unsigned int i = 0; i < quint_blocks; i++)
			
 
				 		{
			
 
				 			const uint8_t *quintptr = quints_of_integer[tq_blocks[i]];
			
--- a/3rdparty/astc-encoder/source/astcenc_internal.h
+++ b/3rdparty/astc-encoder/source/astcenc_internal.h
@@ -1008,9 +1008,6 @@ struct dt_init_working_buffers
 
				 */
			
 
				 struct quant_and_transfer_table
			
 
				 {
			
 
				-	/** @brief The quantization level used. */
			
 
				-	quant_method method;
			
 
				-
			
 
				 	/** @brief The unscrambled unquantized value. */
			
 
				 	int8_t quant_to_unquant[32];
			
 
				 
			
--- a/3rdparty/astc-encoder/source/astcenc_mathlib_softfloat.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_mathlib_softfloat.cpp
@@ -15,13 +15,13 @@
 
				 // under the License.
			
 
				 // ----------------------------------------------------------------------------
			
 
				 
			
 
				+#include "astcenc_mathlib.h"
			
 
				+
			
 
				 /**
			
 
				  * @brief Soft-float library for IEEE-754.
			
 
				  */
			
 
				 #if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
			
 
				 
			
 
				-#include "astcenc_mathlib.h"
			
 
				-
			
 
				 /*	sized soft-float types. These are mapped to the sized integer
			
 
				     types of C99, instead of C's floating-point types; this is because
			
 
				     the library needs to maintain exact, bit-level control on all
			
--- a/3rdparty/astc-encoder/source/astcenc_percentile_tables.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_percentile_tables.cpp
@@ -1166,11 +1166,11 @@ const float *get_2d_percentile_table(
 
				 	unsigned int xdim,
			
 
				 	unsigned int ydim
			
 
				 ) {
			
 
				-	float* unpacked_table = new float[2048];
			
 
				+	float* unpacked_table = new float[WEIGHTS_MAX_BLOCK_MODES];
			
 
				 	const packed_percentile_table *apt = get_packed_table(xdim, ydim);
			
 
				 
			
 
				 	// Set the default percentile
			
 
				-	for (unsigned int i = 0; i < 2048; i++)
			
 
				+	for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
			
 
				 	{
			
 
				 		unpacked_table[i] = 1.0f;
			
 
				 	}
			
--- a/3rdparty/astc-encoder/source/astcenc_pick_best_endpoint_format.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_pick_best_endpoint_format.cpp
@@ -325,11 +325,7 @@ static void compute_color_error_for_every_integer_count_and_quant_level(
 
				 ) {
			
 
				 	int partition_size = pi.partition_texel_count[partition_index];
			
 
				 
			
 
				-	static const float baseline_quant_error[21] {
			
 
				-		(65536.0f * 65536.0f / 18.0f),				// 2 values, 1 step
			
 
				-		(65536.0f * 65536.0f / 18.0f) / (2 * 2),	// 3 values, 2 steps
			
 
				-		(65536.0f * 65536.0f / 18.0f) / (3 * 3),	// 4 values, 3 steps
			
 
				-		(65536.0f * 65536.0f / 18.0f) / (4 * 4),	// 5 values
			
 
				+	static const float baseline_quant_error[21 - QUANT_6] {
			
 
				 		(65536.0f * 65536.0f / 18.0f) / (5 * 5),
			
 
				 		(65536.0f * 65536.0f / 18.0f) / (7 * 7),
			
 
				 		(65536.0f * 65536.0f / 18.0f) / (9 * 9),
			
@@ -528,7 +524,7 @@ static void compute_color_error_for_every_integer_count_and_quant_level(
 
				 			// The base_quant_error should depend on the scale-factor that would be used during
			
 
				 			// actual encode of the color value
			
 
				 
			
 
				-			float base_quant_error = baseline_quant_error[i] * static_cast<float>(partition_size);
			
 
				+			float base_quant_error = baseline_quant_error[i - QUANT_6] * static_cast<float>(partition_size);
			
 
				 			float rgb_quantization_error = error_weight_rgbsum * base_quant_error * 2.0f;
			
 
				 			float alpha_quantization_error = error_weight.lane<3>() * base_quant_error * 2.0f;
			
 
				 			float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error;
			
@@ -591,7 +587,7 @@ static void compute_color_error_for_every_integer_count_and_quant_level(
 
				 				error_scale_oe_rgb = 1.0f;
			
 
				 			}
			
 
				 
			
 
				-			float base_quant_error = baseline_quant_error[i];
			
 
				+			float base_quant_error = baseline_quant_error[i - QUANT_6];
			
 
				 			float quant_error_rgb  = base_quant_error_rgb * base_quant_error;
			
 
				 			float quant_error_rgba = base_quant_error_rgba * base_quant_error;
			
 
				 
			
@@ -1136,22 +1132,19 @@ unsigned int compute_ideal_endpoint_formats(
 
				 	uint8_t (&best_ep_formats)[WEIGHTS_MAX_BLOCK_MODES][BLOCK_MAX_PARTITIONS] = tmpbuf.best_ep_formats;
			
 
				 
			
 
				 	// Ensure that the first iteration understep contains data that will never be picked
			
 
				+	vfloat clear_error(ERROR_CALC_DEFAULT);
			
 
				+	vint clear_quant(0);
			
 
				+
			
 
				 	unsigned int packed_start_block_mode = round_down_to_simd_multiple_vla(start_block_mode);
			
 
				-	for (unsigned int i = packed_start_block_mode; i < start_block_mode; i++)
			
 
				-	{
			
 
				-		errors_of_best_combination[i] = ERROR_CALC_DEFAULT;
			
 
				-		best_quant_levels[i] = QUANT_2;
			
 
				-		best_quant_levels_mod[i] = QUANT_2;
			
 
				-	}
			
 
				+	storea(clear_error, errors_of_best_combination + packed_start_block_mode);
			
 
				+	store_nbytes(clear_quant, best_quant_levels + packed_start_block_mode);
			
 
				+	store_nbytes(clear_quant, best_quant_levels_mod + packed_start_block_mode);
			
 
				 
			
 
				 	// Ensure that last iteration overstep contains data that will never be picked
			
 
				-	const unsigned int packed_end_block_mode = round_up_to_simd_multiple_vla(end_block_mode);
			
 
				-	for (unsigned int i = end_block_mode; i < packed_end_block_mode; i++)
			
 
				-	{
			
 
				-		errors_of_best_combination[i] = ERROR_CALC_DEFAULT;
			
 
				-		best_quant_levels[i] = QUANT_2;
			
 
				-		best_quant_levels_mod[i] = QUANT_2;
			
 
				-	}
			
 
				+	unsigned int packed_end_block_mode = round_down_to_simd_multiple_vla(end_block_mode - 1);
			
 
				+	storea(clear_error, errors_of_best_combination + packed_end_block_mode);
			
 
				+	store_nbytes(clear_quant, best_quant_levels + packed_end_block_mode);
			
 
				+	store_nbytes(clear_quant, best_quant_levels_mod + packed_end_block_mode);
			
 
				 
			
 
				 	// Track a scalar best to avoid expensive search at least once ...
			
 
				 	float error_of_best_combination = ERROR_CALC_DEFAULT;
			
--- a/3rdparty/astc-encoder/source/astcenc_platform_isa_detection.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_platform_isa_detection.cpp
@@ -1,6 +1,6 @@
 
				 // SPDX-License-Identifier: Apache-2.0
			
 
				 // ----------------------------------------------------------------------------
			
 
				-// Copyright 2020-2021 Arm Limited
			
 
				+// Copyright 2020-2022 Arm Limited
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
			
 
				 // use this file except in compliance with the License. You may obtain a copy
			
@@ -47,7 +47,7 @@ static bool g_cpu_has_f16c { false };
 
				 ============================================================================ */
			
 
				 #if !defined(__clang__) && defined(_MSC_VER)
			
 
				 #define WIN32_LEAN_AND_MEAN
			
 
				-#include <Windows.h>
			
 
				+#include <windows.h>
			
 
				 #include <intrin.h>
			
 
				 
			
 
				 /**
			
--- a/3rdparty/astc-encoder/source/astcenc_symbolic_physical.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_symbolic_physical.cpp
@@ -371,12 +371,15 @@ void physical_to_symbolic(
 
				 	const auto& di = bsd.get_decimation_info(bm.decimation_mode);
			
 
				 
			
 
				 	int weight_count = di.weight_count;
			
 
				+	promise(weight_count > 0);
			
 
				+
			
 
				 	quant_method weight_quant_method = static_cast<quant_method>(bm.quant_mode);
			
 
				 	int is_dual_plane = bm.is_dual_plane;
			
 
				 
			
 
				 	int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count;
			
 
				 
			
 
				 	int partition_count = read_bits(2, 11, pcb.data) + 1;
			
 
				+	promise(partition_count > 0);
			
 
				 
			
 
				 	scb.block_mode = static_cast<uint16_t>(block_mode);
			
 
				 	scb.partition_count = static_cast<uint8_t>(partition_count);
			
@@ -523,6 +526,7 @@ void physical_to_symbolic(
 
				 	}
			
 
				 
			
 
				 	// Fetch component for second-plane in the case of dual plane of weights.
			
 
				+	scb.plane2_component = -1;
			
 
				 	if (is_dual_plane)
			
 
				 	{
			
 
				 		scb.plane2_component = static_cast<int8_t>(read_bits(2, below_weights_pos - 2, pcb.data));
			
--- a/3rdparty/astc-encoder/source/astcenc_vecmathlib.h
+++ b/3rdparty/astc-encoder/source/astcenc_vecmathlib.h
@@ -26,7 +26,7 @@
 
				  * with that is available at compile time. The current vector width is
			
 
				  * accessible for e.g. loop strides via the ASTCENC_SIMD_WIDTH constant.
			
 
				  *
			
 
				- * Explicit scalar types are acessible via the vint1, vfloat1, vmask1 types.
			
 
				+ * Explicit scalar types are accessible via the vint1, vfloat1, vmask1 types.
			
 
				  * These are provided primarily for prototyping and algorithm debug of VLA
			
 
				  * implementations.
			
 
				  *
			
@@ -402,7 +402,7 @@ static ASTCENC_SIMD_INLINE vint4 clz(vint4 a)
 
				 	// the original integer value into a 2^N encoding we can recover easily.
			
 
				 
			
 
				 	// Convert to float without risk of rounding up by keeping only top 8 bits.
			
 
				-	// This trick is is guranteed to keep top 8 bits and clear the 9th.
			
 
				+	// This trick is guaranteed to keep top 8 bits and clear the 9th.
			
 
				 	a = (~lsr<8>(a)) & a;
			
 
				 	a = float_as_int(int_to_float(a));
			
 
				 
			
--- a/3rdparty/astc-encoder/source/astcenc_vecmathlib_neon_4.h
+++ b/3rdparty/astc-encoder/source/astcenc_vecmathlib_neon_4.h
@@ -106,7 +106,7 @@ struct vfloat4
 
				 	 */
			
 
				 	template <int l> ASTCENC_SIMD_INLINE void set_lane(float a)
			
 
				 	{
			
 
				-		m = vld1q_lane_f32(&a, m, l);
			
 
				+		m = vsetq_lane_f32(a, m, l);
			
 
				 	}
			
 
				 
			
 
				 	/**
			
@@ -122,7 +122,7 @@ struct vfloat4
 
				 	 */
			
 
				 	static ASTCENC_SIMD_INLINE vfloat4 load1(const float* p)
			
 
				 	{
			
 
				-		return vfloat4(vdupq_n_f32(*p));
			
 
				+		return vfloat4(vld1q_dup_f32(p));
			
 
				 	}
			
 
				 
			
 
				 	/**
			
@@ -202,9 +202,8 @@ struct vint4
 
				 	 */
			
 
				 	ASTCENC_SIMD_INLINE explicit vint4(const uint8_t *p)
			
 
				 	{
			
 
				-		uint32x2_t t8 {};
			
 
				 		// Cast is safe - NEON loads are allowed to be unaligned
			
 
				-		t8 = vld1_lane_u32(reinterpret_cast<const uint32_t*>(p), t8, 0);
			
 
				+		uint32x2_t t8 = vld1_dup_u32(reinterpret_cast<const uint32_t*>(p));
			
 
				 		uint16x4_t t16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(t8)));
			
 
				 		m = vreinterpretq_s32_u32(vmovl_u16(t16));
			
 
				 	}
			
@@ -251,7 +250,7 @@ struct vint4
 
				 	 */
			
 
				 	template <int l> ASTCENC_SIMD_INLINE void set_lane(int a)
			
 
				 	{
			
 
				-		m = vld1q_lane_s32(&a, m, l);
			
 
				+		m = vsetq_lane_s32(a, m, l);
			
 
				 	}
			
 
				 
			
 
				 	/**
			
--- a/3rdparty/astc-encoder/source/astcenc_weight_quant_xfer_tables.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_weight_quant_xfer_tables.cpp
@@ -24,9 +24,8 @@
 
				 #define _ 0 // Using _ to indicate an entry that will not be used.
			
 
				 
			
 
				 const quant_and_transfer_table quant_and_xfer_tables[12] {
			
 
				-	// Quantization method 0, range 0..1
			
 
				+	// QUANT_2, range 0..1
			
 
				 	{
			
 
				-		QUANT_2,
			
 
				 		{0, 64},
			
 
				 		{0, 1},
			
 
				 		{0, 64},
			
@@ -34,9 +33,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
 
				 		 _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
			
 
				 		 0x4000}
			
 
				 	},
			
 
				-	// Quantization method 1, range 0..2
			
 
				+	// QUANT_3, range 0..2
			
 
				 	{
			
 
				-		QUANT_3,
			
 
				 		{0, 32, 64},
			
 
				 		{0, 1, 2},
			
 
				 		{0, 32, 64},
			
@@ -44,19 +42,17 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
 
				 		 _,_,0x4000,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
			
 
				 		 _,_,_,_,0x4020}
			
 
				 	},
			
 
				-	// Quantization method 2, range 0..3
			
 
				+	// QUANT_4, range 0..3
			
 
				 	{
			
 
				-		QUANT_4,
			
 
				 		{0, 21, 43, 64},
			
 
				 		{0, 1, 2, 3},
			
 
				-		 {0, 21, 43, 64},
			
 
				+		{0, 21, 43, 64},
			
 
				 		{0x1500,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x2b00,_,_,_,_,
			
 
				 		 _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x4015,_,_,_,_,_,_,_,_,_,_,_,_,
			
 
				 		 _,_,_,_,_,_,_,_,0x402b}
			
 
				 	},
			
 
				-	// Quantization method 3, range 0..4
			
 
				+	// QUANT_5, range 0..4
			
 
				 	{
			
 
				-		QUANT_5,
			
 
				 		{0, 16, 32, 48, 64},
			
 
				 		{0, 1, 2, 3, 4},
			
 
				 		{0, 16, 32, 48, 64},
			
@@ -64,9 +60,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
 
				 		 _,_,_,_,_,_,0x3010,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x4020,_,_,_,
			
 
				 		 _,_,_,_,_,_,_,_,_,_,_,_,0x4030}
			
 
				 	},
			
 
				-	// Quantization method 4, range 0..5
			
 
				+	// QUANT_6, range 0..5
			
 
				 	{
			
 
				-		QUANT_6,
			
 
				 		{0, 12, 25, 39, 52, 64},
			
 
				 		{0, 2, 4, 5, 3, 1},
			
 
				 		{0, 64, 12, 52, 25, 39},
			
@@ -74,9 +69,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
 
				 		 0x270c,_,_,_,_,_,_,_,_,_,_,_,_,_,0x3419,_,_,_,_,_,_,_,_,_,_,
			
 
				 		 _,_,0x4027,_,_,_,_,_,_,_,_,_,_,_,0x4034}
			
 
				 	},
			
 
				-	// Quantization method 5, range 0..7
			
 
				+	// QUANT_8, range 0..7
			
 
				 	{
			
 
				-		QUANT_8,
			
 
				 		{0, 9, 18, 27, 37, 46, 55, 64},
			
 
				 		{0, 1, 2, 3, 4, 5, 6, 7},
			
 
				 		{0, 9, 18, 27, 37, 46, 55, 64},
			
@@ -84,9 +78,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
 
				 		 _,_,_,_,_,_,0x2512,_,_,_,_,_,_,_,_,_,0x2e1b,_,_,_,_,_,_,_,_,
			
 
				 		 0x3725,_,_,_,_,_,_,_,_,0x402e,_,_,_,_,_,_,_,_,0x4037}
			
 
				 	},
			
 
				-	// Quantization method 6, range 0..9
			
 
				+	// QUANT_10, range 0..9
			
 
				 	{
			
 
				-		QUANT_10,
			
 
				 		{0, 7, 14, 21, 28, 36, 43, 50, 57, 64},
			
 
				 		{0, 2, 4, 6, 8, 9, 7, 5, 3, 1},
			
 
				 		{0, 64, 7, 57, 14, 50, 21, 43, 28, 36},
			
@@ -95,9 +88,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
 
				 		 _,0x3224,_,_,_,_,_,_,0x392b,_,_,_,_,_,_,0x4032,_,_,_,_,_,
			
 
				 		 _,0x4039}
			
 
				 	},
			
 
				-	// Quantization method 7, range 0..11
			
 
				+	// QUANT_12, range 0..11
			
 
				 	{
			
 
				-		QUANT_12,
			
 
				 		{0, 5, 11, 17, 23, 28, 36, 41, 47, 53, 59, 64},
			
 
				 		{0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1},
			
 
				 		{0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36},
			
@@ -106,9 +98,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
 
				 		 0x291c,_,_,_,_,0x2f24,_,_,_,_,_,0x3529,_,_,_,_,_,
			
 
				 		 0x3b2f,_,_,_,_,_,0x4035,_,_,_,_,0x403b}
			
 
				 	},
			
 
				-	// Quantization method 8, range 0..15
			
 
				+	// QUANT_16, range 0..15
			
 
				 	{
			
 
				-		QUANT_16,
			
 
				 		{0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64},
			
 
				 		{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
			
 
				 		{0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64},
			
@@ -117,9 +108,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
 
				 		 _,0x271d,_,_,_,0x2b23,_,_,_,0x2f27,_,_,_,0x342b,_,_,_,
			
 
				 		 _,0x382f,_,_,_,0x3c34,_,_,_,0x4038,_,_,_,0x403c}
			
 
				 	},
			
 
				-	// Quantization method 9, range 0..19
			
 
				+	// QUANT_20, range 0..19
			
 
				 	{
			
 
				-		QUANT_20,
			
 
				 		{0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 35, 38, 41, 45, 48, 51, 55, 58, 61, 64},
			
 
				 		{0, 4, 8, 12, 16, 2, 6, 10, 14, 18, 19, 15, 11, 7, 3, 17, 13, 9, 5, 1},
			
 
				 		{0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35},
			
@@ -129,9 +119,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
 
				 		 0x2d26,_,_,_,0x3029,_,_,0x332d,_,_,0x3730,_,_,_,
			
 
				 		 0x3a33,_,_,0x3d37,_,_,0x403a,_,_,0x403d}
			
 
				 	},
			
 
				-	// Quantization method 10, range 0..23
			
 
				+	// QUANT_24, range 0..23
			
 
				 	{
			
 
				-		QUANT_24,
			
 
				 		{0, 2, 5, 8, 11, 13, 16, 19, 22, 24, 27, 30, 34, 37, 40, 42, 45, 48, 51, 53, 56, 59, 62, 64},
			
 
				 		{0, 8, 16, 2, 10, 18, 4, 12, 20, 6, 14, 22, 23, 15, 7, 21, 13, 5, 19, 11, 3, 17, 9, 1},
			
 
				 		{0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34},
			
@@ -142,9 +131,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
 
				 		 _,_,0x3530,_,0x3833,_,_,0x3b35,_,_,0x3e38,_,_,
			
 
				 		 0x403b,_,0x403e}
			
 
				 	},
			
 
				-	// Quantization method 11, range 0..31
			
 
				+	// QUANT_32, range 0..31
			
 
				 	{
			
 
				-		QUANT_32,
			
 
				 		{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64},
			
 
				 		{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
			
 
				 		{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64},