Browse Source

astcenc: Update to 4.4.0

> The 4.4.0 release is a minor release with image quality improvements,
> a small performance boost, a few new quality-of-life features, and a
> few minor fixes for uncommon build configurations.

https://github.com/ARM-software/astc-encoder/releases/tag/4.4.0
Rémi Verschelde 2 years ago
parent
commit
5a3f955e05

+ 0 - 1
modules/astcenc/SCsub

@@ -29,7 +29,6 @@ thirdparty_sources = [
     "astcenc_partition_tables.cpp",
     "astcenc_percentile_tables.cpp",
     "astcenc_pick_best_endpoint_format.cpp",
-    "astcenc_platform_isa_detection.cpp",
     "astcenc_quantization.cpp",
     "astcenc_symbolic_physical.cpp",
     "astcenc_weight_align.cpp",

+ 1 - 1
thirdparty/README.md

@@ -20,7 +20,7 @@ Files extracted from upstream source:
 ## astcenc
 
 - Upstream: https://github.com/ARM-software/astc-encoder
-- Version: 4.3.0 (ec83dda79fcefe07f69cdae7ed980d169bf2c4d4, 2023)
+- Version: 4.4.0 (5a5b5a1ef60dd47c27c28c66c118d22c40e3197e, 2023)
 - License: Apache 2.0
 
 Files extracted from upstream source:

+ 12 - 6
thirdparty/astcenc/astcenc.h

@@ -43,6 +43,14 @@
  *       for faster processing. The caller is responsible for creating the worker threads, and
  *       synchronizing between images.
  *
+ * Extended instruction set support
+ * ================================
+ *
+ * This library supports use of extended instruction sets, such as SSE4.1 and AVX2. These are
+ * enabled at compile time when building the library. There is no runtime checking in the core
+ * library that the instruction sets used are actually available. Checking compatibility is the
+ * responsibility of the calling code.
+ *
  * Threading
  * =========
  *
@@ -191,8 +199,6 @@ enum astcenc_error {
 	ASTCENC_ERR_OUT_OF_MEM,
 	/** @brief The call failed due to the build using fast math. */
 	ASTCENC_ERR_BAD_CPU_FLOAT,
-	/** @brief The call failed due to the build using an unsupported ISA. */
-	ASTCENC_ERR_BAD_CPU_ISA,
 	/** @brief The call failed due to an out-of-spec parameter. */
 	ASTCENC_ERR_BAD_PARAM,
 	/** @brief The call failed due to an out-of-spec block size. */
@@ -472,7 +478,7 @@ struct astcenc_config
 	/**
 	 * @brief The number of trial candidates per mode search (-candidatelimit).
 	 *
-	 * Valid values are between 1 and TUNE_MAX_TRIAL_CANDIDATES (default 4).
+	 * Valid values are between 1 and TUNE_MAX_TRIAL_CANDIDATES.
 	 */
 	unsigned int tune_candidate_limit;
 
@@ -520,21 +526,21 @@ struct astcenc_config
 	 *
 	 * This option is further scaled for normal maps, so it skips less often.
 	 */
-	float tune_2_partition_early_out_limit_factor;
+	float tune_2partition_early_out_limit_factor;
 
 	/**
 	 * @brief The threshold for skipping 4.1 trials (-3partitionlimitfactor).
 	 *
 	 * This option is further scaled for normal maps, so it skips less often.
 	 */
-	float tune_3_partition_early_out_limit_factor;
+	float tune_3partition_early_out_limit_factor;
 
 	/**
 	 * @brief The threshold for skipping two weight planes (-2planelimitcorrelation).
 	 *
 	 * This option is ineffective for normal maps.
 	 */
-	float tune_2_plane_early_out_limit_correlation;
+	float tune_2plane_early_out_limit_correlation;
 
 #if defined(ASTCENC_DIAGNOSTICS)
 	/**

+ 6 - 53
thirdparty/astcenc/astcenc_averages_and_directions.cpp

@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2022 Arm Limited
+// Copyright 2011-2023 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -725,8 +725,7 @@ void compute_error_squared_rgba(
 	const image_block& blk,
 	const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS],
 	const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS],
-	float uncor_lengths[BLOCK_MAX_PARTITIONS],
-	float samec_lengths[BLOCK_MAX_PARTITIONS],
+	float line_lengths[BLOCK_MAX_PARTITIONS],
 	float& uncor_error,
 	float& samec_error
 ) {
@@ -740,12 +739,6 @@ void compute_error_squared_rgba(
 	{
 		const uint8_t *texel_indexes = pi.texels_of_partition[partition];
 
-		float uncor_loparam = 1e10f;
-		float uncor_hiparam = -1e10f;
-
-		float samec_loparam = 1e10f;
-		float samec_hiparam = -1e10f;
-
 		processed_line4 l_uncor = uncor_plines[partition];
 		processed_line4 l_samec = samec_plines[partition];
 
@@ -773,9 +766,6 @@ void compute_error_squared_rgba(
 		vfloat uncor_loparamv(1e10f);
 		vfloat uncor_hiparamv(-1e10f);
 
-		vfloat samec_loparamv(1e10f);
-		vfloat samec_hiparamv(-1e10f);
-
 		vfloat ew_r(blk.channel_weight.lane<0>());
 		vfloat ew_g(blk.channel_weight.lane<1>());
 		vfloat ew_b(blk.channel_weight.lane<2>());
@@ -825,9 +815,6 @@ void compute_error_squared_rgba(
 			                   + (data_b * l_samec_bs2)
 			                   + (data_a * l_samec_bs3);
 
-			samec_loparamv = min(samec_param, samec_loparamv);
-			samec_hiparamv = max(samec_param, samec_hiparamv);
-
 			vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r;
 			vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g;
 			vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b;
@@ -843,18 +830,9 @@ void compute_error_squared_rgba(
 			lane_ids += vint(ASTCENC_SIMD_WIDTH);
 		}
 
-		uncor_loparam = hmin_s(uncor_loparamv);
-		uncor_hiparam = hmax_s(uncor_hiparamv);
-
-		samec_loparam = hmin_s(samec_loparamv);
-		samec_hiparam = hmax_s(samec_hiparamv);
-
-		float uncor_linelen = uncor_hiparam - uncor_loparam;
-		float samec_linelen = samec_hiparam - samec_loparam;
-
 		// Turn very small numbers and NaNs into a small number
-		uncor_lengths[partition] = astc::max(uncor_linelen, 1e-7f);
-		samec_lengths[partition] = astc::max(samec_linelen, 1e-7f);
+		float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv);
+		line_lengths[partition] = astc::max(uncor_linelen, 1e-7f);
 	}
 
 	uncor_error = hadd_s(uncor_errorsumv);
@@ -882,19 +860,9 @@ void compute_error_squared_rgb(
 		unsigned int texel_count = pi.partition_texel_count[partition];
 		promise(texel_count > 0);
 
-		float uncor_loparam = 1e10f;
-		float uncor_hiparam = -1e10f;
-
-		float samec_loparam = 1e10f;
-		float samec_hiparam = -1e10f;
-
 		processed_line3 l_uncor = pl.uncor_pline;
 		processed_line3 l_samec = pl.samec_pline;
 
-		// This implementation is an example vectorization of this function.
-		// It works for - the codec is a 2-4% faster than not vectorizing - but
-		// the benefit is limited by the use of gathers and register pressure
-
 		// Vectorize some useful scalar inputs
 		vfloat l_uncor_bs0(l_uncor.bs.lane<0>());
 		vfloat l_uncor_bs1(l_uncor.bs.lane<1>());
@@ -913,9 +881,6 @@ void compute_error_squared_rgb(
 		vfloat uncor_loparamv(1e10f);
 		vfloat uncor_hiparamv(-1e10f);
 
-		vfloat samec_loparamv(1e10f);
-		vfloat samec_hiparamv(-1e10f);
-
 		vfloat ew_r(blk.channel_weight.lane<0>());
 		vfloat ew_g(blk.channel_weight.lane<1>());
 		vfloat ew_b(blk.channel_weight.lane<2>());
@@ -958,9 +923,6 @@ void compute_error_squared_rgb(
 			                   + (data_g * l_samec_bs1)
 			                   + (data_b * l_samec_bs2);
 
-			samec_loparamv = min(samec_param, samec_loparamv);
-			samec_hiparamv = max(samec_param, samec_hiparamv);
-
 			vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r;
 			vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g;
 			vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b;
@@ -974,18 +936,9 @@ void compute_error_squared_rgb(
 			lane_ids += vint(ASTCENC_SIMD_WIDTH);
 		}
 
-		uncor_loparam = hmin_s(uncor_loparamv);
-		uncor_hiparam = hmax_s(uncor_hiparamv);
-
-		samec_loparam = hmin_s(samec_loparamv);
-		samec_hiparam = hmax_s(samec_hiparamv);
-
-		float uncor_linelen = uncor_hiparam - uncor_loparam;
-		float samec_linelen = samec_hiparam - samec_loparam;
-
 		// Turn very small numbers and NaNs into a small number
-		pl.uncor_line_len = astc::max(uncor_linelen, 1e-7f);
-		pl.samec_line_len = astc::max(samec_linelen, 1e-7f);
+		float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv);
+		pl.line_length = astc::max(uncor_linelen, 1e-7f);
 	}
 
 	uncor_error = hadd_s(uncor_errorsumv);

+ 10 - 10
thirdparty/astcenc/astcenc_block_sizes.cpp

@@ -776,8 +776,8 @@ static void construct_dt_entry_2d(
 	assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
 	bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
 	bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
-	bsd.decimation_modes[index].refprec_1_plane = 0;
-	bsd.decimation_modes[index].refprec_2_planes = 0;
+	bsd.decimation_modes[index].refprec_1plane = 0;
+	bsd.decimation_modes[index].refprec_2planes = 0;
 }
 
 /**
@@ -934,11 +934,11 @@ static void construct_block_size_descriptor_2d(
 
 			if (is_dual_plane)
 			{
-				dm.set_ref_2_plane(bm.get_weight_quant_mode());
+				dm.set_ref_2plane(bm.get_weight_quant_mode());
 			}
 			else
 			{
-				dm.set_ref_1_plane(bm.get_weight_quant_mode());
+				dm.set_ref_1plane(bm.get_weight_quant_mode());
 			}
 
 			bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);
@@ -969,8 +969,8 @@ static void construct_block_size_descriptor_2d(
 	{
 		bsd.decimation_modes[i].maxprec_1plane = -1;
 		bsd.decimation_modes[i].maxprec_2planes = -1;
-		bsd.decimation_modes[i].refprec_1_plane = 0;
-		bsd.decimation_modes[i].refprec_2_planes = 0;
+		bsd.decimation_modes[i].refprec_1plane = 0;
+		bsd.decimation_modes[i].refprec_2planes = 0;
 	}
 
 	// Determine the texels to use for kmeans clustering.
@@ -1055,8 +1055,8 @@ static void construct_block_size_descriptor_3d(
 
 				bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
 				bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
-				bsd.decimation_modes[decimation_mode_count].refprec_1_plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
-				bsd.decimation_modes[decimation_mode_count].refprec_2_planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
+				bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
+				bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
 				decimation_mode_count++;
 			}
 		}
@@ -1067,8 +1067,8 @@ static void construct_block_size_descriptor_3d(
 	{
 		bsd.decimation_modes[i].maxprec_1plane = -1;
 		bsd.decimation_modes[i].maxprec_2planes = -1;
-		bsd.decimation_modes[i].refprec_1_plane = 0;
-		bsd.decimation_modes[i].refprec_2_planes = 0;
+		bsd.decimation_modes[i].refprec_1plane = 0;
+		bsd.decimation_modes[i].refprec_2planes = 0;
 	}
 
 	bsd.decimation_mode_count_always = 0; // Skipped for 3D modes

+ 65 - 77
thirdparty/astcenc/astcenc_color_quantize.cpp

@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2021 Arm Limited
+// Copyright 2011-2023 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -44,17 +44,43 @@
  * @brief Determine the quantized value given a quantization level.
  *
  * @param quant_level   The quantization level to use.
- * @param value         The value to convert. This may be outside of the 0-255 range and will be
- *                      clamped before the value is looked up.
+ * @param value         The value to convert. This must be in the 0-255 range.
  *
- * @return The encoded quantized value. These are not necessarily in order; the compressor
- *         scrambles the values slightly to make hardware implementation easier.
+ * @return The unpacked quantized value, returned in 0-255 range.
  */
 static inline uint8_t quant_color(
 	quant_method quant_level,
 	int value
 ) {
-	return color_unquant_to_uquant_tables[quant_level - QUANT_6][value];
+	int index = value * 2 + 1;
+	return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
+}
+
+/**
+ * @brief Determine the quantized value given a quantization level and residual.
+ *
+ * @param quant_level   The quantization level to use.
+ * @param value         The value to convert. This must be in the 0-255 range.
+ * @param valuef        The original value before rounding, used to compute a residual.
+ *
+ * @return The unpacked quantized value, returned in 0-255 range.
+ */
+static inline uint8_t quant_color(
+	quant_method quant_level,
+	int value,
+	float valuef
+) {
+	int index = value * 2;
+
+	// Compute the residual to determine if we should round down or up ties.
+	// Test should be residual >= 0, but empirical testing shows small bias helps.
+	float residual = valuef - static_cast<float>(value);
+	if (residual >= -0.1f)
+	{
+		index++;
+	}
+
+	return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
 }
 
 /**
@@ -86,16 +112,16 @@ static void quantize_rgb(
 	float b1 = astc::clamp255f(color1.lane<2>() * scale);
 
 	int ri0, gi0, bi0, ri1, gi1, bi1;
-	float rgb0_addon = 0.5f;
-	float rgb1_addon = 0.5f;
+	float rgb0_addon = 0.0f;
+	float rgb1_addon = 0.0f;
 	do
 	{
-		ri0 = quant_color(quant_level, astc::max(astc::flt2int_rd(r0 + rgb0_addon), 0));
-		gi0 = quant_color(quant_level, astc::max(astc::flt2int_rd(g0 + rgb0_addon), 0));
-		bi0 = quant_color(quant_level, astc::max(astc::flt2int_rd(b0 + rgb0_addon), 0));
-		ri1 = quant_color(quant_level, astc::min(astc::flt2int_rd(r1 + rgb1_addon), 255));
-		gi1 = quant_color(quant_level, astc::min(astc::flt2int_rd(g1 + rgb1_addon), 255));
-		bi1 = quant_color(quant_level, astc::min(astc::flt2int_rd(b1 + rgb1_addon), 255));
+		ri0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(r0 + rgb0_addon), 0), r0 + rgb0_addon);
+		gi0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(g0 + rgb0_addon), 0), g0 + rgb0_addon);
+		bi0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(b0 + rgb0_addon), 0), b0 + rgb0_addon);
+		ri1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(r1 + rgb1_addon), 255), r1 + rgb1_addon);
+		gi1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(g1 + rgb1_addon), 255), g1 + rgb1_addon);
+		bi1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(b1 + rgb1_addon), 255), b1 + rgb1_addon);
 
 		rgb0_addon -= 0.2f;
 		rgb1_addon += 0.2f;
@@ -133,8 +159,8 @@ static void quantize_rgba(
 	float a0 = astc::clamp255f(color0.lane<3>() * scale);
 	float a1 = astc::clamp255f(color1.lane<3>() * scale);
 
-	output[6] = quant_color(quant_level, astc::flt2int_rtn(a0));
-	output[7] = quant_color(quant_level, astc::flt2int_rtn(a1));
+	output[6] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
+	output[7] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
 
 	quantize_rgb(color0, color1, output, quant_level);
 }
@@ -180,13 +206,13 @@ static bool try_quantize_rgb_blue_contract(
 	}
 
 	// Quantize the inverse-blue-contracted color
-	int ri0 = quant_color(quant_level, astc::flt2int_rtn(r0));
-	int gi0 = quant_color(quant_level, astc::flt2int_rtn(g0));
-	int bi0 = quant_color(quant_level, astc::flt2int_rtn(b0));
+	int ri0 = quant_color(quant_level, astc::flt2int_rtn(r0), r0);
+	int gi0 = quant_color(quant_level, astc::flt2int_rtn(g0), g0);
+	int bi0 = quant_color(quant_level, astc::flt2int_rtn(b0), b0);
 
-	int ri1 = quant_color(quant_level, astc::flt2int_rtn(r1));
-	int gi1 = quant_color(quant_level, astc::flt2int_rtn(g1));
-	int bi1 = quant_color(quant_level, astc::flt2int_rtn(b1));
+	int ri1 = quant_color(quant_level, astc::flt2int_rtn(r1), r1);
+	int gi1 = quant_color(quant_level, astc::flt2int_rtn(g1), g1);
+	int bi1 = quant_color(quant_level, astc::flt2int_rtn(b1), b1);
 
 	// If color #1 is not larger than color #0 then blue-contraction cannot be used. Note that
 	// blue-contraction and quantization change this order, which is why we must test afterwards.
@@ -217,7 +243,7 @@ static bool try_quantize_rgb_blue_contract(
  *
  * @return Returns @c false on failure, @c true on success.
  */
-static int try_quantize_rgba_blue_contract(
+static bool try_quantize_rgba_blue_contract(
 	vfloat4 color0,
 	vfloat4 color1,
 	uint8_t output[8],
@@ -228,8 +254,8 @@ static int try_quantize_rgba_blue_contract(
 	float a0 = astc::clamp255f(color0.lane<3>() * scale);
 	float a1 = astc::clamp255f(color1.lane<3>() * scale);
 
-	output[6] = quant_color(quant_level, astc::flt2int_rtn(a1));
-	output[7] = quant_color(quant_level, astc::flt2int_rtn(a0));
+	output[6] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
+	output[7] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
 
 	return try_quantize_rgb_blue_contract(color0, color1, output, quant_level);
 }
@@ -433,7 +459,7 @@ static bool try_quantize_rgb_delta_blue_contract(
 	g1d |= (g0b & 0x100) >> 1;
 	b1d |= (b0b & 0x100) >> 1;
 
-	// Then quantize and  unquantize; if this causes any of the top two bits to flip,
+	// Then quantize and unquantize; if this causes any of the top two bits to flip,
 	// then encoding fails, since we have then corrupted either the top bit of the base
 	// or the sign bit of the offset.
 	int r1de = quant_color(quant_level, r1d);
@@ -728,9 +754,9 @@ static void quantize_rgbs(
 	float g = astc::clamp255f(color.lane<1>() * scale);
 	float b = astc::clamp255f(color.lane<2>() * scale);
 
-	int ri = quant_color(quant_level, astc::flt2int_rtn(r));
-	int gi = quant_color(quant_level, astc::flt2int_rtn(g));
-	int bi = quant_color(quant_level, astc::flt2int_rtn(b));
+	int ri = quant_color(quant_level, astc::flt2int_rtn(r), r);
+	int gi = quant_color(quant_level, astc::flt2int_rtn(g), g);
+	int bi = quant_color(quant_level, astc::flt2int_rtn(b), b);
 
 	float oldcolorsum = hadd_rgb_s(color) * scale;
 	float newcolorsum = static_cast<float>(ri + gi + bi);
@@ -764,8 +790,8 @@ static void quantize_rgbs_alpha(
 	float a0 = astc::clamp255f(color0.lane<3>() * scale);
 	float a1 = astc::clamp255f(color1.lane<3>() * scale);
 
-	output[4] = quant_color(quant_level, astc::flt2int_rtn(a0));
-	output[5] = quant_color(quant_level, astc::flt2int_rtn(a1));
+	output[4] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
+	output[5] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
 
 	quantize_rgbs(color, output, quant_level);
 }
@@ -799,8 +825,8 @@ static void quantize_luminance(
 		lum1 = avg;
 	}
 
-	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0));
-	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1));
+	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
+	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
 }
 
 /**
@@ -828,48 +854,10 @@ static void quantize_luminance_alpha(
 	float a0 = astc::clamp255f(color0.lane<3>());
 	float a1 = astc::clamp255f(color1.lane<3>());
 
-	// If endpoints are close then pull apart slightly; this gives > 8 bit normal map precision.
-	if (quant_level > 18)
-	{
-		if (fabsf(lum0 - lum1) < 3.0f)
-		{
-			if (lum0 < lum1)
-			{
-				lum0 -= 0.5f;
-				lum1 += 0.5f;
-			}
-			else
-			{
-				lum0 += 0.5f;
-				lum1 -= 0.5f;
-			}
-
-			lum0 = astc::clamp255f(lum0);
-			lum1 = astc::clamp255f(lum1);
-		}
-
-		if (fabsf(a0 - a1) < 3.0f)
-		{
-			if (a0 < a1)
-			{
-				a0 -= 0.5f;
-				a1 += 0.5f;
-			}
-			else
-			{
-				a0 += 0.5f;
-				a1 -= 0.5f;
-			}
-
-			a0 = astc::clamp255f(a0);
-			a1 = astc::clamp255f(a1);
-		}
-	}
-
-	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0));
-	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1));
-	output[2] = quant_color(quant_level, astc::flt2int_rtn(a0));
-	output[3] = quant_color(quant_level, astc::flt2int_rtn(a1));
+	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
+	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
+	output[2] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
+	output[3] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
 }
 
 /**
@@ -1661,8 +1649,8 @@ static void quantize_hdr_rgb_ldr_alpha(
 	float a0 = astc::clamp255f(color0.lane<3>() * scale);
 	float a1 = astc::clamp255f(color1.lane<3>() * scale);
 
-	output[6] = quant_color(quant_level, astc::flt2int_rtn(a0));
-	output[7] = quant_color(quant_level, astc::flt2int_rtn(a1));
+	output[6] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
+	output[7] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
 
 	quantize_hdr_rgb(color0, color1, output, quant_level);
 }

+ 7 - 7
thirdparty/astcenc/astcenc_compress_symbolic.cpp

@@ -391,7 +391,7 @@ static float compress_symbolic_block_for_partition_1plane(
 	for (unsigned int i = 0; i < max_decimation_modes; i++)
 	{
 		const auto& dm = bsd.get_decimation_mode(i);
-		if (!dm.is_ref_1_plane(static_cast<quant_method>(max_weight_quant)))
+		if (!dm.is_ref_1plane(static_cast<quant_method>(max_weight_quant)))
 		{
 			continue;
 		}
@@ -561,7 +561,7 @@ static float compress_symbolic_block_for_partition_1plane(
 			workscb.color_formats_matched = 0;
 			if (partition_count >= 2 && all_same)
 			{
-				uint8_t colorvals[BLOCK_MAX_PARTITIONS][12];
+				uint8_t colorvals[BLOCK_MAX_PARTITIONS][8];
 				uint8_t color_formats_mod[BLOCK_MAX_PARTITIONS] { 0 };
 				bool all_same_mod = true;
 				for (unsigned int j = 0; j < partition_count; j++)
@@ -743,7 +743,7 @@ static float compress_symbolic_block_for_partition_2planes(
 	for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++)
 	{
 		const auto& dm = bsd.get_decimation_mode(i);
-		if (!dm.is_ref_2_plane(static_cast<quant_method>(max_weight_quant)))
+		if (!dm.is_ref_2plane(static_cast<quant_method>(max_weight_quant)))
 		{
 			continue;
 		}
@@ -1263,8 +1263,8 @@ void compress_block(
 
 	float exit_thresholds_for_pcount[BLOCK_MAX_PARTITIONS] {
 		0.0f,
-		ctx.config.tune_2_partition_early_out_limit_factor,
-		ctx.config.tune_3_partition_early_out_limit_factor,
+		ctx.config.tune_2partition_early_out_limit_factor,
+		ctx.config.tune_3partition_early_out_limit_factor,
 		0.0f
 	};
 
@@ -1318,7 +1318,7 @@ void compress_block(
 	lowest_correl = prepare_block_statistics(bsd.texel_count, blk);
 #endif
 
-	block_skip_two_plane = lowest_correl > ctx.config.tune_2_plane_early_out_limit_correlation;
+	block_skip_two_plane = lowest_correl > ctx.config.tune_2plane_early_out_limit_correlation;
 
 	// Test the four possible 1-partition, 2-planes modes. Do this in reverse, as
 	// alpha is the most likely to be non-correlated if it is present in the data.
@@ -1331,7 +1331,7 @@ void compress_block(
 
 		if (block_skip_two_plane)
 		{
-			trace_add_data("skip", "tune_2_plane_early_out_limit_correlation");
+			trace_add_data("skip", "tune_2plane_early_out_limit_correlation");
 			continue;
 		}
 

+ 17 - 2
thirdparty/astcenc/astcenc_diagnostic_trace.cpp

@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2021-2022 Arm Limited
+// Copyright 2021-2023 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -24,6 +24,8 @@
 #include <cassert>
 #include <cstdarg>
 #include <cstdio>
+#include <cmath>
+#include <limits>
 #include <string>
 
 #include "astcenc_diagnostic_trace.h"
@@ -203,7 +205,20 @@ void trace_add_data(
 	const char* key,
 	float value
 ) {
-  	char buffer[256];
+	// Turn infinities into parseable values
+	if (std::isinf(value))
+	{
+		if (value > 0.0f)
+		{
+			value = std::numeric_limits<float>::max();
+		}
+		else
+		{
+			value = -std::numeric_limits<float>::max();
+		}
+	}
+
+	char buffer[256];
 	sprintf(buffer, "%.20g", (double)value);
 	TraceNode* node = g_TraceLog->get_current_leaf();
 	node->add_attrib("float", key, buffer);

+ 15 - 73
thirdparty/astcenc/astcenc_entry.cpp

@@ -52,9 +52,9 @@ struct astcenc_preset_config
 	float tune_db_limit_a_base;
 	float tune_db_limit_b_base;
 	float tune_mse_overshoot;
-	float tune_2_partition_early_out_limit_factor;
-	float tune_3_partition_early_out_limit_factor;
-	float tune_2_plane_early_out_limit_correlation;
+	float tune_2partition_early_out_limit_factor;
+	float tune_3partition_early_out_limit_factor;
+	float tune_2plane_early_out_limit_correlation;
 };
 
 /**
@@ -157,48 +157,6 @@ static astcenc_error validate_cpu_float()
 	return ASTCENC_SUCCESS;
 }
 
-/**
- * @brief Validate CPU ISA support meets the requirements of this build of the library.
- *
- * Each library build is statically compiled for a particular set of CPU ISA features, such as the
- * SIMD support or other ISA extensions such as POPCNT. This function checks that the host CPU
- * actually supports everything this build needs.
- *
- * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
- */
-static astcenc_error validate_cpu_isa()
-{
-	#if ASTCENC_SSE >= 41
-		if (!cpu_supports_sse41())
-		{
-			return ASTCENC_ERR_BAD_CPU_ISA;
-		}
-	#endif
-
-	#if ASTCENC_POPCNT >= 1
-		if (!cpu_supports_popcnt())
-		{
-			return ASTCENC_ERR_BAD_CPU_ISA;
-		}
-	#endif
-
-	#if ASTCENC_F16C >= 1
-		if (!cpu_supports_f16c())
-		{
-			return ASTCENC_ERR_BAD_CPU_ISA;
-		}
-	#endif
-
-	#if ASTCENC_AVX >= 2
-		if (!cpu_supports_avx2())
-		{
-			return ASTCENC_ERR_BAD_CPU_ISA;
-		}
-	#endif
-
-	return ASTCENC_SUCCESS;
-}
-
 /**
  * @brief Validate config profile.
  *
@@ -439,9 +397,9 @@ static astcenc_error validate_config(
 	config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
 	config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f);
 	config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f);
-	config.tune_2_partition_early_out_limit_factor = astc::max(config.tune_2_partition_early_out_limit_factor, 0.0f);
-	config.tune_3_partition_early_out_limit_factor = astc::max(config.tune_3_partition_early_out_limit_factor, 0.0f);
-	config.tune_2_plane_early_out_limit_correlation = astc::max(config.tune_2_plane_early_out_limit_correlation, 0.0f);
+	config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f);
+	config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f);
+	config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f);
 
 	// Specifying a zero weight color component is not allowed; force to small value
 	float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight),
@@ -475,14 +433,6 @@ astcenc_error astcenc_config_init(
 ) {
 	astcenc_error status;
 
-	// Check basic library compatibility options here so they are checked early. Note, these checks
-	// are repeated in context_alloc for cases where callers use a manually defined config struct
-	status = validate_cpu_isa();
-	if (status != ASTCENC_SUCCESS)
-	{
-		return status;
-	}
-
 	status = validate_cpu_float();
 	if (status != ASTCENC_SUCCESS)
 	{
@@ -563,9 +513,9 @@ astcenc_error astcenc_config_init(
 
 		config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot;
 
-		config.tune_2_partition_early_out_limit_factor = (*preset_configs)[start].tune_2_partition_early_out_limit_factor;
-		config.tune_3_partition_early_out_limit_factor =(*preset_configs)[start].tune_3_partition_early_out_limit_factor;
-		config.tune_2_plane_early_out_limit_correlation = (*preset_configs)[start].tune_2_plane_early_out_limit_correlation;
+		config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor;
+		config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor;
+		config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation;
 	}
 	// Start and end node are not the same - so interpolate between them
 	else
@@ -605,9 +555,9 @@ astcenc_error astcenc_config_init(
 
 		config.tune_mse_overshoot = LERP(tune_mse_overshoot);
 
-		config.tune_2_partition_early_out_limit_factor = LERP(tune_2_partition_early_out_limit_factor);
-		config.tune_3_partition_early_out_limit_factor = LERP(tune_3_partition_early_out_limit_factor);
-		config.tune_2_plane_early_out_limit_correlation = LERP(tune_2_plane_early_out_limit_correlation);
+		config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor);
+		config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor);
+		config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation);
 		#undef LERP
 		#undef LERPI
 		#undef LERPUI
@@ -656,9 +606,9 @@ astcenc_error astcenc_config_init(
 
 		config.cw_g_weight = 0.0f;
 		config.cw_b_weight = 0.0f;
-		config.tune_2_partition_early_out_limit_factor *= 1.5f;
-		config.tune_3_partition_early_out_limit_factor *= 1.5f;
-		config.tune_2_plane_early_out_limit_correlation = 0.99f;
+		config.tune_2partition_early_out_limit_factor *= 1.5f;
+		config.tune_3partition_early_out_limit_factor *= 1.5f;
+		config.tune_2plane_early_out_limit_correlation = 0.99f;
 
 		// Normals are prone to blocking artifacts on smooth curves
 		// so force compressor to try harder here ...
@@ -702,12 +652,6 @@ astcenc_error astcenc_context_alloc(
 	astcenc_error status;
 	const astcenc_config& config = *configp;
 
-	status = validate_cpu_isa();
-	if (status != ASTCENC_SUCCESS)
-	{
-		return status;
-	}
-
 	status = validate_cpu_float();
 	if (status != ASTCENC_SUCCESS)
 	{
@@ -1399,8 +1343,6 @@ const char* astcenc_get_error_string(
 		return "ASTCENC_ERR_OUT_OF_MEM";
 	case ASTCENC_ERR_BAD_CPU_FLOAT:
 		return "ASTCENC_ERR_BAD_CPU_FLOAT";
-	case ASTCENC_ERR_BAD_CPU_ISA:
-		return "ASTCENC_ERR_BAD_CPU_ISA";
 	case ASTCENC_ERR_BAD_PARAM:
 		return "ASTCENC_ERR_BAD_PARAM";
 	case ASTCENC_ERR_BAD_BLOCK_SIZE:

+ 8 - 20
thirdparty/astcenc/astcenc_find_best_partitioning.cpp

@@ -604,8 +604,7 @@ unsigned int find_best_partition_candidates(
 			processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS];
 			processed_line4 samec_plines[BLOCK_MAX_PARTITIONS];
 
-			float uncor_line_lens[BLOCK_MAX_PARTITIONS];
-			float samec_line_lens[BLOCK_MAX_PARTITIONS];
+			float line_lengths[BLOCK_MAX_PARTITIONS];
 
 			for (unsigned int j = 0; j < partition_count; j++)
 			{
@@ -631,8 +630,7 @@ unsigned int find_best_partition_candidates(
 			                           blk,
 			                           uncor_plines,
 			                           samec_plines,
-			                           uncor_line_lens,
-			                           samec_line_lens,
+			                           line_lengths,
 			                           uncor_error,
 			                           samec_error);
 
@@ -651,8 +649,8 @@ unsigned int find_best_partition_candidates(
 				float tpp = static_cast<float>(pi.partition_texel_count[j]);
 				vfloat4 error_weights(tpp * weight_imprecision_estim);
 
-				vfloat4 uncor_vector = uncor_lines[j].b * uncor_line_lens[j];
-				vfloat4 samec_vector = samec_lines[j].b * samec_line_lens[j];
+				vfloat4 uncor_vector = uncor_lines[j].b * line_lengths[j];
+				vfloat4 samec_vector = samec_lines[j].b * line_lengths[j];
 
 				uncor_error += dot_s(uncor_vector * uncor_vector, error_weights);
 				samec_error += dot_s(samec_vector * samec_vector, error_weights);
@@ -719,8 +717,8 @@ unsigned int find_best_partition_candidates(
 				float tpp = static_cast<float>(pi.partition_texel_count[j]);
 				vfloat4 error_weights(tpp * weight_imprecision_estim);
 
-				vfloat4 uncor_vector = pl.uncor_line.b * pl.uncor_line_len;
-				vfloat4 samec_vector = pl.samec_line.b * pl.samec_line_len;
+				vfloat4 uncor_vector = pl.uncor_line.b * pl.line_length;
+				vfloat4 samec_vector = pl.samec_line.b * pl.line_length;
 
 				uncor_error += dot3_s(uncor_vector * uncor_vector, error_weights);
 				samec_error += dot3_s(samec_vector * samec_vector, error_weights);
@@ -731,21 +729,11 @@ unsigned int find_best_partition_candidates(
 		}
 	}
 
-	bool best_is_uncor = uncor_best_partitions[0] > samec_best_partitions[0];
-
 	unsigned int interleave[2 * TUNE_MAX_PARTITIONING_CANDIDATES];
 	for (unsigned int i = 0; i < requested_candidates; i++)
 	{
-		if (best_is_uncor)
-		{
-			interleave[2 * i] = bsd.get_raw_partition_info(partition_count, uncor_best_partitions[i]).partition_index;
-			interleave[2 * i + 1] = bsd.get_raw_partition_info(partition_count, samec_best_partitions[i]).partition_index;
-		}
-		else
-		{
-			interleave[2 * i] = bsd.get_raw_partition_info(partition_count, samec_best_partitions[i]).partition_index;
-			interleave[2 * i + 1] = bsd.get_raw_partition_info(partition_count, uncor_best_partitions[i]).partition_index;
-		}
+		interleave[2 * i] = bsd.get_raw_partition_info(partition_count, uncor_best_partitions[i]).partition_index;
+		interleave[2 * i + 1] = bsd.get_raw_partition_info(partition_count, samec_best_partitions[i]).partition_index;
 	}
 
 	uint64_t bitmasks[1024/64] { 0 };

+ 28 - 51
thirdparty/astcenc/astcenc_internal.h

@@ -293,11 +293,13 @@ struct partition_lines3
 	/** @brief Post-processed line for correlated chroma, passing though the origin. */
 	processed_line3 samec_pline;
 
-	/** @brief The length of the line for uncorrelated chroma. */
-	float uncor_line_len;
-
-	/** @brief The length of the line for correlated chroma. */
-	float samec_line_len;
+	/**
+	 * @brief The length of the line for uncorrelated chroma.
+	 *
+	 * This is used for both the uncorrelated and same chroma lines - they are normally very similar
+	 * and only used for the relative ranking of partitionings against one another.
+	 */
+	float line_length;
 };
 
 /**
@@ -319,8 +321,8 @@ struct partition_info
 	/**
 	 * @brief The number of texels in each partition.
 	 *
-	 * Note that some seeds result in zero texels assigned to a partition are valid, but are skipped
-	 * by this compressor as there is no point spending bits encoding an unused color endpoint.
+	 * Note that some seeds result in zero texels assigned to a partition. These are valid, but are
+	 * skipped by this compressor as there is no point spending bits encoding unused endpoints.
 	 */
 	uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS];
 
@@ -455,23 +457,23 @@ struct decimation_mode
 	 *
 	 * Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
 	 */
-	uint16_t refprec_1_plane;
+	uint16_t refprec_1plane;
 
 	/**
 	 * @brief Bitvector indicating weight quant methods used by active 2 plane block modes.
 	 *
 	 * Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
 	 */
-	uint16_t refprec_2_planes;
+	uint16_t refprec_2planes;
 
 	/**
 	 * @brief Set a 1 plane weight quant as active.
 	 *
 	 * @param weight_quant   The quant method to set.
 	 */
-	void set_ref_1_plane(quant_method weight_quant)
+	void set_ref_1plane(quant_method weight_quant)
 	{
-		refprec_1_plane |= (1 << weight_quant);
+		refprec_1plane |= (1 << weight_quant);
 	}
 
 	/**
@@ -479,10 +481,10 @@ struct decimation_mode
 	 *
 	 * @param max_weight_quant   The max quant method to test.
 	 */
-	bool is_ref_1_plane(quant_method max_weight_quant) const
+	bool is_ref_1plane(quant_method max_weight_quant) const
 	{
 		uint16_t mask = static_cast<uint16_t>((1 << (max_weight_quant + 1)) - 1);
-		return (refprec_1_plane & mask) != 0;
+		return (refprec_1plane & mask) != 0;
 	}
 
 	/**
@@ -490,9 +492,9 @@ struct decimation_mode
 	 *
 	 * @param weight_quant   The quant method to set.
 	 */
-	void set_ref_2_plane(quant_method weight_quant)
+	void set_ref_2plane(quant_method weight_quant)
 	{
-		refprec_2_planes |= static_cast<uint16_t>(1 << weight_quant);
+		refprec_2planes |= static_cast<uint16_t>(1 << weight_quant);
 	}
 
 	/**
@@ -500,10 +502,10 @@ struct decimation_mode
 	 *
 	 * @param max_weight_quant   The max quant method to test.
 	 */
-	bool is_ref_2_plane(quant_method max_weight_quant) const
+	bool is_ref_2plane(quant_method max_weight_quant) const
 	{
 		uint16_t mask = static_cast<uint16_t>((1 << (max_weight_quant + 1)) - 1);
-		return (refprec_2_planes & mask) != 0;
+		return (refprec_2planes & mask) != 0;
 	}
 };
 
@@ -1336,9 +1338,14 @@ bool is_legal_3d_block_size(
  * Converts unquant value in 0-255 range into quant value in 0-255 range.
  * No BISE scrambling is applied at this stage.
  *
- * Indexed by [quant_mode - 4][data_value].
+ * The BISE encoding results in ties where available quant<256> values are
+ * equidistant from the available quant<BISE> values. This table stores two values
+ * for each input - one for use with a negative residual, and one for use with
+ * a positive residual.
+ *
+ * Indexed by [quant_mode - 4][data_value * 2 + residual].
  */
-extern const uint8_t color_unquant_to_uquant_tables[17][256];
+extern const uint8_t color_unquant_to_uquant_tables[17][512];
 
 /**
  * @brief The precomputed table for packing quantized color values.
@@ -1528,8 +1535,7 @@ void compute_error_squared_rgb(
  * @param      blk             The image block color data to be compressed.
  * @param      uncor_plines    Processed uncorrelated partition lines for each partition.
  * @param      samec_plines    Processed same chroma partition lines for each partition.
- * @param[out] uncor_lengths   The length of each components deviation from the line.
- * @param[out] samec_lengths   The length of each components deviation from the line.
+ * @param[out] line_lengths    The length of each component's deviation from the line.
  * @param[out] uncor_error     The cumulative error for using the uncorrelated line.
  * @param[out] samec_error     The cumulative error for using the same chroma line.
  */
@@ -1538,8 +1544,7 @@ void compute_error_squared_rgba(
 	const image_block& blk,
 	const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS],
 	const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS],
-	float uncor_lengths[BLOCK_MAX_PARTITIONS],
-	float samec_lengths[BLOCK_MAX_PARTITIONS],
+	float line_lengths[BLOCK_MAX_PARTITIONS],
 	float& uncor_error,
 	float& samec_error);
 
@@ -2120,34 +2125,6 @@ void physical_to_symbolic(
 /* ============================================================================
 Platform-specific functions.
 ============================================================================ */
-/**
- * @brief Run-time detection if the host CPU supports the POPCNT extension.
- *
- * @return @c true if supported, @c false if not.
- */
-bool cpu_supports_popcnt();
-
-/**
- * @brief Run-time detection if the host CPU supports F16C extension.
- *
- * @return @c true if supported, @c false if not.
- */
-bool cpu_supports_f16c();
-
-/**
- * @brief Run-time detection if the host CPU supports SSE 4.1 extension.
- *
- * @return @c true if supported, @c false if not.
- */
-bool cpu_supports_sse41();
-
-/**
- * @brief Run-time detection if the host CPU supports AVX 2 extension.
- *
- * @return @c true if supported, @c false if not.
- */
-bool cpu_supports_avx2();
-
 /**
  * @brief Allocate an aligned memory buffer.
  *

+ 0 - 166
thirdparty/astcenc/astcenc_platform_isa_detection.cpp

@@ -1,166 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// ----------------------------------------------------------------------------
-// Copyright 2020-2022 Arm Limited
-//
-// Licensed under the Apache License, Version 2.0 (the "License"); you may not
-// use this file except in compliance with the License. You may obtain a copy
-// of the License at:
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-// License for the specific language governing permissions and limitations
-// under the License.
-// ----------------------------------------------------------------------------
-
-/**
- * @brief Platform-specific function implementations.
- *
- * This module contains functions for querying the host extended ISA support.
- */
-
-// Include before the defines below to pick up any auto-setup based on compiler
-// built-in config, if not being set explicitly by the build system
-#include "astcenc_internal.h"
-
-#if (ASTCENC_SSE > 0)    || (ASTCENC_AVX > 0) || \
-    (ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0)
-
-static bool g_init { false };
-
-/** Does this CPU support SSE 4.1? Set to -1 if not yet initialized. */
-static bool g_cpu_has_sse41 { false };
-
-/** Does this CPU support AVX2? Set to -1 if not yet initialized. */
-static bool g_cpu_has_avx2 { false };
-
-/** Does this CPU support POPCNT? Set to -1 if not yet initialized. */
-static bool g_cpu_has_popcnt { false };
-
-/** Does this CPU support F16C? Set to -1 if not yet initialized. */
-static bool g_cpu_has_f16c { false };
-
-/* ============================================================================
-   Platform code for Visual Studio
-============================================================================ */
-#if !defined(__clang__) && defined(_MSC_VER)
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <intrin.h>
-
-/**
- * @brief Detect platform CPU ISA support and update global trackers.
- */
-static void detect_cpu_isa()
-{
-	int data[4];
-
-	__cpuid(data, 0);
-	int num_id = data[0];
-
-	if (num_id >= 1)
-	{
-		__cpuidex(data, 1, 0);
-		// SSE41 = Bank 1, ECX, bit 19
-		g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false;
-		// POPCNT = Bank 1, ECX, bit 23
-		g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false;
-		// F16C = Bank 1, ECX, bit 29
-		g_cpu_has_f16c = data[2] & (1 << 29) ? true : false;
-	}
-
-	if (num_id >= 7)
-	{
-		__cpuidex(data, 7, 0);
-		// AVX2 = Bank 7, EBX, bit 5
-		g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false;
-	}
-
-	// Ensure state bits are updated before init flag is updated
-	MemoryBarrier();
-	g_init = true;
-}
-
-/* ============================================================================
-   Platform code for GCC and Clang
-============================================================================ */
-#else
-#include <cpuid.h>
-
-/**
- * @brief Detect platform CPU ISA support and update global trackers.
- */
-static void detect_cpu_isa()
-{
-	unsigned int data[4];
-
-	if (__get_cpuid_count(1, 0, &data[0], &data[1], &data[2], &data[3]))
-	{
-		// SSE41 = Bank 1, ECX, bit 19
-		g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false;
-		// POPCNT = Bank 1, ECX, bit 23
-		g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false;
-		// F16C = Bank 1, ECX, bit 29
-		g_cpu_has_f16c = data[2] & (1 << 29) ? true : false;
-	}
-
-	g_cpu_has_avx2 = 0;
-	if (__get_cpuid_count(7, 0, &data[0], &data[1], &data[2], &data[3]))
-	{
-		// AVX2 = Bank 7, EBX, bit 5
-		g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false;
-	}
-
-	// Ensure state bits are updated before init flag is updated
-	__sync_synchronize();
-	g_init = true;
-}
-#endif
-
-/* See header for documentation. */
-bool cpu_supports_popcnt()
-{
-	if (!g_init)
-	{
-		detect_cpu_isa();
-	}
-
-	return g_cpu_has_popcnt;
-}
-
-/* See header for documentation. */
-bool cpu_supports_f16c()
-{
-	if (!g_init)
-	{
-		detect_cpu_isa();
-	}
-
-	return g_cpu_has_f16c;
-}
-
-/* See header for documentation. */
-bool cpu_supports_sse41()
-{
-	if (!g_init)
-	{
-		detect_cpu_isa();
-	}
-
-	return g_cpu_has_sse41;
-}
-
-/* See header for documentation. */
-bool cpu_supports_avx2()
-{
-	if (!g_init)
-	{
-		detect_cpu_isa();
-	}
-
-	return g_cpu_has_avx2;
-}
-
-#endif

File diff ditekan karena terlalu besar
+ 444 - 445
thirdparty/astcenc/astcenc_quantization.cpp


+ 42 - 37
thirdparty/astcenc/astcenc_symbolic_physical.cpp

@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2021 Arm Limited
+// Copyright 2011-2023 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -24,36 +24,21 @@
 #include <cassert>
 
 /**
- * @brief Write up to 8 bits at an arbitrary bit offset.
- *
- * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so
- * may span two separate bytes in memory.
+ * @brief Reverse bits in a byte.
  *
- * @param         value       The value to write.
- * @param         bitcount    The number of bits to write, starting from LSB.
- * @param         bitoffset   The bit offset to store at, between 0 and 7.
- * @param[in,out] ptr         The data pointer to write to.
+ * @param p   The value to reverse.
+ *
+ * @return The reversed result.
  */
-static inline void write_bits(
-	int value,
-	int bitcount,
-	int bitoffset,
-	uint8_t* ptr
-) {
-	int mask = (1 << bitcount) - 1;
-	value &= mask;
-	ptr += bitoffset >> 3;
-	bitoffset &= 7;
-	value <<= bitoffset;
-	mask <<= bitoffset;
-	mask = ~mask;
-
-	ptr[0] &= mask;
-	ptr[0] |= value;
-	ptr[1] &= mask >> 8;
-	ptr[1] |= value >> 8;
+static inline int bitrev8(int p)
+{
+	p = ((p & 0x0F) << 4) | ((p >> 4) & 0x0F);
+	p = ((p & 0x33) << 2) | ((p >> 2) & 0x33);
+	p = ((p & 0x55) << 1) | ((p >> 1) & 0x55);
+	return p;
 }
 
+
 /**
  * @brief Read up to 8 bits at an arbitrary bit offset.
  *
@@ -80,19 +65,37 @@ static inline int read_bits(
 	return value;
 }
 
+#if !defined(ASTCENC_DECOMPRESS_ONLY)
+
 /**
- * @brief Reverse bits in a byte.
+ * @brief Write up to 8 bits at an arbitrary bit offset.
  *
- * @param p   The value to reverse.
-  *
- * @return The reversed result.
+ * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so
+ * may span two separate bytes in memory.
+ *
+ * @param         value       The value to write.
+ * @param         bitcount    The number of bits to write, starting from LSB.
+ * @param         bitoffset   The bit offset to store at; offsets of 8 or more advance @c ptr by whole bytes.
+ * @param[in,out] ptr         The data pointer to write to.
  */
-static inline int bitrev8(int p)
-{
-	p = ((p & 0x0F) << 4) | ((p >> 4) & 0x0F);
-	p = ((p & 0x33) << 2) | ((p >> 2) & 0x33);
-	p = ((p & 0x55) << 1) | ((p >> 1) & 0x55);
-	return p;
+static inline void write_bits(
+	int value,
+	int bitcount,
+	int bitoffset,
+	uint8_t* ptr
+) {
+	int mask = (1 << bitcount) - 1;
+	value &= mask;
+	ptr += bitoffset >> 3;
+	bitoffset &= 7;
+	value <<= bitoffset;
+	mask <<= bitoffset;
+	mask = ~mask;
+
+	ptr[0] &= mask;
+	ptr[0] |= value;
+	ptr[1] &= mask >> 8;
+	ptr[1] |= value >> 8;
 }
 
 /* See header for documentation. */
@@ -282,6 +285,8 @@ void symbolic_to_physical(
 	           scb.partition_count == 1 ? 17 : 19 + PARTITION_INDEX_BITS);
 }
 
+#endif
+
 /* See header for documentation. */
 void physical_to_symbolic(
 	const block_size_descriptor& bsd,

+ 3 - 3
thirdparty/astcenc/astcenc_weight_align.cpp

@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2022 Arm Limited
+// Copyright 2011-2023 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -353,7 +353,7 @@ void compute_angular_endpoints_1plane(
 	for (unsigned int i = 0; i < max_decimation_modes; i++)
 	{
 		const decimation_mode& dm = bsd.decimation_modes[i];
-		if (!dm.is_ref_1_plane(static_cast<quant_method>(max_weight_quant)))
+		if (!dm.is_ref_1plane(static_cast<quant_method>(max_weight_quant)))
 		{
 			continue;
 		}
@@ -422,7 +422,7 @@ void compute_angular_endpoints_2planes(
 	for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++)
 	{
 		const decimation_mode& dm = bsd.decimation_modes[i];
-		if (!dm.is_ref_2_plane(static_cast<quant_method>(max_weight_quant)))
+		if (!dm.is_ref_2plane(static_cast<quant_method>(max_weight_quant)))
 		{
 			continue;
 		}

Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini