1 年間前 · 7ceed18790
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -47,7 +47,7 @@ Files extracted from upstream source:
 
				 ## astcenc
			
 
				 
			
 
				 - Upstream: https://github.com/ARM-software/astc-encoder
			
 
				-- Version: 4.7.0 (1a51f2915121275038677317c8bf61f1a78b590c, 2024)
			
 
				+- Version: 4.8.0 (0d6c9047c5ad19640e2d60fdb8f11a16675e7938, 2024)
			
 
				 - License: Apache 2.0
			
 
				 
			
 
				 Files extracted from upstream source:
			
--- a/thirdparty/astcenc/astcenc_entry.cpp
+++ b/thirdparty/astcenc/astcenc_entry.cpp
@@ -1167,7 +1167,7 @@ astcenc_error astcenc_decompress_image(
 
				 		return ASTCENC_ERR_OUT_OF_MEM;
			
 
				 	}
			
 
				 
			
 
				-	image_block blk;
			
 
				+	image_block blk {};
			
 
				 	blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
			
 
				 
			
 
				 	// Decode mode inferred from the output data type
			
--- a/thirdparty/astcenc/astcenc_integer_sequence.cpp
+++ b/thirdparty/astcenc/astcenc_integer_sequence.cpp
@@ -1,6 +1,6 @@
 
				 // SPDX-License-Identifier: Apache-2.0
			
 
				 // ----------------------------------------------------------------------------
			
 
				-// Copyright 2011-2021 Arm Limited
			
 
				+// Copyright 2011-2024 Arm Limited
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
			
 
				 // use this file except in compliance with the License. You may obtain a copy
			
@@ -464,10 +464,10 @@ static inline void write_bits(
 
				 }
			
 
				 
			
 
				 /**
			
 
				- * @brief Read up to 8 bits at an arbitrary bit offset.
			
 
				+ * @brief Read up to 16 bits from two bytes.
			
 
				  *
			
 
				- * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so may
			
 
				- * span two separate bytes in memory.
			
 
				+ * This function reads a packed N-bit field from two bytes in memory. The stored value must exist
			
 
				+ * within the two bytes, but can start at an arbitrary bit offset and span the two bytes in memory.
			
 
				  *
			
 
				  * @param         bitcount    The number of bits to read.
			
 
				  * @param         bitoffset   The bit offset to read from, between 0 and 7.
			
--- a/thirdparty/astcenc/astcenc_internal.h
+++ b/thirdparty/astcenc/astcenc_internal.h
@@ -326,10 +326,10 @@ struct partition_info
 
				 	uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS];
			
 
				 
			
 
				 	/** @brief The partition of each texel in the block. */
			
 
				-	uint8_t partition_of_texel[BLOCK_MAX_TEXELS];
			
 
				+	ASTCENC_ALIGNAS uint8_t partition_of_texel[BLOCK_MAX_TEXELS];
			
 
				 
			
 
				 	/** @brief The list of texels in each partition. */
			
 
				-	uint8_t texels_of_partition[BLOCK_MAX_PARTITIONS][BLOCK_MAX_TEXELS];
			
 
				+	ASTCENC_ALIGNAS uint8_t texels_of_partition[BLOCK_MAX_PARTITIONS][BLOCK_MAX_TEXELS];
			
 
				 };
			
 
				 
			
 
				 /**
			
@@ -367,19 +367,19 @@ struct decimation_info
 
				 	 * @brief The number of weights that contribute to each texel.
			
 
				 	 * Value is between 1 and 4.
			
 
				 	 */
			
 
				-	uint8_t texel_weight_count[BLOCK_MAX_TEXELS];
			
 
				+	ASTCENC_ALIGNAS uint8_t texel_weight_count[BLOCK_MAX_TEXELS];
			
 
				 
			
 
				 	/**
			
 
				 	 * @brief The weight index of the N weights that are interpolated for each texel.
			
 
				 	 * Stored transposed to improve vectorization.
			
 
				 	 */
			
 
				-	uint8_t texel_weights_tr[4][BLOCK_MAX_TEXELS];
			
 
				+	ASTCENC_ALIGNAS uint8_t texel_weights_tr[4][BLOCK_MAX_TEXELS];
			
 
				 
			
 
				 	/**
			
 
				 	 * @brief The bilinear contribution of the N weights that are interpolated for each texel.
			
 
				 	 * Value is between 0 and 16, stored transposed to improve vectorization.
			
 
				 	 */
			
 
				-	uint8_t texel_weight_contribs_int_tr[4][BLOCK_MAX_TEXELS];
			
 
				+	ASTCENC_ALIGNAS uint8_t texel_weight_contribs_int_tr[4][BLOCK_MAX_TEXELS];
			
 
				 
			
 
				 	/**
			
 
				 	 * @brief The bilinear contribution of the N weights that are interpolated for each texel.
			
@@ -388,13 +388,13 @@ struct decimation_info
 
				 	ASTCENC_ALIGNAS float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS];
			
 
				 
			
 
				 	/** @brief The number of texels that each stored weight contributes to. */
			
 
				-	uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS];
			
 
				+	ASTCENC_ALIGNAS uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS];
			
 
				 
			
 
				 	/**
			
 
				 	 * @brief The list of texels that use a specific weight index.
			
 
				 	 * Stored transposed to improve vectorization.
			
 
				 	 */
			
 
				-	uint8_t weight_texels_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
			
 
				+	ASTCENC_ALIGNAS uint8_t weight_texels_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
			
 
				 
			
 
				 	/**
			
 
				 	 * @brief The bilinear contribution to the N texels that use each weight.
			
@@ -732,7 +732,11 @@ struct block_size_descriptor
 
				  *
			
 
				  * The @c data_[rgba] fields store the image data in an encoded SoA float form designed for easy
			
 
				  * vectorization. Input data is converted to float and stored as values between 0 and 65535. LDR
			
 
				- * data is stored as direct UNORM data, HDR data is stored as LNS data.
			
 
				+ * data is stored as direct UNORM data, HDR data is stored as LNS data. They are allocated SIMD
			
 
				+ * elements over-size to allow vectorized stores of unaligned and partial SIMD lanes (e.g. in a
			
 
				+ * 6x6x6 block the final row write will read elements 210-217 (vec8) or 214-217 (vec4), which is
			
 
				+ * two elements above the last real data element). The overspill values are never written to memory,
			
 
				+ * and would be benign, but the padding avoids hitting undefined behavior.
			
 
				  *
			
 
				  * The @c rgb_lns and @c alpha_lns fields that assigned a per-texel use of HDR are only used during
			
 
				  * decompression. The current compressor will always use HDR endpoint formats when in HDR mode.
			
@@ -740,16 +744,16 @@ struct block_size_descriptor
 
				 struct image_block
			
 
				 {
			
 
				 	/** @brief The input (compress) or output (decompress) data for the red color component. */
			
 
				-	ASTCENC_ALIGNAS float data_r[BLOCK_MAX_TEXELS];
			
 
				+	ASTCENC_ALIGNAS float data_r[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
			
 
				 
			
 
				 	/** @brief The input (compress) or output (decompress) data for the green color component. */
			
 
				-	ASTCENC_ALIGNAS float data_g[BLOCK_MAX_TEXELS];
			
 
				+	ASTCENC_ALIGNAS float data_g[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
			
 
				 
			
 
				 	/** @brief The input (compress) or output (decompress) data for the blue color component. */
			
 
				-	ASTCENC_ALIGNAS float data_b[BLOCK_MAX_TEXELS];
			
 
				+	ASTCENC_ALIGNAS float data_b[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
			
 
				 
			
 
				 	/** @brief The input (compress) or output (decompress) data for the alpha color component. */
			
 
				-	ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS];
			
 
				+	ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
			
 
				 
			
 
				 	/** @brief The number of texels in the block. */
			
 
				 	uint8_t texel_count;
			
@@ -957,7 +961,7 @@ struct ASTCENC_ALIGNAS compression_working_buffers
 
				 	 *
			
 
				 	 * For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets.
			
 
				 	 */
			
 
				-	uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS];
			
 
				+	ASTCENC_ALIGNAS uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS];
			
 
				 
			
 
				 	/** @brief Error of the best encoding combination for each block mode. */
			
 
				 	ASTCENC_ALIGNAS float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];
			
@@ -1111,7 +1115,7 @@ struct symbolic_compressed_block
 
				 	 *
			
 
				 	 * If dual plane, the second plane starts at @c weights[WEIGHTS_PLANE2_OFFSET].
			
 
				 	 */
			
 
				-	uint8_t weights[BLOCK_MAX_WEIGHTS];
			
 
				+	ASTCENC_ALIGNAS uint8_t weights[BLOCK_MAX_WEIGHTS];
			
 
				 
			
 
				 	/**
			
 
				 	 * @brief Get the weight quantization used by this block mode.
			
--- a/thirdparty/astcenc/astcenc_internal_entry.h
+++ b/thirdparty/astcenc/astcenc_internal_entry.h
@@ -150,6 +150,7 @@ public:
 
				 		m_start_count = 0;
			
 
				 		m_done_count = 0;
			
 
				 		m_task_count = 0;
			
 
				+		m_callback = nullptr;
			
 
				 		m_callback_last_value = 0.0f;
			
 
				 		m_callback_min_diff = 1.0f;
			
 
				 	}
			
--- a/thirdparty/astcenc/astcenc_symbolic_physical.cpp
+++ b/thirdparty/astcenc/astcenc_symbolic_physical.cpp
@@ -330,12 +330,14 @@ void physical_to_symbolic(
 
				 				return;
			
 
				 			}
			
 
				 
			
 
				+			// Low values span 3 bytes so need two read_bits calls
			
 
				 			int vx_low_s = read_bits(8, 12, pcb) | (read_bits(5, 12 + 8, pcb) << 8);
			
 
				-			int vx_high_s = read_bits(8, 25, pcb) | (read_bits(5, 25 + 8, pcb) << 8);
			
 
				+			int vx_high_s = read_bits(13, 25, pcb);
			
 
				 			int vx_low_t = read_bits(8, 38, pcb) | (read_bits(5, 38 + 8, pcb) << 8);
			
 
				-			int vx_high_t = read_bits(8, 51, pcb) | (read_bits(5, 51 + 8, pcb) << 8);
			
 
				+			int vx_high_t = read_bits(13, 51, pcb);
			
 
				 
			
 
				-			int all_ones = vx_low_s == 0x1FFF && vx_high_s == 0x1FFF && vx_low_t == 0x1FFF && vx_high_t == 0x1FFF;
			
 
				+			int all_ones = vx_low_s == 0x1FFF && vx_high_s == 0x1FFF &&
			
 
				+			               vx_low_t == 0x1FFF && vx_high_t == 0x1FFF;
			
 
				 
			
 
				 			if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t) && !all_ones)
			
 
				 			{
			
@@ -350,12 +352,14 @@ void physical_to_symbolic(
 
				 			int vx_high_s = read_bits(9, 19, pcb);
			
 
				 			int vx_low_t = read_bits(9, 28, pcb);
			
 
				 			int vx_high_t = read_bits(9, 37, pcb);
			
 
				-			int vx_low_p = read_bits(9, 46, pcb);
			
 
				-			int vx_high_p = read_bits(9, 55, pcb);
			
 
				+			int vx_low_r = read_bits(9, 46, pcb);
			
 
				+			int vx_high_r = read_bits(9, 55, pcb);
			
 
				 
			
 
				-			int all_ones = vx_low_s == 0x1FF && vx_high_s == 0x1FF && vx_low_t == 0x1FF && vx_high_t == 0x1FF && vx_low_p == 0x1FF && vx_high_p == 0x1FF;
			
 
				+			int all_ones = vx_low_s == 0x1FF && vx_high_s == 0x1FF &&
			
 
				+			               vx_low_t == 0x1FF && vx_high_t == 0x1FF &&
			
 
				+			               vx_low_r == 0x1FF && vx_high_r == 0x1FF;
			
 
				 
			
 
				-			if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_p >= vx_high_p) && !all_ones)
			
 
				+			if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_r >= vx_high_r) && !all_ones)
			
 
				 			{
			
 
				 				scb.block_type = SYM_BTYPE_ERROR;
			
 
				 				return;
			
@@ -470,8 +474,7 @@ void physical_to_symbolic(
 
				 				bitpos += 2;
			
 
				 			}
			
 
				 		}
			
 
				-		scb.partition_index = static_cast<uint16_t>(read_bits(6, 13, pcb) |
			
 
				-		                                            (read_bits(PARTITION_INDEX_BITS - 6, 19, pcb) << 6));
			
 
				+		scb.partition_index = static_cast<uint16_t>(read_bits(10, 13, pcb));
			
 
				 	}
			
 
				 
			
 
				 	for (int i = 0; i < partition_count; i++)
			
--- a/thirdparty/astcenc/astcenc_vecmathlib_none_4.h
+++ b/thirdparty/astcenc/astcenc_vecmathlib_none_4.h
@@ -1,6 +1,6 @@
 
				 // SPDX-License-Identifier: Apache-2.0
			
 
				 // ----------------------------------------------------------------------------
			
 
				-// Copyright 2019-2023 Arm Limited
			
 
				+// Copyright 2019-2024 Arm Limited
			
 
				 //
			
 
				 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
			
 
				 // use this file except in compliance with the License. You may obtain a copy
			
@@ -556,10 +556,16 @@ ASTCENC_SIMD_INLINE vmask4 operator>(vint4 a, vint4 b)
 
				  */
			
 
				 template <int s> ASTCENC_SIMD_INLINE vint4 lsl(vint4 a)
			
 
				 {
			
 
				-	return vint4(a.m[0] << s,
			
 
				-	             a.m[1] << s,
			
 
				-	             a.m[2] << s,
			
 
				-	             a.m[3] << s);
			
 
				+	// Cast to unsigned to avoid shift in/out of sign bit undefined behavior
			
 
				+	unsigned int as0 = static_cast<unsigned int>(a.m[0]) << s;
			
 
				+	unsigned int as1 = static_cast<unsigned int>(a.m[1]) << s;
			
 
				+	unsigned int as2 = static_cast<unsigned int>(a.m[2]) << s;
			
 
				+	unsigned int as3 = static_cast<unsigned int>(a.m[3]) << s;
			
 
				+
			
 
				+	return vint4(static_cast<int>(as0),
			
 
				+	             static_cast<int>(as1),
			
 
				+	             static_cast<int>(as2),
			
 
				+	             static_cast<int>(as3));
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -567,6 +573,7 @@ template <int s> ASTCENC_SIMD_INLINE vint4 lsl(vint4 a)
 
				  */
			
 
				 template <int s> ASTCENC_SIMD_INLINE vint4 lsr(vint4 a)
			
 
				 {
			
 
				+	// Cast to unsigned to avoid shift in/out of sign bit undefined behavior
			
 
				 	unsigned int as0 = static_cast<unsigned int>(a.m[0]) >> s;
			
 
				 	unsigned int as1 = static_cast<unsigned int>(a.m[1]) >> s;
			
 
				 	unsigned int as2 = static_cast<unsigned int>(a.m[2]) >> s;