Бранимир Караџић 2 years ago
parent
commit
a3598c6d28

+ 41 - 4
3rdparty/spirv-cross/spirv_cross.cpp

@@ -5445,18 +5445,55 @@ void Compiler::analyze_interlocked_resource_usage()
 }
 
 bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
+{
+	if (!type_is_top_level_array(type))
+		return false;
+
+	// BDA types must have parent type hierarchy.
+	if (!type.parent_type)
+		return false;
+
+	// Punch through all array layers.
+	auto *parent = &get<SPIRType>(type.parent_type);
+	while (type_is_top_level_array(*parent))
+		parent = &get<SPIRType>(parent->parent_type);
+
+	return type_is_top_level_pointer(*parent);
+}
+
+bool Compiler::type_is_top_level_pointer(const SPIRType &type) const
 {
 	if (!type.pointer)
 		return false;
 
-	// If parent type has same pointer depth, we must have an array of pointers.
-	return type.pointer_depth == get<SPIRType>(type.parent_type).pointer_depth;
+	// Function pointers, should not be hit by valid SPIR-V.
+	// Parent type will be SPIRFunction instead.
+	if (type.basetype == SPIRType::Unknown)
+		return false;
+
+	// Some types are synthesized in-place without complete type hierarchy and might not have parent types,
+	// but these types are never array-of-pointer or any complicated BDA type, infer reasonable defaults.
+	if (type.parent_type)
+		return type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth;
+	else
+		return true;
 }
 
 bool Compiler::type_is_top_level_physical_pointer(const SPIRType &type) const
 {
-	return type.pointer && type.storage == StorageClassPhysicalStorageBuffer &&
-	       type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth;
+	return type_is_top_level_pointer(type) && type.storage == StorageClassPhysicalStorageBuffer;
+}
+
+bool Compiler::type_is_top_level_array(const SPIRType &type) const
+{
+	if (type.array.empty())
+		return false;
+
+	// If we have pointer and array, we infer pointer-to-array as it's the only meaningful thing outside BDA.
+	if (type.parent_type)
+		return type.array.size() > get<SPIRType>(type.parent_type).array.size();
+	else
+		return !type.pointer;
 }
 
 bool Compiler::flush_phi_required(BlockID from, BlockID to) const

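The spirv_cross.cpp change above splits the old pointer-depth comparison into three predicates: type_is_top_level_pointer, type_is_top_level_array, and type_is_array_of_pointers built on top of them. Below is a minimal, self-contained C++ sketch of how such checks classify the common buffer-device-address type shapes; Type and the free functions are simplified stand-ins for SPIRType and the member functions in the diff, not SPIRV-Cross API.

#include <cassert>
#include <cstdint>
#include <vector>

// Simplified stand-in for SPIRType: only the fields the predicates inspect.
struct Type
{
	bool pointer = false;
	uint32_t pointer_depth = 0;
	std::vector<uint32_t> array; // one entry per array dimension
	const Type *parent_type = nullptr;
};

// Mirrors type_is_top_level_pointer: the outermost level adds a pointer indirection of its own.
static bool is_top_level_pointer(const Type &t)
{
	if (!t.pointer)
		return false;
	return t.parent_type ? t.pointer_depth > t.parent_type->pointer_depth : true;
}

// Mirrors type_is_top_level_array: the outermost level adds an array dimension of its own.
static bool is_top_level_array(const Type &t)
{
	if (t.array.empty())
		return false;
	return t.parent_type ? t.array.size() > t.parent_type->array.size() : !t.pointer;
}

// Mirrors type_is_array_of_pointers: peel the array layers, then ask whether a pointer remains.
static bool is_array_of_pointers(const Type &t)
{
	if (!is_top_level_array(t) || !t.parent_type)
		return false;
	const Type *parent = t.parent_type;
	while (is_top_level_array(*parent) && parent->parent_type)
		parent = parent->parent_type;
	return is_top_level_pointer(*parent);
}

int main()
{
	Type uint_type; // plain scalar

	Type bda_ptr; // physical-storage-buffer pointer to uint
	bda_ptr.pointer = true;
	bda_ptr.pointer_depth = 1;
	bda_ptr.parent_type = &uint_type;

	Type ptr_array; // array of 4 BDA pointers: pointer_depth inherited, array dimension added
	ptr_array.pointer = true;
	ptr_array.pointer_depth = 1;
	ptr_array.array = { 4 };
	ptr_array.parent_type = &bda_ptr;

	Type uint_array; // uint[4]
	uint_array.array = { 4 };
	uint_array.parent_type = &uint_type;

	Type ptr_to_array; // BDA pointer to uint[4]: array inherited from the pointee, depth added
	ptr_to_array.pointer = true;
	ptr_to_array.pointer_depth = 1;
	ptr_to_array.array = { 4 };
	ptr_to_array.parent_type = &uint_array;

	assert(is_top_level_pointer(bda_ptr) && !is_top_level_array(bda_ptr));
	assert(is_array_of_pointers(ptr_array) && !is_top_level_pointer(ptr_array));
	assert(is_top_level_pointer(ptr_to_array) && !is_top_level_array(ptr_to_array));
	assert(is_top_level_array(uint_array) && !is_array_of_pointers(uint_array));
	return 0;
}

The last two cases show the distinction the diff comments call out: an array whose parent is a pointer is an array of pointers, while a pointer that merely inherits its pointee's array dimensions is a pointer to an array.
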
+ 2 - 0
3rdparty/spirv-cross/spirv_cross.hpp

@@ -1145,6 +1145,8 @@ protected:
 
 	bool type_is_array_of_pointers(const SPIRType &type) const;
 	bool type_is_top_level_physical_pointer(const SPIRType &type) const;
+	bool type_is_top_level_pointer(const SPIRType &type) const;
+	bool type_is_top_level_array(const SPIRType &type) const;
 	bool type_is_block_like(const SPIRType &type) const;
 	bool type_is_opaque_value(const SPIRType &type) const;
 

+ 321 - 31
3rdparty/spirv-cross/spirv_glsl.cpp

@@ -30,6 +30,7 @@
 #include <limits>
 #include <locale.h>
 #include <utility>
+#include <array>
 
 #ifndef _WIN32
 #include <langinfo.h>
@@ -1521,7 +1522,7 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bits
 {
 	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
 	// and is 64-bit.
-	if (type.storage == StorageClassPhysicalStorageBufferEXT)
+	if (type_is_top_level_physical_pointer(type))
 	{
 		if (!type.pointer)
 			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
@@ -1536,8 +1537,7 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bits
 		else
 			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
 	}
-
-	if (!type.array.empty())
+	else if (type_is_top_level_array(type))
 	{
 		uint32_t minimum_alignment = 1;
 		if (packing_is_vec4_padded(packing))
@@ -1643,21 +1643,9 @@ uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const B
 
 uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
 {
-	if (!type.array.empty())
-	{
-		uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
-
-		// For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
-		// so that it is possible to pack other vectors into the last element.
-		if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
-			packed_size -= (4 - type.vecsize) * (type.width / 8);
-
-		return packed_size;
-	}
-
 	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
 	// and is 64-bit.
-	if (type.storage == StorageClassPhysicalStorageBufferEXT)
+	if (type_is_top_level_physical_pointer(type))
 	{
 		if (!type.pointer)
 			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
@@ -1667,6 +1655,17 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &f
 		else
 			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
 	}
+	else if (type_is_top_level_array(type))
+	{
+		uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
+
+		// For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
+		// so that it is possible to pack other vectors into the last element.
+		if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
+			packed_size -= (4 - type.vecsize) * (type.width / 8);
+
+		return packed_size;
+	}
 
 	uint32_t size = 0;
 
@@ -1834,8 +1833,9 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin
 			}
 
 			// Verify array stride rules.
-			if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
-			                                    type_struct_member_array_stride(type, i))
+			if (type_is_top_level_array(memb_type) &&
+			    type_to_packed_array_stride(memb_type, member_flags, packing) !=
+			    type_struct_member_array_stride(type, i))
 			{
 				if (failed_validation_index)
 					*failed_validation_index = i;
@@ -3993,6 +3993,169 @@ void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
 	}
 }
 
+void CompilerGLSL::emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op)
+{
+	std::string result;
+	switch (group_op)
+	{
+	case GroupOperationReduce:
+		result = "reduction";
+		break;
+
+	case GroupOperationExclusiveScan:
+		result = "excl_scan";
+		break;
+
+	case GroupOperationInclusiveScan:
+		result = "incl_scan";
+		break;
+
+	default:
+		SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
+	}
+
+	struct TypeInfo
+	{
+		std::string type;
+		std::string identity;
+	};
+
+	std::vector<TypeInfo> type_infos;
+	switch (op)
+	{
+	case OpGroupNonUniformIAdd:
+	{
+		type_infos.emplace_back(TypeInfo{ "uint", "0u" });
+		type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(0u)" });
+		type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(0u)" });
+		type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(0u)" });
+		type_infos.emplace_back(TypeInfo{ "int", "0" });
+		type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(0)" });
+		type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(0)" });
+		type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(0)" });
+		break;
+	}
+
+	case OpGroupNonUniformFAdd:
+	{
+		type_infos.emplace_back(TypeInfo{ "float", "0.0f" });
+		type_infos.emplace_back(TypeInfo{ "vec2", "vec2(0.0f)" });
+		type_infos.emplace_back(TypeInfo{ "vec3", "vec3(0.0f)" });
+		type_infos.emplace_back(TypeInfo{ "vec4", "vec4(0.0f)" });
+		// ARB_gpu_shader_fp64 is required in GL4.0 which in turn is required by NV_thread_shuffle
+		type_infos.emplace_back(TypeInfo{ "double", "0.0LF" });
+		type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(0.0LF)" });
+		type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(0.0LF)" });
+		type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(0.0LF)" });
+		break;
+	}
+
+	case OpGroupNonUniformIMul:
+	{
+		type_infos.emplace_back(TypeInfo{ "uint", "1u" });
+		type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(1u)" });
+		type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(1u)" });
+		type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(1u)" });
+		type_infos.emplace_back(TypeInfo{ "int", "1" });
+		type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(1)" });
+		type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(1)" });
+		type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(1)" });
+		break;
+	}
+
+	case OpGroupNonUniformFMul:
+	{
+		type_infos.emplace_back(TypeInfo{ "float", "1.0f" });
+		type_infos.emplace_back(TypeInfo{ "vec2", "vec2(1.0f)" });
+		type_infos.emplace_back(TypeInfo{ "vec3", "vec3(1.0f)" });
+		type_infos.emplace_back(TypeInfo{ "vec4", "vec4(1.0f)" });
+		type_infos.emplace_back(TypeInfo{ "double", "1.0LF" });
+		type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(1.0LF)" });
+		type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(1.0LF)" });
+		type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(1.0LF)" });
+		break;
+	}
+
+	default:
+		SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
+	}
+
+	const bool op_is_addition = op == OpGroupNonUniformIAdd || op == OpGroupNonUniformFAdd;
+	const bool op_is_multiplication = op == OpGroupNonUniformIMul || op == OpGroupNonUniformFMul;
+	std::string op_symbol;
+	if (op_is_addition)
+	{
+		op_symbol = "+=";
+	}
+	else if (op_is_multiplication)
+	{
+		op_symbol = "*=";
+	}
+
+	for (const TypeInfo &t : type_infos)
+	{
+		statement(t.type, " ", func, "(", t.type, " v)");
+		begin_scope();
+		statement(t.type, " ", result, " = ", t.identity, ";");
+		statement("uvec4 active_threads = subgroupBallot(true);");
+		statement("if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)");
+		begin_scope();
+		statement("uint total = gl_SubgroupSize / 2u;");
+		statement(result, " = v;");
+		statement("for (uint i = 1u; i <= total; i <<= 1u)");
+		begin_scope();
+		statement("bool valid;");
+		if (group_op == GroupOperationReduce)
+		{
+			statement(t.type, " s = shuffleXorNV(", result, ", i, gl_SubgroupSize, valid);");
+		}
+		else if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
+		{
+			statement(t.type, " s = shuffleUpNV(", result, ", i, gl_SubgroupSize, valid);");
+		}
+		if (op_is_addition || op_is_multiplication)
+		{
+			statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
+		}
+		end_scope();
+		if (group_op == GroupOperationExclusiveScan)
+		{
+			statement(result, " = shuffleUpNV(", result, ", 1u, gl_SubgroupSize);");
+			statement("if (subgroupElect())");
+			begin_scope();
+			statement(result, " = ", t.identity, ";");
+			end_scope();
+		}
+		end_scope();
+		statement("else");
+		begin_scope();
+		if (group_op == GroupOperationExclusiveScan)
+		{
+			statement("uint total = subgroupBallotBitCount(gl_SubgroupLtMask);");
+		}
+		else if (group_op == GroupOperationInclusiveScan)
+		{
+			statement("uint total = subgroupBallotBitCount(gl_SubgroupLeMask);");
+		}
+		statement("for (uint i = 0u; i < gl_SubgroupSize; ++i)");
+		begin_scope();
+		statement("bool valid = subgroupBallotBitExtract(active_threads, i);");
+		statement(t.type, " s = shuffleNV(v, i, gl_SubgroupSize);");
+		if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
+		{
+			statement("valid = valid && (i < total);");
+		}
+		if (op_is_addition || op_is_multiplication)
+		{
+			statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
+		}
+		end_scope();
+		end_scope();
+		statement("return ", result, ";");
+		end_scope();
+	}
+}
+
 void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
 {
 	static const char *workaround_types[] = { "int",   "ivec2", "ivec3", "ivec4", "uint",   "uvec2", "uvec3", "uvec4",
@@ -4396,6 +4559,57 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
 			statement("#endif");
 			statement("");
 		}
+
+		auto arithmetic_feature_helper =
+		    [&](Supp::Feature feat, std::string func_name, spv::Op op, spv::GroupOperation group_op)
+		{
+			if (shader_subgroup_supporter.is_feature_requested(feat))
+			{
+				auto exts = Supp::get_candidates_for_feature(feat, result);
+				for (auto &e : exts)
+				{
+					const char *name = Supp::get_extension_name(e);
+					statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
+
+					switch (e)
+					{
+					case Supp::NV_shader_thread_shuffle:
+						emit_subgroup_arithmetic_workaround(func_name, op, group_op);
+						break;
+					default:
+						break;
+					}
+				}
+				statement("#endif");
+				statement("");
+			}
+		};
+
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIAddReduce, "subgroupAdd", OpGroupNonUniformIAdd,
+		                          GroupOperationReduce);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIAddExclusiveScan, "subgroupExclusiveAdd",
+		                          OpGroupNonUniformIAdd, GroupOperationExclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIAddInclusiveScan, "subgroupInclusiveAdd",
+		                          OpGroupNonUniformIAdd, GroupOperationInclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFAddReduce, "subgroupAdd", OpGroupNonUniformFAdd,
+		                          GroupOperationReduce);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFAddExclusiveScan, "subgroupExclusiveAdd",
+		                          OpGroupNonUniformFAdd, GroupOperationExclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFAddInclusiveScan, "subgroupInclusiveAdd",
+		                          OpGroupNonUniformFAdd, GroupOperationInclusiveScan);
+
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIMulReduce, "subgroupMul", OpGroupNonUniformIMul,
+		                          GroupOperationReduce);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIMulExclusiveScan, "subgroupExclusiveMul",
+		                          OpGroupNonUniformIMul, GroupOperationExclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIMulInclusiveScan, "subgroupInclusiveMul",
+		                          OpGroupNonUniformIMul, GroupOperationInclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFMulReduce, "subgroupMul", OpGroupNonUniformFMul,
+		                          GroupOperationReduce);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFMulExclusiveScan, "subgroupExclusiveMul",
+		                          OpGroupNonUniformFMul, GroupOperationExclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFMulInclusiveScan, "subgroupInclusiveMul",
+		                          OpGroupNonUniformFMul, GroupOperationInclusiveScan);
 	}
 
 	if (!workaround_ubo_load_overload_types.empty())
@@ -7109,7 +7323,7 @@ string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID s
 	}
 }
 
-bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
+bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op, const uint32_t *ops)
 {
 	switch (op)
 	{
@@ -7128,6 +7342,22 @@ bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
 	case OpGroupNonUniformBallotBitExtract:
 	case OpGroupNonUniformInverseBallot:
 		return true;
+	case OpGroupNonUniformIAdd:
+	case OpGroupNonUniformFAdd:
+	case OpGroupNonUniformIMul:
+	case OpGroupNonUniformFMul:
+	{
+		const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
+		if (operation == GroupOperationReduce || operation == GroupOperationInclusiveScan ||
+		    operation == GroupOperationExclusiveScan)
+		{
+			return true;
+		}
+		else
+		{
+			return false;
+		}
+	}
 	default:
 		return false;
 	}
@@ -7522,7 +7752,7 @@ bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
 bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
 {
 	auto &type = expression_type(ptr);
-	if (type.array.empty())
+	if (!type_is_top_level_array(get_pointee_type(type)))
 		return false;
 
 	if (!backend.array_is_value_type)
@@ -8725,7 +8955,7 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i)
 	const uint32_t *ops = stream(i);
 	auto op = static_cast<Op>(i.op);
 
-	if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
+	if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op, ops))
 		SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
 
 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
@@ -8793,12 +9023,34 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i)
 	}
 	break;
 
-	case OpGroupNonUniformFAdd:
-	case OpGroupNonUniformFMul:
+	// clang-format off
+#define GLSL_GROUP_OP(OP)\
+	case OpGroupNonUniform##OP:\
+	{\
+		auto operation = static_cast<GroupOperation>(ops[3]);\
+		if (operation == GroupOperationClusteredReduce)\
+			require_extension_internal("GL_KHR_shader_subgroup_clustered");\
+		else if (operation == GroupOperationReduce)\
+			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##Reduce);\
+		else if (operation == GroupOperationExclusiveScan)\
+			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##ExclusiveScan);\
+		else if (operation == GroupOperationInclusiveScan)\
+			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##InclusiveScan);\
+		else\
+			SPIRV_CROSS_THROW("Invalid group operation.");\
+		break;\
+	}
+
+	GLSL_GROUP_OP(IAdd)
+	GLSL_GROUP_OP(FAdd)
+	GLSL_GROUP_OP(IMul)
+	GLSL_GROUP_OP(FMul)
+
+#undef GLSL_GROUP_OP
+	// clang-format on
+
 	case OpGroupNonUniformFMin:
 	case OpGroupNonUniformFMax:
-	case OpGroupNonUniformIAdd:
-	case OpGroupNonUniformIMul:
 	case OpGroupNonUniformSMin:
 	case OpGroupNonUniformSMax:
 	case OpGroupNonUniformUMin:
@@ -17667,6 +17919,7 @@ const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candid
 	static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
 		                                                "GL_KHR_shader_subgroup_basic",
 		                                                "GL_KHR_shader_subgroup_vote",
+		                                                "GL_KHR_shader_subgroup_arithmetic",
 		                                                "GL_NV_gpu_shader_5",
 		                                                "GL_NV_shader_thread_group",
 		                                                "GL_NV_shader_thread_shuffle",
@@ -17715,6 +17968,21 @@ CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSub
 		return { SubgroupMask };
 	case SubgroupBallotBitCount:
 		return { SubgroupBallot };
+	case SubgroupArithmeticIAddReduce:
+	case SubgroupArithmeticIAddInclusiveScan:
+	case SubgroupArithmeticFAddReduce:
+	case SubgroupArithmeticFAddInclusiveScan:
+	case SubgroupArithmeticIMulReduce:
+	case SubgroupArithmeticIMulInclusiveScan:
+	case SubgroupArithmeticFMulReduce:
+	case SubgroupArithmeticFMulInclusiveScan:
+		return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract };
+	case SubgroupArithmeticIAddExclusiveScan:
+	case SubgroupArithmeticFAddExclusiveScan:
+	case SubgroupArithmeticIMulExclusiveScan:
+	case SubgroupArithmeticFMulExclusiveScan:
+		return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount,
+			     SubgroupMask, SubgroupElect,  SubgroupBallotBitExtract };
 	default:
 		return {};
 	}
@@ -17728,11 +17996,15 @@ CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgr
 
 bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
 {
-	static const bool retval[FeatureCount] = { false, false, false, false, false, false,
-		                                       true, // SubgroupBalloFindLSB_MSB
-		                                       false, false, false, false,
-		                                       true, // SubgroupMemBarrier - replaced with workgroup memory barriers
-		                                       false, false, true,  false };
+	static const bool retval[FeatureCount] = {
+		false, false, false, false, false, false,
+		true, // SubgroupBalloFindLSB_MSB
+		false, false, false, false,
+		true, // SubgroupMemBarrier - replaced with workgroup memory barriers
+		false, false, true, false,
+		false, false, false, false, false, false, // iadd, fadd
+		false, false, false, false, false, false, // imul, fmul
+	};
 
 	return retval[feature];
 }
@@ -17744,7 +18016,11 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgrou
 		KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic,  KHR_shader_subgroup_basic,  KHR_shader_subgroup_basic,
 		KHR_shader_subgroup_basic,  KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
 		KHR_shader_subgroup_vote,   KHR_shader_subgroup_basic,  KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
-		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
+		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot,
+		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
+		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
+		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
+		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
 	};
 
 	return extensions[feature];
@@ -17840,6 +18116,19 @@ CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderS
 		return { NV_shader_thread_group };
 	case SubgroupBallotBitCount:
 		return {};
+	case SubgroupArithmeticIAddReduce:
+	case SubgroupArithmeticIAddExclusiveScan:
+	case SubgroupArithmeticIAddInclusiveScan:
+	case SubgroupArithmeticFAddReduce:
+	case SubgroupArithmeticFAddExclusiveScan:
+	case SubgroupArithmeticFAddInclusiveScan:
+	case SubgroupArithmeticIMulReduce:
+	case SubgroupArithmeticIMulExclusiveScan:
+	case SubgroupArithmeticIMulInclusiveScan:
+	case SubgroupArithmeticFMulReduce:
+	case SubgroupArithmeticFMulExclusiveScan:
+	case SubgroupArithmeticFMulInclusiveScan:
+		return { KHR_shader_subgroup_arithmetic, NV_shader_thread_shuffle };
 	default:
 		return {};
 	}
@@ -17864,6 +18153,7 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
 	weights[KHR_shader_subgroup_ballot] = big_num;
 	weights[KHR_shader_subgroup_basic] = big_num;
 	weights[KHR_shader_subgroup_vote] = big_num;
+	weights[KHR_shader_subgroup_arithmetic] = big_num;
 }
 
 void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)

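The emit_subgroup_arithmetic_workaround function added above generates GLSL that lowers subgroup reductions and scans onto GL_NV_shader_thread_shuffle. Its fast path, taken when every lane of the subgroup is active, is a log2(gl_SubgroupSize)-step XOR-shuffle butterfly. The CPU-side C++ sketch below is purely illustrative: it simulates that pattern for an additive reduce, assuming a power-of-two lane count and ignoring the inactive-lane fallback the generated code also handles.

#include <cstdint>
#include <cstdio>
#include <vector>

// Simulation of the XOR-shuffle ("butterfly") reduction the emitted GLSL uses when all
// lanes are active. lanes.size() stands in for gl_SubgroupSize and must be a power of two.
static std::vector<uint32_t> subgroup_add_reduce(std::vector<uint32_t> lanes)
{
	const size_t size = lanes.size();
	// Each iteration plays the role of one shuffleXorNV(...) plus accumulate in the emitted loop:
	// lane i reads lane (i ^ stride) and adds it to its running result.
	for (size_t stride = 1; stride < size; stride <<= 1)
	{
		std::vector<uint32_t> shuffled(size);
		for (size_t i = 0; i < size; i++)
			shuffled[i] = lanes[i ^ stride];
		for (size_t i = 0; i < size; i++)
			lanes[i] += shuffled[i];
	}
	return lanes; // after log2(size) steps, every lane holds the sum of all lanes
}

int main()
{
	std::vector<uint32_t> lanes = { 1, 2, 3, 4, 5, 6, 7, 8 };
	auto reduced = subgroup_add_reduce(lanes);
	for (uint32_t v : reduced)
		std::printf("%u ", v); // prints "36" eight times
	std::printf("\n");
	return 0;
}

The scan variants follow the same structure but use shuffleUpNV instead of the XOR shuffle, and the exclusive scan additionally shifts the result up by one lane and resets the first lane to the identity, which is exactly what the GroupOperationExclusiveScan branch above emits.
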
+ 15 - 2
3rdparty/spirv-cross/spirv_glsl.hpp

@@ -292,6 +292,7 @@ protected:
 			KHR_shader_subgroup_ballot,
 			KHR_shader_subgroup_basic,
 			KHR_shader_subgroup_vote,
+			KHR_shader_subgroup_arithmetic,
 			NV_gpu_shader_5,
 			NV_shader_thread_group,
 			NV_shader_thread_shuffle,
@@ -324,7 +325,18 @@ protected:
 			SubgroupInverseBallot_InclBitCount_ExclBitCout = 13,
 			SubgroupBallotBitExtract = 14,
 			SubgroupBallotBitCount = 15,
-
+			SubgroupArithmeticIAddReduce = 16,
+			SubgroupArithmeticIAddExclusiveScan = 17,
+			SubgroupArithmeticIAddInclusiveScan = 18,
+			SubgroupArithmeticFAddReduce = 19,
+			SubgroupArithmeticFAddExclusiveScan = 20,
+			SubgroupArithmeticFAddInclusiveScan = 21,
+			SubgroupArithmeticIMulReduce = 22,
+			SubgroupArithmeticIMulExclusiveScan = 23,
+			SubgroupArithmeticIMulInclusiveScan = 24,
+			SubgroupArithmeticFMulReduce = 25,
+			SubgroupArithmeticFMulExclusiveScan = 26,
+			SubgroupArithmeticFMulInclusiveScan = 27,
 			FeatureCount
 		};
 
@@ -358,7 +370,7 @@ protected:
 	};
 
 	// TODO remove this function when all subgroup ops are supported (or make it always return true)
-	static bool is_supported_subgroup_op_in_opengl(spv::Op op);
+	static bool is_supported_subgroup_op_in_opengl(spv::Op op, const uint32_t *ops);
 
 	void reset(uint32_t iteration_count);
 	void emit_function(SPIRFunction &func, const Bitset &return_flags);
@@ -627,6 +639,7 @@ protected:
 	void emit_struct(SPIRType &type);
 	void emit_resources();
 	void emit_extension_workarounds(spv::ExecutionModel model);
+	void emit_subgroup_arithmetic_workaround(const std::string &func, spv::Op op, spv::GroupOperation group_op);
 	void emit_polyfills(uint32_t polyfills, bool relaxed);
 	void emit_buffer_block_native(const SPIRVariable &var);
 	void emit_buffer_reference_block(uint32_t type_id, bool forward_declaration);

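is_supported_subgroup_op_in_opengl now also receives the raw operand words so the group operation can be inspected before deciding whether an OpenGL fallback exists. For the four arithmetic opcodes the GroupOperation literal sits at ops[3], after result type, result id, and execution scope. A small hypothetical C++ sketch of that contract follows; the function name and operand values are illustrative, with the GroupOperation values taken from the SPIR-V spec.

#include <cassert>
#include <cstdint>

// GroupOperation values from the SPIR-V spec; only the ones the check cares about.
enum GroupOperation : uint32_t
{
	GroupOperationReduce = 0,
	GroupOperationInclusiveScan = 1,
	GroupOperationExclusiveScan = 2,
	GroupOperationClusteredReduce = 3,
};

// Stand-in for the arithmetic branch of is_supported_subgroup_op_in_opengl.
// ops[0] = result type id, ops[1] = result id, ops[2] = execution scope,
// ops[3] = GroupOperation literal, ops[4] = value id.
static bool arithmetic_op_supported_in_opengl(const uint32_t *ops)
{
	const auto operation = static_cast<GroupOperation>(ops[3]);
	return operation == GroupOperationReduce ||
	       operation == GroupOperationInclusiveScan ||
	       operation == GroupOperationExclusiveScan;
}

int main()
{
	// Hypothetical operand words for an OpGroupNonUniformIAdd instruction (Subgroup scope = 3).
	uint32_t reduce_ops[]    = { 10, 11, 3, GroupOperationReduce, 12 };
	uint32_t clustered_ops[] = { 10, 11, 3, GroupOperationClusteredReduce, 12 };
	assert(arithmetic_op_supported_in_opengl(reduce_ops));
	assert(!arithmetic_op_supported_in_opengl(clustered_ops));
	return 0;
}

A clustered reduction therefore still trips the "only supported in Vulkan semantics" throw at the top of emit_subgroup_op when compiling for plain OpenGL.
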
+ 12 - 2
3rdparty/spirv-cross/spirv_msl.cpp

@@ -9603,7 +9603,7 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
 {
 	// We only care about assignments of an entire array
 	auto &type = expression_type(id_rhs);
-	if (type.array.size() == 0)
+	if (!type_is_top_level_array(get_pointee_type(type)))
 		return false;
 
 	auto *var = maybe_get<SPIRVariable>(id_lhs);
@@ -14550,7 +14550,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id, bool member)
 	string type_name;
 
 	// Pointer?
-	if (type.pointer)
+	if (type_is_top_level_pointer(type) || type_is_array_of_pointers(type))
 	{
 		assert(type.pointer_depth > 0);
 
@@ -14573,7 +14573,17 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id, bool member)
 			while (type_is_pointer(*p_parent_type))
 				p_parent_type = &get<SPIRType>(p_parent_type->parent_type);
 
+			// If we're emitting BDA, just use the templated type.
+			// Emitting builtin arrays need a lot of cooperation with other code to ensure
+			// the C-style nesting works right.
+			// FIXME: This is somewhat of a hack.
+			bool old_is_using_builtin_array = is_using_builtin_array;
+			if (type_is_top_level_physical_pointer(type))
+				is_using_builtin_array = false;
+
 			type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id));
+
+			is_using_builtin_array = old_is_using_builtin_array;
 		}
 
 		switch (type.basetype)

+ 1 - 1
3rdparty/spirv-cross/spirv_reflect.cpp

@@ -292,7 +292,7 @@ bool CompilerReflection::type_is_reference(const SPIRType &type) const
 {
 	// Physical pointers and arrays of physical pointers need to refer to the pointee's type.
 	return type_is_top_level_physical_pointer(type) ||
-	       (!type.array.empty() && type_is_top_level_physical_pointer(get<SPIRType>(type.parent_type)));
+	       (type_is_array_of_pointers(type) && type.storage == StorageClassPhysicalStorageBuffer);
 }
 
 void CompilerReflection::emit_types()