
Updated spirv-cross.

Бранимир Караџић, 5 years ago
commit b7fb619125

3rdparty/spirv-cross/main.cpp (+8 -3)

@@ -565,6 +565,7 @@ struct CLIArguments
 	bool msl_arrayed_subpass_input = false;
 	uint32_t msl_r32ui_linear_texture_alignment = 4;
 	uint32_t msl_r32ui_alignment_constant_id = 65535;
+	bool msl_texture_1d_as_2d = false;
 	bool glsl_emit_push_constant_as_ubo = false;
 	bool glsl_emit_ubo_as_plain_uniforms = false;
 	bool glsl_force_flattened_io_blocks = false;
@@ -728,7 +729,7 @@ static void print_help_msl()
 	                "\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. Otherwise, it is emulated as a normal texture.\n"
 	                "\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n"
 	                "\t\tEmits [[color(N)]] inputs in fragment stage.\n"
-	                "\t\tRequires iOS Metal.\n"
+	                "\t\tRequires an Apple GPU.\n"
 	                "\t[--msl-emulate-cube-array]:\n\t\tEmulate cube arrays with 2D array and manual math.\n"
 	                "\t[--msl-discrete-descriptor-set <index>]:\n\t\tWhen using argument buffers, forces a specific descriptor set to be implemented without argument buffers.\n"
 	                "\t\tUseful for implementing push descriptors in emulation layers.\n"
@@ -774,7 +775,9 @@ static void print_help_msl()
 	                "\t[--msl-r32ui-linear-texture-align <alignment>]:\n\t\tThe required alignment of linear textures of format MTLPixelFormatR32Uint.\n"
 	                "\t\tThis is used to align the row stride for atomic accesses to such images.\n"
 	                "\t[--msl-r32ui-linear-texture-align-constant-id <id>]:\n\t\tThe function constant ID to use for the linear texture alignment.\n"
-	                "\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n");
+	                "\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n"
+	                "\t[--msl-texture-1d-as-2d]:\n\t\tEmit Image variables of dimension Dim1D as texture2d.\n"
+	                "\t\tIn Metal, 1D textures do not support all features that 2D textures do. Use this option if your code relies on these features.\n");
 	// clang-format on
 }
 
@@ -991,9 +994,9 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
 		if (args.msl_ios)
 		{
 			msl_opts.platform = CompilerMSL::Options::iOS;
-			msl_opts.ios_use_framebuffer_fetch_subpasses = args.msl_framebuffer_fetch;
 			msl_opts.emulate_cube_array = args.msl_emulate_cube_array;
 		}
+		msl_opts.use_framebuffer_fetch_subpasses = args.msl_framebuffer_fetch;
 		msl_opts.pad_fragment_output_components = args.msl_pad_fragment_output;
 		msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left;
 		msl_opts.argument_buffers = args.msl_argument_buffers;
@@ -1015,6 +1018,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
 		msl_opts.arrayed_subpass_input = args.msl_arrayed_subpass_input;
 		msl_opts.r32ui_linear_texture_alignment = args.msl_r32ui_linear_texture_alignment;
 		msl_opts.r32ui_alignment_constant_id = args.msl_r32ui_alignment_constant_id;
+		msl_opts.texture_1D_as_2D = args.msl_texture_1d_as_2d;
 		msl_comp->set_msl_options(msl_opts);
 		for (auto &v : args.msl_discrete_descriptor_sets)
 			msl_comp->add_discrete_descriptor_set(v);
@@ -1439,6 +1443,7 @@ static int main_inner(int argc, char *argv[])
 	        [&args](CLIParser &parser) { args.msl_r32ui_linear_texture_alignment = parser.next_uint(); });
 	cbs.add("--msl-r32ui-linear-texture-align-constant-id",
 	        [&args](CLIParser &parser) { args.msl_r32ui_alignment_constant_id = parser.next_uint(); });
+	cbs.add("--msl-texture-1d-as-2d", [&args](CLIParser &) { args.msl_texture_1d_as_2d = true; });
 	cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
 	cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
 		auto old_name = parser.next_string();
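
The new CLI switch maps onto CompilerMSL::Options::texture_1D_as_2D, and the framebuffer-fetch option is no longer gated on iOS (note the assignment moving out of the args.msl_ios block above). A minimal sketch of driving the same two options through the C++ API, using the option names exactly as they appear in this diff:

#include "spirv_msl.hpp"

#include <string>
#include <utility>
#include <vector>

// Sketch: what main.cpp now does for --msl-texture-1d-as-2d and
// --msl-framebuffer-fetch, condensed into one helper.
std::string compile_to_msl(std::vector<uint32_t> spirv_words)
{
	spirv_cross::CompilerMSL msl(std::move(spirv_words));

	auto opts = msl.get_msl_options();
	opts.texture_1D_as_2D = true;                // emit Dim1D images as texture2d
	opts.use_framebuffer_fetch_subpasses = true; // renamed; no longer iOS-only
	msl.set_msl_options(opts);

	return msl.compile();
}
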

3rdparty/spirv-cross/spirv_common.hpp (+0 -42)

@@ -357,28 +357,6 @@ public:
 		return TypedID<U>(*this);
 	}
 
-	bool operator==(const TypedID &other) const
-	{
-		return id == other.id;
-	}
-
-	bool operator!=(const TypedID &other) const
-	{
-		return id != other.id;
-	}
-
-	template <Types type>
-	bool operator==(const TypedID<type> &other) const
-	{
-		return id == uint32_t(other);
-	}
-
-	template <Types type>
-	bool operator!=(const TypedID<type> &other) const
-	{
-		return id != uint32_t(other);
-	}
-
 private:
 	uint32_t id = 0;
 };
@@ -403,26 +381,6 @@ public:
 		return id;
 	}
 
-	bool operator==(const TypedID &other) const
-	{
-		return id == other.id;
-	}
-
-	bool operator!=(const TypedID &other) const
-	{
-		return id != other.id;
-	}
-
-	bool operator==(const TypedID<TypeNone> &other) const
-	{
-		return id == uint32_t(other);
-	}
-
-	bool operator!=(const TypedID<TypeNone> &other) const
-	{
-		return id != uint32_t(other);
-	}
-
 private:
 	uint32_t id = 0;
 };
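
The removed operators look load-bearing, but both classes keep the implicit uint32_t conversion visible in the surrounding context, so == and != presumably still resolve through the built-in integer operators. A sketch under that assumption:

#include "spirv_common.hpp"

// Assumption: TypedID's non-explicit "operator uint32_t" (the "return id;"
// above) remains, so both operands convert and plain integer comparison
// applies; the hand-written overloads were redundant.
bool same_id(spirv_cross::ID a, spirv_cross::ID b)
{
	return a == b;
}
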

3rdparty/spirv-cross/spirv_cross_c.cpp (+2 -2)

@@ -599,8 +599,8 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
 		options->msl.enable_base_index_zero = value != 0;
 		break;
 
-	case SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS:
-		options->msl.ios_use_framebuffer_fetch_subpasses = value != 0;
+	case SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS:
+		options->msl.use_framebuffer_fetch_subpasses = value != 0;
 		break;
 
 	case SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH:

3rdparty/spirv-cross/spirv_cross_c.h (+4 -0)

@@ -606,7 +606,11 @@ typedef enum spvc_compiler_option
 	SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT,
 	SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D = 44 | SPVC_COMPILER_OPTION_MSL_BIT,
 	SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO = 45 | SPVC_COMPILER_OPTION_MSL_BIT,
+
+	/* Obsolete. Use MSL_FRAMEBUFFER_FETCH_SUBPASS instead. */
 	SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT,
+	SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT,
+
 	SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH = 47 | SPVC_COMPILER_OPTION_MSL_BIT,
 	SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY = 48 | SPVC_COMPILER_OPTION_MSL_BIT,
 	SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING = 49 | SPVC_COMPILER_OPTION_MSL_BIT,
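
Because the old enumerator is kept as an alias with the same value (46), existing C API callers compile and behave unchanged, while new code can use the platform-neutral name. A hedged sketch of a call site (standard spirv_cross_c.h entry points; error handling elided):

#include "spirv_cross_c.h"

// Sketch: enable framebuffer fetch through the C API. Either enumerator
// reaches the same MSL option, since both carry value 46.
void enable_framebuffer_fetch(spvc_compiler compiler)
{
	spvc_compiler_options options = nullptr;
	spvc_compiler_create_compiler_options(compiler, &options);
	spvc_compiler_options_set_uint(options, SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS, 1);
	spvc_compiler_install_compiler_options(compiler, options);
}
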

3rdparty/spirv-cross/spirv_glsl.cpp (+114 -4)

@@ -511,6 +511,7 @@ string CompilerGLSL::compile()
 	{
 		// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
 		backend.nonuniform_qualifier = "";
+		backend.needs_row_major_load_workaround = true;
 	}
 	backend.force_gl_in_out_block = true;
 	backend.supports_extensions = true;
@@ -3798,6 +3799,17 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
 			statement("");
 		}
 	}
+
+	if (!workaround_ubo_load_overload_types.empty())
+	{
+		for (auto &type_id : workaround_ubo_load_overload_types)
+		{
+			auto &type = get<SPIRType>(type_id);
+			statement(type_to_glsl(type), " SPIRV_Cross_workaround_load_row_major(", type_to_glsl(type),
+			          " wrap) { return wrap; }");
+		}
+		statement("");
+	}
 }
 
 // Returns a string representation of the ID, usable as a function arg.
@@ -9496,11 +9508,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (forward && ptr_expression)
 			ptr_expression->need_transpose = old_need_transpose;
 
+		bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
+
+		if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
+			rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
+
 		// By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
 		// However, if we try to load a complex, composite object from a flattened buffer,
 		// we should avoid emitting the same code over and over and lower the result to a temporary.
-		bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 &&
-		                      (type.basetype == SPIRType::Struct || (type.columns > 1));
+		bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
 
 		SPIRExpression *e = nullptr;
 		if (!forward && expression_is_non_value_type_array(ptr))
@@ -13253,8 +13269,14 @@ void CompilerGLSL::branch(BlockID from, BlockID to)
 		// and end the chain here.
 		statement("continue;");
 	}
-	else if (is_break(to))
+	else if (from != to && is_break(to))
 	{
+		// We cannot break to ourselves, so check explicitly for from != to.
+		// This case can trigger if a loop header is all three of these things:
+		// - Continue block
+		// - Loop header
+		// - Break merge target all at once ...
+
 		// Very dirty workaround.
 		// Switch constructs are able to break, but they cannot break out of a loop at the same time.
 		// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
@@ -14578,7 +14600,35 @@ void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::str
 		// so we might have to fixup the OpLoad-ed expression late.
 
 		auto start_array_index = expr.find_first_of('[');
-		auto end_array_index = expr.find_last_of(']');
+
+		if (start_array_index == string::npos)
+			return;
+
+		// Check for the edge case that a non-arrayed resource was marked to be nonuniform,
+		// and the bracket we found is actually part of non-resource related data.
+		if (expr.find_first_of(',') < start_array_index)
+			return;
+
+		// We've opened a bracket, track expressions until we can close the bracket.
+		// This must be our image index.
+		size_t end_array_index = string::npos;
+		unsigned bracket_count = 1;
+		for (size_t index = start_array_index + 1; index < expr.size(); index++)
+		{
+			if (expr[index] == ']')
+			{
+				if (--bracket_count == 0)
+				{
+					end_array_index = index;
+					break;
+				}
+			}
+			else if (expr[index] == '[')
+				bracket_count++;
+		}
+
+		assert(bracket_count == 0);
+
 		// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
 		// nothing we can do here to express that.
 		if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
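
The old find_last_of(']') picked the last bracket anywhere in the expression, which is wrong whenever anything follows the resource index. A CPU-side mirror of the new matcher (illustrative only; the expression text is hypothetical):

#include <cassert>
#include <string>

// Mirror of the scan above: find the ']' matching the first '['.
static size_t match_first_bracket(const std::string &expr)
{
	size_t start = expr.find_first_of('[');
	if (start == std::string::npos)
		return std::string::npos;
	unsigned depth = 1;
	for (size_t i = start + 1; i < expr.size(); i++)
	{
		if (expr[i] == '[')
			depth++;
		else if (expr[i] == ']' && --depth == 0)
			return i;
	}
	return std::string::npos;
}

int main()
{
	std::string expr = "texture(samplers[indices[i]], uvs[j])";
	assert(match_first_bracket(expr) == 27); // close of samplers[...]
	assert(expr.find_last_of(']') == 35);    // the old code's wrong pick
}
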
@@ -15087,3 +15137,63 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
 	weights[KHR_shader_subgroup_basic] = big_num;
 	weights[KHR_shader_subgroup_vote] = big_num;
 }
+
+void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
+{
+	// Must be ordered to maintain deterministic output, so vector is appropriate.
+	if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
+	    end(workaround_ubo_load_overload_types))
+	{
+		force_recompile();
+		workaround_ubo_load_overload_types.push_back(id);
+	}
+}
+
+void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
+{
+	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
+	// To load these types correctly, we must first wrap them in a dummy function which only purpose is to
+	// ensure row_major decoration is actually respected.
+	auto *var = maybe_get_backing_variable(ptr);
+	if (!var)
+		return;
+
+	auto &backing_type = get<SPIRType>(var->basetype);
+	bool is_ubo = backing_type.basetype == SPIRType::Struct &&
+	              backing_type.storage == StorageClassUniform &&
+	              has_decoration(backing_type.self, DecorationBlock);
+	if (!is_ubo)
+		return;
+
+	auto *type = &get<SPIRType>(loaded_type);
+	bool rewrite = false;
+
+	if (is_matrix(*type))
+	{
+		// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
+		// we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
+		// If there is any row-major action going on, we apply the workaround.
+		// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
+		// If an access chain occurred, the workaround is not required, so loading vectors or scalars don't need workaround.
+		type = &backing_type;
+	}
+
+	if (type->basetype == SPIRType::Struct)
+	{
+		// If we're loading a struct where any member is a row-major matrix, apply the workaround.
+		for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
+		{
+			if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
+			{
+				rewrite = true;
+				break;
+			}
+		}
+	}
+
+	if (rewrite)
+	{
+		request_workaround_wrapper_overload(loaded_type);
+		expr = join("SPIRV_Cross_workaround_load_row_major(", expr, ")");
+	}
+}
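
Net effect of the pieces above: when any member of the backing UBO block is row-major, the load expression gets wrapped in a one-argument identity function, which is enough to make the affected AMD Windows drivers honor the decoration. Representative emitted text, shown here as C++ string data since the shader itself is a hypothetical example:

// Emitted once per loaded type by emit_extension_workarounds():
static const char *wrapper =
	"mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }";

// A load before and after rewrite_load_for_wrapped_row_major():
static const char *load_before = "mat4 m = ubo.mvp;";
static const char *load_after  = "mat4 m = SPIRV_Cross_workaround_load_row_major(ubo.mvp);";
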

3rdparty/spirv-cross/spirv_glsl.hpp (+5 -0)

@@ -560,6 +560,7 @@ protected:
 		bool support_small_type_sampling_result = false;
 		bool support_case_fallthrough = true;
 		bool use_array_constructor = false;
+		bool needs_row_major_load_workaround = false;
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -784,6 +785,10 @@ protected:
 	// Currently used by NMin/Max/Clamp implementations.
 	std::unordered_map<uint32_t, uint32_t> extra_sub_expressions;
 
+	SmallVector<TypeID> workaround_ubo_load_overload_types;
+	void request_workaround_wrapper_overload(TypeID id);
+	void rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr);
+
 	uint32_t statement_count = 0;
 
 	inline bool is_legacy() const

3rdparty/spirv-cross/spirv_msl.cpp (+382 -90)

@@ -160,7 +160,7 @@ void CompilerMSL::build_implicit_builtins()
 	bool need_sample_mask = msl_options.additional_fixed_sample_mask != 0xffffffff;
 	if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
 	    need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params ||
-	    needs_subgroup_invocation_id || need_sample_mask)
+	    needs_subgroup_invocation_id || needs_subgroup_size || need_sample_mask)
 	{
 		bool has_frag_coord = false;
 		bool has_sample_id = false;
@@ -197,7 +197,7 @@ void CompilerMSL::build_implicit_builtins()
 			if (var.storage != StorageClassInput)
 				return;
 
-			if (need_subpass_input && (!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses))
+			if (need_subpass_input && (!msl_options.use_framebuffer_fetch_subpasses))
 			{
 				switch (builtin)
 				{
@@ -287,7 +287,7 @@ void CompilerMSL::build_implicit_builtins()
 				has_subgroup_invocation_id = true;
 			}
 
-			if (need_subgroup_ge_mask && builtin == BuiltInSubgroupSize)
+			if ((need_subgroup_ge_mask || needs_subgroup_size) && builtin == BuiltInSubgroupSize)
 			{
 				builtin_subgroup_size_id = var.self;
 				mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var.self);
@@ -331,7 +331,7 @@ void CompilerMSL::build_implicit_builtins()
 		// Use Metal's native frame-buffer fetch API for subpass inputs.
 		if ((!has_frag_coord || (msl_options.multiview && !has_view_idx) ||
 		     (msl_options.arrayed_subpass_input && !msl_options.multiview && !has_layer)) &&
-		    (!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses) && need_subpass_input)
+		    (!msl_options.use_framebuffer_fetch_subpasses) && need_subpass_input)
 		{
 			if (!has_frag_coord)
 			{
@@ -593,7 +593,7 @@ void CompilerMSL::build_implicit_builtins()
 			mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id);
 		}
 
-		if (!has_subgroup_size && need_subgroup_ge_mask)
+		if (!has_subgroup_size && (need_subgroup_ge_mask || needs_subgroup_size))
 		{
 			uint32_t offset = ir.increase_bound_by(2);
 			uint32_t type_ptr_id = offset;
@@ -1265,7 +1265,8 @@ void CompilerMSL::preprocess_op_codes()
 		add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"");
 	}
 
-	// Metal vertex functions that write to resources must disable rasterization and return void.
+	// Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to
+	// resources must disable rasterization and return void.
 	if (preproc.uses_resource_write)
 		is_rasterization_disabled = true;
 
@@ -1280,6 +1281,8 @@ void CompilerMSL::preprocess_op_codes()
 
 	if (preproc.needs_subgroup_invocation_id)
 		needs_subgroup_invocation_id = true;
+	if (preproc.needs_subgroup_size)
+		needs_subgroup_size = true;
 }
 
 // Move the Private and Workgroup global variables to the entry function.
@@ -1372,7 +1375,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 				// Use Metal's native frame-buffer fetch API for subpass inputs.
 				auto &type = get<SPIRType>(ops[0]);
 				if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
-				    (!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses))
+				    (!msl_options.use_framebuffer_fetch_subpasses))
 				{
 					// Implicitly reads gl_FragCoord.
 					assert(builtin_frag_coord_id != 0);
@@ -4608,6 +4611,59 @@ void CompilerMSL::emit_custom_functions()
 			statement("");
 			break;
 
+		case SPVFuncImplSubgroupBroadcast:
+			// Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting
+			// them as integers.
+			statement("template<typename T>");
+			statement("inline T spvSubgroupBroadcast(T value, ushort lane)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return quad_broadcast(value, lane);");
+			else
+				statement("return simd_broadcast(value, lane);");
+			end_scope();
+			statement("");
+			statement("template<>");
+			statement("inline bool spvSubgroupBroadcast(bool value, ushort lane)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return !!quad_broadcast((ushort)value, lane);");
+			else
+				statement("return !!simd_broadcast((ushort)value, lane);");
+			end_scope();
+			statement("");
+			statement("template<uint N>");
+			statement("inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);");
+			else
+				statement("return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane);");
+			end_scope();
+			statement("");
+			break;
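
This case, like every shuffle case that follows, repeats one pattern: a generic template forwards to the native intrinsic, plus bool and vec<bool, N> specializations that round-trip through ushort, because the Metal simd/quad intrinsics are not defined for bool; the iOS paths use the quad_* variants, presumably because quad scope was what iOS exposed at the time. The round-trip in isolation (illustrative):

// Widen the bool for the intrinsic, then fold any non-zero result back to a
// canonical true with "!!".
bool bool_roundtrip(bool value)
{
	unsigned short wide = static_cast<unsigned short>(value); // 0 or 1
	return !!wide;
}
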
+
+		case SPVFuncImplSubgroupBroadcastFirst:
+			statement("template<typename T>");
+			statement("inline T spvSubgroupBroadcastFirst(T value)");
+			begin_scope();
+			statement("return simd_broadcast_first(value);");
+			end_scope();
+			statement("");
+			statement("template<>");
+			statement("inline bool spvSubgroupBroadcastFirst(bool value)");
+			begin_scope();
+			statement("return !!simd_broadcast_first((ushort)value);");
+			end_scope();
+			statement("");
+			statement("template<uint N>");
+			statement("inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value)");
+			begin_scope();
+			statement("return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value);");
+			end_scope();
+			statement("");
+			break;
+
 		case SPVFuncImplSubgroupBallot:
 			statement("inline uint4 spvSubgroupBallot(bool value)");
 			begin_scope();
@@ -4631,8 +4687,11 @@ void CompilerMSL::emit_custom_functions()
 			break;
 
 		case SPVFuncImplSubgroupBallotFindLSB:
-			statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot)");
+			statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)");
 			begin_scope();
+			statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
+			          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
+			statement("ballot &= mask;");
 			statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + "
 			          "ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);");
 			end_scope();
@@ -4640,8 +4699,11 @@ void CompilerMSL::emit_custom_functions()
 			break;
 
 		case SPVFuncImplSubgroupBallotFindMSB:
-			statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot)");
+			statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)");
 			begin_scope();
+			statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
+			          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
+			statement("ballot &= mask;");
 			statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - "
 			          "(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), "
 			          "ballot.z == 0), ballot.w == 0);");
@@ -4650,24 +4712,31 @@ void CompilerMSL::emit_custom_functions()
 			break;
 
 		case SPVFuncImplSubgroupBallotBitCount:
-			statement("inline uint spvSubgroupBallotBitCount(uint4 ballot)");
+			statement("inline uint spvPopCount4(uint4 ballot)");
 			begin_scope();
 			statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);");
 			end_scope();
 			statement("");
+			statement("inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)");
+			begin_scope();
+			statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
+			          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
+			statement("return spvPopCount4(ballot & mask);");
+			end_scope();
+			statement("");
 			statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
 			begin_scope();
 			statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), "
 			          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), "
 			          "uint2(0));");
-			statement("return spvSubgroupBallotBitCount(ballot & mask);");
+			statement("return spvPopCount4(ballot & mask);");
 			end_scope();
 			statement("");
 			statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
 			begin_scope();
 			statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), "
 			          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));");
-			statement("return spvSubgroupBallotBitCount(ballot & mask);");
+			statement("return spvPopCount4(ballot & mask);");
 			end_scope();
 			statement("");
 			break;
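
The new gl_SubgroupSize parameter exists so these helpers can mask off ballot bits belonging to lanes beyond the subgroup size before counting or scanning. A CPU-side check of the mask arithmetic the generated MSL builds with extract_bits (illustrative; low_bits stands in for the intrinsic, and the 48-wide subgroup is hypothetical):

#include <algorithm>
#include <cassert>
#include <cstdint>

// extract_bits(0xFFFFFFFF, 0, n) yields the low n bits set, for n in [0, 32].
static uint32_t low_bits(uint32_t n)
{
	return n >= 32 ? 0xFFFFFFFFu : ((1u << n) - 1u);
}

int main()
{
	// Mirrors: uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(size, 32u)),
	//     extract_bits(0xFFFFFFFF, 0, (uint)max((int)size - 32, 0)), uint2(0));
	uint32_t size = 48;
	uint32_t mask_x = low_bits(std::min(size, 32u));
	uint32_t mask_y = low_bits(uint32_t(std::max(int(size) - 32, 0)));
	assert(mask_x == 0xFFFFFFFFu); // lanes 0-31 live
	assert(mask_y == 0x0000FFFFu); // lanes 32-47 live, 48-63 masked off
}
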
@@ -4680,7 +4749,7 @@ void CompilerMSL::emit_custom_functions()
 			statement("template<typename T>");
 			statement("inline bool spvSubgroupAllEqual(T value)");
 			begin_scope();
-			statement("return simd_all(value == simd_broadcast_first(value));");
+			statement("return simd_all(all(value == simd_broadcast_first(value)));");
 			end_scope();
 			statement("");
 			statement("template<>");
@@ -4689,6 +4758,184 @@ void CompilerMSL::emit_custom_functions()
 			statement("return simd_all(value) || !simd_any(value);");
 			end_scope();
 			statement("");
+			statement("template<uint N>");
+			statement("inline bool spvSubgroupAllEqual(vec<bool, N> value)");
+			begin_scope();
+			statement("return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value)));");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplSubgroupShuffle:
+			statement("template<typename T>");
+			statement("inline T spvSubgroupShuffle(T value, ushort lane)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return quad_shuffle(value, lane);");
+			else
+				statement("return simd_shuffle(value, lane);");
+			end_scope();
+			statement("");
+			statement("template<>");
+			statement("inline bool spvSubgroupShuffle(bool value, ushort lane)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return !!quad_shuffle((ushort)value, lane);");
+			else
+				statement("return !!simd_shuffle((ushort)value, lane);");
+			end_scope();
+			statement("");
+			statement("template<uint N>");
+			statement("inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return (vec<bool, N>)quad_shuffle((vec<ushort, N>)value, lane);");
+			else
+				statement("return (vec<bool, N>)simd_shuffle((vec<ushort, N>)value, lane);");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplSubgroupShuffleXor:
+			statement("template<typename T>");
+			statement("inline T spvSubgroupShuffleXor(T value, ushort mask)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return quad_shuffle_xor(value, mask);");
+			else
+				statement("return simd_shuffle_xor(value, mask);");
+			end_scope();
+			statement("");
+			statement("template<>");
+			statement("inline bool spvSubgroupShuffleXor(bool value, ushort mask)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return !!quad_shuffle_xor((ushort)value, mask);");
+			else
+				statement("return !!simd_shuffle_xor((ushort)value, mask);");
+			end_scope();
+			statement("");
+			statement("template<uint N>");
+			statement("inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, mask);");
+			else
+				statement("return (vec<bool, N>)simd_shuffle_xor((vec<ushort, N>)value, mask);");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplSubgroupShuffleUp:
+			statement("template<typename T>");
+			statement("inline T spvSubgroupShuffleUp(T value, ushort delta)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return quad_shuffle_up(value, delta);");
+			else
+				statement("return simd_shuffle_up(value, delta);");
+			end_scope();
+			statement("");
+			statement("template<>");
+			statement("inline bool spvSubgroupShuffleUp(bool value, ushort delta)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return !!quad_shuffle_up((ushort)value, delta);");
+			else
+				statement("return !!simd_shuffle_up((ushort)value, delta);");
+			end_scope();
+			statement("");
+			statement("template<uint N>");
+			statement("inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return (vec<bool, N>)quad_shuffle_up((vec<ushort, N>)value, delta);");
+			else
+				statement("return (vec<bool, N>)simd_shuffle_up((vec<ushort, N>)value, delta);");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplSubgroupShuffleDown:
+			statement("template<typename T>");
+			statement("inline T spvSubgroupShuffleDown(T value, ushort delta)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return quad_shuffle_down(value, delta);");
+			else
+				statement("return simd_shuffle_down(value, delta);");
+			end_scope();
+			statement("");
+			statement("template<>");
+			statement("inline bool spvSubgroupShuffleDown(bool value, ushort delta)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return !!quad_shuffle_down((ushort)value, delta);");
+			else
+				statement("return !!simd_shuffle_down((ushort)value, delta);");
+			end_scope();
+			statement("");
+			statement("template<uint N>");
+			statement("inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta)");
+			begin_scope();
+			if (msl_options.is_ios())
+				statement("return (vec<bool, N>)quad_shuffle_down((vec<ushort, N>)value, delta);");
+			else
+				statement("return (vec<bool, N>)simd_shuffle_down((vec<ushort, N>)value, delta);");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplQuadBroadcast:
+			statement("template<typename T>");
+			statement("inline T spvQuadBroadcast(T value, uint lane)");
+			begin_scope();
+			statement("return quad_broadcast(value, lane);");
+			end_scope();
+			statement("");
+			statement("template<>");
+			statement("inline bool spvQuadBroadcast(bool value, uint lane)");
+			begin_scope();
+			statement("return !!quad_broadcast((ushort)value, lane);");
+			end_scope();
+			statement("");
+			statement("template<uint N>");
+			statement("inline vec<bool, N> spvQuadBroadcast(vec<bool, N> value, uint lane)");
+			begin_scope();
+			statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplQuadSwap:
+			// We can implement this easily based on the following table giving
+			// the target lane ID from the direction and current lane ID:
+			//        Direction
+			//      | 0 | 1 | 2 |
+			//   ---+---+---+---+
+			// L 0  | 1   2   3
+			// a 1  | 0   3   2
+			// n 2  | 3   0   1
+			// e 3  | 2   1   0
+			// Notice that target = source ^ (direction + 1).
+			statement("template<typename T>");
+			statement("inline T spvQuadSwap(T value, uint dir)");
+			begin_scope();
+			statement("return quad_shuffle_xor(value, dir + 1);");
+			end_scope();
+			statement("");
+			statement("template<>");
+			statement("inline bool spvQuadSwap(bool value, uint dir)");
+			begin_scope();
+			statement("return !!quad_shuffle_xor((ushort)value, dir + 1);");
+			end_scope();
+			statement("");
+			statement("template<uint N>");
+			statement("inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir)");
+			begin_scope();
+			statement("return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1);");
+			end_scope();
+			statement("");
 			break;
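
The XOR identity in the comment is easy to verify exhaustively; a quick CPU-side check of target = source ^ (direction + 1) against the table above:

#include <cassert>

int main()
{
	// expected[dir][lane], transcribed from the table in the comment above.
	const unsigned expected[3][4] = {
		{ 1, 0, 3, 2 }, // dir 0: horizontal swap
		{ 2, 3, 0, 1 }, // dir 1: vertical swap
		{ 3, 2, 1, 0 }, // dir 2: diagonal swap
	};
	for (unsigned dir = 0; dir < 3; dir++)
		for (unsigned lane = 0; lane < 4; lane++)
			assert((lane ^ (dir + 1)) == expected[dir][lane]);
}
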
 
 		case SPVFuncImplReflectScalar:
@@ -7168,7 +7415,7 @@ void CompilerMSL::emit_texture_op(const Instruction &i, bool sparse)
 	if (sparse)
 		SPIRV_CROSS_THROW("Sparse feedback not yet supported in MSL.");
 
-	if (msl_options.is_ios() && msl_options.ios_use_framebuffer_fetch_subpasses)
+	if (msl_options.use_framebuffer_fetch_subpasses)
 	{
 		auto *ops = stream(i);
 
@@ -8265,25 +8512,26 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
 		break;
 	}
 
-	if (args.base.is_fetch && args.offset)
-	{
-		// Fetch offsets must be applied directly to the coordinate.
-		forward = forward && should_forward(args.offset);
-		auto &type = expression_type(args.offset);
-		if (type.basetype != SPIRType::UInt)
-			tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.offset);
-		else
-			tex_coords += " + " + to_enclosed_expression(args.offset);
-	}
-	else if (args.base.is_fetch && args.coffset)
+	if (args.base.is_fetch && (args.offset || args.coffset))
 	{
+		uint32_t offset_expr = args.offset ? args.offset : args.coffset;
 		// Fetch offsets must be applied directly to the coordinate.
-		forward = forward && should_forward(args.coffset);
-		auto &type = expression_type(args.coffset);
-		if (type.basetype != SPIRType::UInt)
-			tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.coffset);
+		forward = forward && should_forward(offset_expr);
+		auto &type = expression_type(offset_expr);
+		if (imgtype.image.dim == Dim1D && msl_options.texture_1D_as_2D)
+		{
+			if (type.basetype != SPIRType::UInt)
+				tex_coords += join(" + uint2(", bitcast_expression(SPIRType::UInt, offset_expr), ", 0)");
+			else
+				tex_coords += join(" + uint2(", to_enclosed_expression(offset_expr), ", 0)");
+		}
 		else
-			tex_coords += " + " + to_enclosed_expression(args.coffset);
+		{
+			if (type.basetype != SPIRType::UInt)
+				tex_coords += " + " + bitcast_expression(SPIRType::UInt, offset_expr);
+			else
+				tex_coords += " + " + to_enclosed_expression(offset_expr);
+		}
 	}
 
 	// If projection, use alt coord as divisor
@@ -8454,6 +8702,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
 		string grad_opt;
 		switch (imgtype.image.dim)
 		{
+		case Dim1D:
 		case Dim2D:
 			grad_opt = "2d";
 			break;
@@ -8489,30 +8738,42 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
 
 	// Add offsets
 	string offset_expr;
+	const SPIRType *offset_type = nullptr;
 	if (args.coffset && !args.base.is_fetch)
 	{
 		forward = forward && should_forward(args.coffset);
 		offset_expr = to_expression(args.coffset);
+		offset_type = &expression_type(args.coffset);
 	}
 	else if (args.offset && !args.base.is_fetch)
 	{
 		forward = forward && should_forward(args.offset);
 		offset_expr = to_expression(args.offset);
+		offset_type = &expression_type(args.offset);
 	}
 
 	if (!offset_expr.empty())
 	{
 		switch (imgtype.image.dim)
 		{
+		case Dim1D:
+			if (!msl_options.texture_1D_as_2D)
+				break;
+			if (offset_type->vecsize > 1)
+				offset_expr = enclose_expression(offset_expr) + ".x";
+
+			farg_str += join(", int2(", offset_expr, ", 0)");
+			break;
+
 		case Dim2D:
-			if (coord_type.vecsize > 2)
+			if (offset_type->vecsize > 2)
 				offset_expr = enclose_expression(offset_expr) + ".xy";
 
 			farg_str += ", " + offset_expr;
 			break;
 
 		case Dim3D:
-			if (coord_type.vecsize > 3)
+			if (offset_type->vecsize > 3)
 				offset_expr = enclose_expression(offset_expr) + ".xyz";
 
 			farg_str += ", " + offset_expr;
@@ -8532,7 +8793,10 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
 		if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler)
 		{
 			forward = forward && should_forward(args.component);
-			farg_str += ", " + to_component_argument(args.component);
+
+			if (const auto *var = maybe_get_backing_variable(img))
+				if (!image_is_comparison(get<SPIRType>(var->basetype), var->self))
+					farg_str += ", " + to_component_argument(args.component);
 		}
 	}
 
@@ -8962,9 +9226,9 @@ string CompilerMSL::to_swizzle_expression(uint32_t id)
 	auto index = expr.find_first_of('[');
 
 	// If an image is part of an argument buffer translate this to a legal identifier.
-	for (auto &c : expr)
-		if (c == '.')
-			c = '_';
+	string::size_type period = 0;
+	while ((period = expr.find_first_of('.', period)) != string::npos && period < index)
+		expr[period] = '_';
 
 	if (index == string::npos)
 		return expr + swizzle_name_suffix;
@@ -9828,9 +10092,9 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
 				if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage))
 				{
 					if (!msl_options.supports_msl_version(2))
-						SPIRV_CROSS_THROW("Post-depth coverage requires Metal 2.0.");
-					if (!msl_options.is_ios())
-						SPIRV_CROSS_THROW("Post-depth coverage is only supported on iOS.");
+						SPIRV_CROSS_THROW("Post-depth coverage requires MSL 2.0.");
+					if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3))
+						SPIRV_CROSS_THROW("Post-depth coverage on Mac requires MSL 2.3.");
 					ep_args += ", post_depth_coverage";
 				}
 				ep_args += "]]";
@@ -10207,6 +10471,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 			}
 			else
 			{
+				if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3))
+					SPIRV_CROSS_THROW("Framebuffer fetch on Mac is not supported before MSL 2.3.");
 				ep_args += image_type_glsl(type, var_id) + " " + r.name;
 				ep_args += " [[color(" + convert_to_string(r.index) + ")]]";
 			}
@@ -10449,7 +10715,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
 					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
 				entry_func.fixup_hooks_in.push_back([=]() {
 					statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
-					          to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (",
+					          to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? uint4(0, (1 << (",
 					          to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
 					          to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
 				});
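
The "> 32" to ">= 32" change fixes lane 32 exactly: under the old test, lane 32 fell into the low-word path and computed 1 << 32, which is out of range for a 32-bit shift. A CPU-side mirror of the corrected fixup (illustrative):

#include <cassert>
#include <cstdint>

// Mirrors the hook above: gl_SubgroupEqMask as a 128-bit mask in four words.
static void eq_mask(uint32_t invocation_id, uint32_t m[4])
{
	m[2] = 0;
	m[3] = 0;
	if (invocation_id >= 32) // the old "> 32" mishandled exactly 32
	{
		m[0] = 0;
		m[1] = 1u << (invocation_id - 32);
	}
	else
	{
		m[0] = 1u << invocation_id;
		m[1] = 0;
	}
}

int main()
{
	uint32_t m[4];
	eq_mask(32, m);
	assert(m[0] == 0 && m[1] == 1u); // lane 32 lives in bit 0 of word 1
}
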
@@ -10461,25 +10727,25 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
 					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
 				entry_func.fixup_hooks_in.push_back([=]() {
 					// Case where index < 32, size < 32:
-					// mask0 = bfe(0xFFFFFFFF, index, size - index);
-					// mask1 = bfe(0xFFFFFFFF, 0, 0); // Gives 0
+					// mask0 = bfi(0, 0xFFFFFFFF, index, size - index);
+					// mask1 = bfi(0, 0xFFFFFFFF, 0, 0); // Gives 0
 					// Case where index < 32 but size >= 32:
-					// mask0 = bfe(0xFFFFFFFF, index, 32 - index);
-					// mask1 = bfe(0xFFFFFFFF, 0, size - 32);
+					// mask0 = bfi(0, 0xFFFFFFFF, index, 32 - index);
+					// mask1 = bfi(0, 0xFFFFFFFF, 0, size - 32);
 					// Case where index >= 32:
-					// mask0 = bfe(0xFFFFFFFF, 32, 0); // Gives 0
-					// mask1 = bfe(0xFFFFFFFF, index - 32, size - index);
+					// mask0 = bfi(0, 0xFFFFFFFF, 32, 0); // Gives 0
+					// mask1 = bfi(0, 0xFFFFFFFF, index - 32, size - index);
 					// This is expressed without branches to avoid divergent
 					// control flow--hence the complicated min/max expressions.
 					// This is further complicated by the fact that if you attempt
-					// to bfe out-of-bounds on Metal, undefined behavior is the
+					// to bfi/bfe out-of-bounds on Metal, undefined behavior is the
 					// result.
 					statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
-					          " = uint4(extract_bits(0xFFFFFFFF, min(",
+					          " = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
 					          to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)",
 					          to_expression(builtin_subgroup_size_id), ", 32) - (int)",
 					          to_expression(builtin_subgroup_invocation_id_id),
-					          ", 0)), extract_bits(0xFFFFFFFF, (uint)max((int)",
+					          ", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
 					          to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)",
 					          to_expression(builtin_subgroup_size_id), " - (int)max(",
 					          to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));");
@@ -10494,11 +10760,11 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
 					// The same logic applies here, except now the index is one
 					// more than the subgroup invocation ID.
 					statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
-					          " = uint4(extract_bits(0xFFFFFFFF, min(",
+					          " = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
 					          to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)",
 					          to_expression(builtin_subgroup_size_id), ", 32) - (int)",
 					          to_expression(builtin_subgroup_invocation_id_id),
-					          " - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)",
+					          " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
 					          to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)",
 					          to_expression(builtin_subgroup_size_id), " - (int)max(",
 					          to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));");
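
Switching from extract_bits to insert_bits matters because the mask's set bits must sit at the lane offset: extract_bits(0xFFFFFFFF, offset, count) returns count ones shifted down to bit 0, while insert_bits(0u, 0xFFFFFFFF, offset, count) places them at bit offset. A CPU-side comparison (illustrative; offset + count <= 32, as in the generated code):

#include <cassert>
#include <cstdint>

static uint32_t ones(uint32_t count)
{
	return count >= 32 ? 0xFFFFFFFFu : ((1u << count) - 1u);
}

// extract_bits(0xFFFFFFFF, offset, count): field shifted down to bit 0.
static uint32_t bfe_all_ones(uint32_t /*offset*/, uint32_t count)
{
	return ones(count);
}

// insert_bits(0u, 0xFFFFFFFF, offset, count): field placed at bit offset.
static uint32_t bfi_all_ones(uint32_t offset, uint32_t count)
{
	return ones(count) << offset;
}

int main()
{
	// gl_SubgroupGeMask for invocation 4 in a 16-wide subgroup: bits 4..15.
	assert(bfi_all_ones(4, 12) == 0x0000FFF0u); // correct
	assert(bfe_all_ones(4, 12) == 0x00000FFFu); // what the old code produced
}
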
@@ -10834,8 +11100,8 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base
 
 bool CompilerMSL::type_is_msl_framebuffer_fetch(const SPIRType &type) const
 {
-	return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && msl_options.is_ios() &&
-	       msl_options.ios_use_framebuffer_fetch_subpasses;
+	return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
+	       msl_options.use_framebuffer_fetch_subpasses;
 }
 
 string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
@@ -11062,6 +11328,11 @@ void CompilerMSL::replace_illegal_names()
 		"fragment",
 		"compute",
 		"bias",
+		"level",
+		"gradient2d",
+		"gradientcube",
+		"gradient3d",
+		"min_lod_clamp",
 		"assert",
 		"VARIABLE_TRACEPOINT",
 		"STATIC_DATA_TRACEPOINT",
@@ -11850,12 +12121,11 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i)
 		break;
 
 	case OpGroupNonUniformBroadcast:
-		emit_binary_func_op(result_type, id, ops[3], ops[4],
-		                    msl_options.is_ios() ? "quad_broadcast" : "simd_broadcast");
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBroadcast");
 		break;
 
 	case OpGroupNonUniformBroadcastFirst:
-		emit_unary_func_op(result_type, id, ops[3], "simd_broadcast_first");
+		emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBroadcastFirst");
 		break;
 
 	case OpGroupNonUniformBallot:
@@ -11871,46 +12141,50 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i)
 		break;
 
 	case OpGroupNonUniformBallotFindLSB:
-		emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindLSB");
+		emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindLSB");
 		break;
 
 	case OpGroupNonUniformBallotFindMSB:
-		emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindMSB");
+		emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindMSB");
 		break;
 
 	case OpGroupNonUniformBallotBitCount:
 	{
 		auto operation = static_cast<GroupOperation>(ops[3]);
-		if (operation == GroupOperationReduce)
-			emit_unary_func_op(result_type, id, ops[4], "spvSubgroupBallotBitCount");
-		else if (operation == GroupOperationInclusiveScan)
+		switch (operation)
+		{
+		case GroupOperationReduce:
+			emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_size_id, "spvSubgroupBallotBitCount");
+			break;
+		case GroupOperationInclusiveScan:
 			emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
 			                    "spvSubgroupBallotInclusiveBitCount");
-		else if (operation == GroupOperationExclusiveScan)
+			break;
+		case GroupOperationExclusiveScan:
 			emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
 			                    "spvSubgroupBallotExclusiveBitCount");
-		else
+			break;
+		default:
 			SPIRV_CROSS_THROW("Invalid BitCount operation.");
+			break;
+		}
 		break;
 	}
 
 	case OpGroupNonUniformShuffle:
-		emit_binary_func_op(result_type, id, ops[3], ops[4], msl_options.is_ios() ? "quad_shuffle" : "simd_shuffle");
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffle");
 		break;
 
 	case OpGroupNonUniformShuffleXor:
-		emit_binary_func_op(result_type, id, ops[3], ops[4],
-		                    msl_options.is_ios() ? "quad_shuffle_xor" : "simd_shuffle_xor");
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleXor");
 		break;
 
 	case OpGroupNonUniformShuffleUp:
-		emit_binary_func_op(result_type, id, ops[3], ops[4],
-		                    msl_options.is_ios() ? "quad_shuffle_up" : "simd_shuffle_up");
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleUp");
 		break;
 
 	case OpGroupNonUniformShuffleDown:
-		emit_binary_func_op(result_type, id, ops[3], ops[4],
-		                    msl_options.is_ios() ? "quad_shuffle_down" : "simd_shuffle_down");
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleDown");
 		break;
 
 	case OpGroupNonUniformAll:
@@ -12018,26 +12292,11 @@ case OpGroupNonUniform##op: \
 #undef MSL_GROUP_OP_CAST
 
 	case OpGroupNonUniformQuadSwap:
-	{
-		// We can implement this easily based on the following table giving
-		// the target lane ID from the direction and current lane ID:
-		//        Direction
-		//      | 0 | 1 | 2 |
-		//   ---+---+---+---+
-		// L 0  | 1   2   3
-		// a 1  | 0   3   2
-		// n 2  | 3   0   1
-		// e 3  | 2   1   0
-		// Notice that target = source ^ (direction + 1).
-		uint32_t mask = evaluate_constant_u32(ops[4]) + 1;
-		uint32_t mask_id = ir.increase_bound_by(1);
-		set<SPIRConstant>(mask_id, expression_type_id(ops[4]), mask, false);
-		emit_binary_func_op(result_type, id, ops[3], mask_id, "quad_shuffle_xor");
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadSwap");
 		break;
-	}
 
 	case OpGroupNonUniformQuadBroadcast:
-		emit_binary_func_op(result_type, id, ops[3], ops[4], "quad_broadcast");
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadBroadcast");
 		break;
 
 	default:
@@ -12930,7 +13189,8 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
 	}
 
 	case OpImageWrite:
-		uses_resource_write = true;
+		if (!compiler.msl_options.supports_msl_version(2, 2))
+			uses_resource_write = true;
 		break;
 
 	case OpStore:
@@ -12990,8 +13250,15 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
 		needs_subgroup_invocation_id = true;
 		break;
 
+	case OpGroupNonUniformBallotFindLSB:
+	case OpGroupNonUniformBallotFindMSB:
+		needs_subgroup_size = true;
+		break;
+
 	case OpGroupNonUniformBallotBitCount:
-		if (args[3] != GroupOperationReduce)
+		if (args[3] == GroupOperationReduce)
+			needs_subgroup_size = true;
+		else
 			needs_subgroup_invocation_id = true;
 		break;
 
@@ -13035,7 +13302,8 @@ void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id)
 {
 	auto *p_var = compiler.maybe_get_backing_variable(var_id);
 	StorageClass sc = p_var ? p_var->storage : StorageClassMax;
-	if (sc == StorageClassUniform || sc == StorageClassStorageBuffer)
+	if (!compiler.msl_options.supports_msl_version(2, 1) &&
+	    (sc == StorageClassUniform || sc == StorageClassStorageBuffer))
 		uses_resource_write = true;
 }
 
@@ -13174,6 +13442,12 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 		break;
 	}
 
+	case OpGroupNonUniformBroadcast:
+		return SPVFuncImplSubgroupBroadcast;
+
+	case OpGroupNonUniformBroadcastFirst:
+		return SPVFuncImplSubgroupBroadcastFirst;
+
 	case OpGroupNonUniformBallot:
 		return SPVFuncImplSubgroupBallot;
 
@@ -13193,6 +13467,24 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 	case OpGroupNonUniformAllEqual:
 		return SPVFuncImplSubgroupAllEqual;
 
+	case OpGroupNonUniformShuffle:
+		return SPVFuncImplSubgroupShuffle;
+
+	case OpGroupNonUniformShuffleXor:
+		return SPVFuncImplSubgroupShuffleXor;
+
+	case OpGroupNonUniformShuffleUp:
+		return SPVFuncImplSubgroupShuffleUp;
+
+	case OpGroupNonUniformShuffleDown:
+		return SPVFuncImplSubgroupShuffleDown;
+
+	case OpGroupNonUniformQuadBroadcast:
+		return SPVFuncImplQuadBroadcast;
+
+	case OpGroupNonUniformQuadSwap:
+		return SPVFuncImplQuadSwap;
+
 	default:
 		break;
 	}

3rdparty/spirv-cross/spirv_msl.hpp (+11 -1)

@@ -315,7 +315,7 @@ public:
 		bool ios_support_base_vertex_instance = false;
 
 		// Use Metal's native frame-buffer fetch API for subpass inputs.
-		bool ios_use_framebuffer_fetch_subpasses = false;
+		bool use_framebuffer_fetch_subpasses = false;
 
 		// Enables use of "fma" intrinsic for invariant float math
 		bool invariant_float_math = false;
@@ -600,12 +600,20 @@ protected:
 		SPVFuncImplTextureSwizzle,
 		SPVFuncImplGatherSwizzle,
 		SPVFuncImplGatherCompareSwizzle,
+		SPVFuncImplSubgroupBroadcast,
+		SPVFuncImplSubgroupBroadcastFirst,
 		SPVFuncImplSubgroupBallot,
 		SPVFuncImplSubgroupBallotBitExtract,
 		SPVFuncImplSubgroupBallotFindLSB,
 		SPVFuncImplSubgroupBallotFindMSB,
 		SPVFuncImplSubgroupBallotBitCount,
 		SPVFuncImplSubgroupAllEqual,
+		SPVFuncImplSubgroupShuffle,
+		SPVFuncImplSubgroupShuffleXor,
+		SPVFuncImplSubgroupShuffleUp,
+		SPVFuncImplSubgroupShuffleDown,
+		SPVFuncImplQuadBroadcast,
+		SPVFuncImplQuadSwap,
 		SPVFuncImplReflectScalar,
 		SPVFuncImplRefractScalar,
 		SPVFuncImplFaceForwardScalar,
@@ -913,6 +921,7 @@ protected:
 	bool used_swizzle_buffer = false;
 	bool added_builtin_tess_level = false;
 	bool needs_subgroup_invocation_id = false;
+	bool needs_subgroup_size = false;
 	std::string qual_pos_var_name;
 	std::string stage_in_var_name = "in";
 	std::string stage_out_var_name = "out";
@@ -984,6 +993,7 @@ protected:
 		bool uses_atomics = false;
 		bool uses_resource_write = false;
 		bool needs_subgroup_invocation_id = false;
+		bool needs_subgroup_size = false;
 	};
 
 	// OpcodeHandler that scans for uses of sampled images