
Updated spirv-cross.

Бранимир Караџић 4 years ago
parent
revision
6ca5060ce8

+ 95 - 0
3rdparty/spirv-cross/main.cpp

@@ -285,6 +285,61 @@ static bool write_string_to_file(const char *path, const char *string)
 #pragma warning(pop)
 #endif
 
+static void print_resources(const Compiler &compiler, spv::StorageClass storage,
+                            const SmallVector<BuiltInResource> &resources)
+{
+	fprintf(stderr, "%s\n", storage == StorageClassInput ? "builtin inputs" : "builtin outputs");
+	fprintf(stderr, "=============\n\n");
+	for (auto &res : resources)
+	{
+		bool active = compiler.has_active_builtin(res.builtin, storage);
+		const char *basetype = "?";
+		auto &type = compiler.get_type(res.value_type_id);
+		switch (type.basetype)
+		{
+		case SPIRType::Float: basetype = "float"; break;
+		case SPIRType::Int: basetype = "int"; break;
+		case SPIRType::UInt: basetype = "uint"; break;
+		default: break;
+		}
+
+		uint32_t array_size = 0;
+		bool array_size_literal = false;
+		if (!type.array.empty())
+		{
+			array_size = type.array.front();
+			array_size_literal = type.array_size_literal.front();
+		}
+
+		string type_str = basetype;
+		if (type.vecsize > 1)
+			type_str += std::to_string(type.vecsize);
+
+		if (array_size)
+		{
+			if (array_size_literal)
+				type_str += join("[", array_size, "]");
+			else
+				type_str += join("[", array_size, " (spec constant ID)]");
+		}
+
+		string builtin_str;
+		switch (res.builtin)
+		{
+		case spv::BuiltInPosition: builtin_str = "Position"; break;
+		case spv::BuiltInPointSize: builtin_str = "PointSize"; break;
+		case spv::BuiltInCullDistance: builtin_str = "CullDistance"; break;
+		case spv::BuiltInClipDistance: builtin_str = "ClipDistance"; break;
+		case spv::BuiltInTessLevelInner: builtin_str = "TessLevelInner"; break;
+		case spv::BuiltInTessLevelOuter: builtin_str = "TessLevelOuter"; break;
+		default: builtin_str = string("builtin #") + to_string(res.builtin);
+		}
+
+		fprintf(stderr, "Builtin %s (%s) (active: %s).\n", builtin_str.c_str(), type_str.c_str(), active ? "yes" : "no");
+	}
+	fprintf(stderr, "=============\n\n");
+}
+
 static void print_resources(const Compiler &compiler, const char *tag, const SmallVector<Resource> &resources)
 {
 	fprintf(stderr, "%s\n", tag);
@@ -475,6 +530,8 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res
 	print_resources(compiler, "push", res.push_constant_buffers);
 	print_resources(compiler, "counters", res.atomic_counters);
 	print_resources(compiler, "acceleration structures", res.acceleration_structures);
+	print_resources(compiler, spv::StorageClassInput, res.builtin_inputs);
+	print_resources(compiler, spv::StorageClassOutput, res.builtin_outputs);
 }
 
 static void print_push_constant_resources(const Compiler &compiler, const SmallVector<Resource> &res)
@@ -621,6 +678,8 @@ struct CLIArguments
 	SmallVector<VariableTypeRemap> variable_type_remaps;
 	SmallVector<InterfaceVariableRename> interface_variable_renames;
 	SmallVector<HLSLVertexAttributeRemap> hlsl_attr_remap;
+	SmallVector<std::pair<uint32_t, uint32_t>> masked_stage_outputs;
+	SmallVector<BuiltIn> masked_stage_builtins;
 	string entry;
 	string entry_stage;
 
@@ -845,6 +904,11 @@ static void print_help_common()
 	                "\t\tGLSL: Rewrites [0, w] Z range (D3D/Metal/Vulkan) to GL-style [-w, w].\n"
 	                "\t\tHLSL/MSL: Rewrites [-w, w] Z range (GL) to D3D/Metal/Vulkan-style [0, w].\n"
 	                "\t[--flip-vert-y]:\n\t\tInverts gl_Position.y (or equivalent) at the end of a vertex shader. This is equivalent to using negative viewport height.\n"
+	                "\t[--mask-stage-output-location <location> <component>]:\n"
+	                "\t\tIf a stage output variable with matching location and component is active, optimize away the variable if applicable.\n"
+	                "\t[--mask-stage-output-builtin <Position|PointSize|ClipDistance|CullDistance>]:\n"
+	                "\t\tIf a stage output variable with matching builtin is active, "
+	                "optimize away the variable if it can affect cross-stage linking correctness.\n"
 	);
 	// clang-format on
 }
@@ -1103,6 +1167,11 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
 		compiler->set_variable_type_remap_callback(move(remap_cb));
 	}
 
+	for (auto &masked : args.masked_stage_outputs)
+		compiler->mask_stage_output_by_location(masked.first, masked.second);
+	for (auto &masked : args.masked_stage_builtins)
+		compiler->mask_stage_output_by_builtin(masked);
+
 	for (auto &rename : args.entry_point_rename)
 		compiler->rename_entry_point(rename.old_name, rename.new_name, rename.execution_model);
 
@@ -1346,6 +1415,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
 
 	if (args.dump_resources)
 	{
+		compiler->update_active_builtins();
 		print_resources(*compiler, res);
 		print_push_constant_resources(*compiler, res.push_constant_buffers);
 		print_spec_constants(*compiler);
@@ -1571,6 +1641,31 @@ static int main_inner(int argc, char *argv[])
 	cbs.add("--no-support-nonzero-baseinstance", [&](CLIParser &) { args.support_nonzero_baseinstance = false; });
 	cbs.add("--emit-line-directives", [&args](CLIParser &) { args.emit_line_directives = true; });
 
+	cbs.add("--mask-stage-output-location", [&](CLIParser &parser) {
+		uint32_t location = parser.next_uint();
+		uint32_t component = parser.next_uint();
+		args.masked_stage_outputs.push_back({ location, component });
+	});
+
+	cbs.add("--mask-stage-output-builtin", [&](CLIParser &parser) {
+		BuiltIn masked_builtin = BuiltInMax;
+		std::string builtin = parser.next_string();
+		if (builtin == "Position")
+			masked_builtin = BuiltInPosition;
+		else if (builtin == "PointSize")
+			masked_builtin = BuiltInPointSize;
+		else if (builtin == "CullDistance")
+			masked_builtin = BuiltInCullDistance;
+		else if (builtin == "ClipDistance")
+			masked_builtin = BuiltInClipDistance;
+		else
+		{
+			print_help();
+			exit(EXIT_FAILURE);
+		}
+		args.masked_stage_builtins.push_back(masked_builtin);
+	});
+
 	cbs.default_handler = [&args](const char *value) { args.input = value; };
 	cbs.add("-", [&args](CLIParser &) { args.input = "-"; });
 	cbs.error_handler = [] { print_help(); };
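
For reference, the two new flags wire straight through to the CompilerGLSL masking API invoked in compile_iteration() above. A minimal C++ sketch of driving the same masking programmatically (the location/component values and the pre-loaded words vector are illustrative):

#include "spirv_glsl.hpp"

#include <string>
#include <utility>
#include <vector>

// Equivalent of:
//   spirv-cross shader.spv --mask-stage-output-location 1 0
//                          --mask-stage-output-builtin PointSize
std::string compile_with_masking(std::vector<uint32_t> words)
{
	spirv_cross::CompilerGLSL compiler(std::move(words));
	compiler.mask_stage_output_by_location(1, 0); // location 1, component 0
	compiler.mask_stage_output_by_builtin(spv::BuiltInPointSize);
	return compiler.compile();
}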

+ 24 - 0
3rdparty/spirv-cross/spirv_common.hpp

@@ -1763,6 +1763,22 @@ struct SetBindingPair
 	}
 };
 
+struct LocationComponentPair
+{
+	uint32_t location;
+	uint32_t component;
+
+	inline bool operator==(const LocationComponentPair &other) const
+	{
+		return location == other.location && component == other.component;
+	}
+
+	inline bool operator<(const LocationComponentPair &other) const
+	{
+		return location < other.location || (location == other.location && component < other.component);
+	}
+};
+
 struct StageSetBinding
 {
 	spv::ExecutionModel model;
@@ -1785,6 +1801,14 @@ struct InternalHasher
 		return (hash_set * 0x10001b31) ^ hash_binding;
 	}
 
+	inline size_t operator()(const LocationComponentPair &value) const
+	{
+		// Quality of hash doesn't really matter here.
+		auto hash_set = std::hash<uint32_t>()(value.location);
+		auto hash_binding = std::hash<uint32_t>()(value.component);
+		return (hash_set * 0x10001b31) ^ hash_binding;
+	}
+
 	inline size_t operator()(const StageSetBinding &value) const
 	{
 		// Quality of hash doesn't really matter here.
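
The pair and hasher above give the compiler O(1) lookups when deciding whether a given output location/component has been masked. A minimal sketch of that pattern, assuming the default SPIRV_CROSS_NAMESPACE of spirv_cross; the set here is a hypothetical stand-in for the member CompilerGLSL keeps internally:

#include "spirv_common.hpp"

#include <unordered_set>

using spirv_cross::InternalHasher;
using spirv_cross::LocationComponentPair;

std::unordered_set<LocationComponentPair, InternalHasher> masked;

void mask(uint32_t location, uint32_t component)
{
	masked.insert({ location, component });
}

bool is_masked(uint32_t location, uint32_t component)
{
	return masked.count({ location, component }) != 0;
}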

+ 89 - 31
3rdparty/spirv-cross/spirv_cross.cpp

@@ -284,31 +284,6 @@ SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
 	return var;
 }
 
-StorageClass Compiler::get_expression_effective_storage_class(uint32_t ptr)
-{
-	auto *var = maybe_get_backing_variable(ptr);
-
-	// If the expression has been lowered to a temporary, we need to use the Generic storage class.
-	// We're looking for the effective storage class of a given expression.
-	// An access chain or forwarded OpLoads from such access chains
-	// will generally have the storage class of the underlying variable, but if the load was not forwarded
-	// we have lost any address space qualifiers.
-	bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
-	                        (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);
-
-	if (var && !forced_temporary)
-	{
-		// Normalize SSBOs to StorageBuffer here.
-		if (var->storage == StorageClassUniform &&
-		    has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
-			return StorageClassStorageBuffer;
-		else
-			return var->storage;
-	}
-	else
-		return expression_type(ptr).storage;
-}
-
 void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
 {
 	auto &e = get<SPIRExpression>(expr);
@@ -853,19 +828,79 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *
 
 		// It is possible for uniform storage classes to be passed as function parameters, so detect
 		// that. To detect function parameters, check if the StorageClass of the variable is function scope.
-		if (var.storage == StorageClassFunction || !type.pointer || is_builtin_variable(var))
+		if (var.storage == StorageClassFunction || !type.pointer)
 			return;
 
 		if (active_variables && active_variables->find(var.self) == end(*active_variables))
 			return;
 
+		// In SPIR-V 1.4 and up, every global must be present in the entry point interface list,
+		// not just IO variables.
+		bool active_in_entry_point = true;
+		if (ir.get_spirv_version() < 0x10400)
+		{
+			if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
+				active_in_entry_point = interface_variable_exists_in_entry_point(var.self);
+		}
+		else
+			active_in_entry_point = interface_variable_exists_in_entry_point(var.self);
+
+		if (!active_in_entry_point)
+			return;
+
+		bool is_builtin = is_builtin_variable(var);
+
+		if (is_builtin)
+		{
+			if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
+				return;
+
+			auto &list = var.storage == StorageClassInput ? res.builtin_inputs : res.builtin_outputs;
+			BuiltInResource resource;
+
+			if (has_decoration(type.self, DecorationBlock))
+			{
+				resource.resource = { var.self, var.basetype, type.self,
+				                      get_remapped_declared_block_name(var.self, false) };
+
+				for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
+				{
+					resource.value_type_id = type.member_types[i];
+					resource.builtin = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn));
+					list.push_back(resource);
+				}
+			}
+			else
+			{
+				bool strip_array =
+						!has_decoration(var.self, DecorationPatch) && (
+								get_execution_model() == ExecutionModelTessellationControl ||
+								(get_execution_model() == ExecutionModelTessellationEvaluation &&
+								 var.storage == StorageClassInput));
+
+				resource.resource = { var.self, var.basetype, type.self, get_name(var.self) };
+
+				if (strip_array && !type.array.empty())
+					resource.value_type_id = get_variable_data_type(var).parent_type;
+				else
+					resource.value_type_id = get_variable_data_type_id(var);
+
+				assert(resource.value_type_id);
+
+				resource.builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
+				list.push_back(std::move(resource));
+			}
+			return;
+		}
+
 		// Input
-		if (var.storage == StorageClassInput && interface_variable_exists_in_entry_point(var.self))
+		if (var.storage == StorageClassInput)
 		{
 			if (has_decoration(type.self, DecorationBlock))
 			{
 				res.stage_inputs.push_back(
-				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
+						{ var.self, var.basetype, type.self,
+						  get_remapped_declared_block_name(var.self, false) });
 			}
 			else
 				res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
@@ -876,12 +911,12 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *
 			res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
 		}
 		// Outputs
-		else if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self))
+		else if (var.storage == StorageClassOutput)
 		{
 			if (has_decoration(type.self, DecorationBlock))
 			{
 				res.stage_outputs.push_back(
-				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
+						{ var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
 			}
 			else
 				res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
@@ -3185,6 +3220,29 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
 		break;
 	}
 
+	case OpSelect:
+	{
+		// In case of variable pointers, we might access a variable here.
+		// We cannot prove anything about these accesses however.
+		for (uint32_t i = 1; i < length; i++)
+		{
+			if (i >= 3)
+			{
+				auto *var = compiler.maybe_get_backing_variable(args[i]);
+				if (var)
+				{
+					accessed_variables_to_block[var->self].insert(current_block->self);
+					// Assume we can get partial writes to this variable.
+					partial_write_variables_to_block[var->self].insert(current_block->self);
+				}
+			}
+
+			// Might try to copy a Phi variable here.
+			notify_variable_access(args[i], current_block->self);
+		}
+		break;
+	}
+
 	case OpExtInst:
 	{
 		for (uint32_t i = 4; i < length; i++)
@@ -4071,7 +4129,7 @@ void Compiler::update_active_builtins()
 }
 
 // Returns whether this shader uses a builtin of the storage class
-bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage)
+bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) const
 {
 	const Bitset *flags;
 	switch (storage)

+ 25 - 2
3rdparty/spirv-cross/spirv_cross.hpp

@@ -59,6 +59,27 @@ struct Resource
 	std::string name;
 };
 
+struct BuiltInResource
+{
+	// This is mostly here to support reflection of builtins such as Position/PointSize/CullDistance/ClipDistance.
+	// This needs to be different from Resource since we can collect builtins from blocks.
+	// A builtin present here does not necessarily mean it's considered an active builtin,
+	// since variable ID "activeness" is only tracked on OpVariable level, not Block members.
+	// For that, update_active_builtins() -> has_active_builtin() can be used to further refine the reflection.
+	spv::BuiltIn builtin;
+
+	// This is the actual value type of the builtin.
+	// Typically float4, float, array<float, N> for the gl_PerVertex builtins.
+	// If the builtin is a control point, the control point array type will be stripped away here as appropriate.
+	TypeID value_type_id;
+
+	// This refers to the base resource which contains the builtin.
+	// If resource is a Block, it can hold multiple builtins, or it might not be a block.
+	// For advanced reflection scenarios, all information in builtin/value_type_id can be deduced
+	// from resource; the dedicated fields are just more convenient.
+	Resource resource;
+};
+
 struct ShaderResources
 {
 	SmallVector<Resource> uniform_buffers;
@@ -79,6 +100,9 @@ struct ShaderResources
 	// these correspond to separate texture2D and samplers respectively.
 	SmallVector<Resource> separate_images;
 	SmallVector<Resource> separate_samplers;
+
+	SmallVector<BuiltInResource> builtin_inputs;
+	SmallVector<BuiltInResource> builtin_outputs;
 };
 
 struct CombinedImageSampler
@@ -324,7 +348,7 @@ public:
 
 	// Traverses all reachable opcodes and sets active_builtins to a bitmask of all builtin variables which are accessed in the shader.
 	void update_active_builtins();
-	bool has_active_builtin(spv::BuiltIn builtin, spv::StorageClass storage);
+	bool has_active_builtin(spv::BuiltIn builtin, spv::StorageClass storage) const;
 
 	// Query and modify OpExecutionMode.
 	const Bitset &get_execution_mode_bitset() const;
@@ -647,7 +671,6 @@ protected:
 	bool expression_is_lvalue(uint32_t id) const;
 	bool variable_storage_is_aliased(const SPIRVariable &var);
 	SPIRVariable *maybe_get_backing_variable(uint32_t chain);
-	spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
 
 	void register_read(uint32_t expr, uint32_t chain, bool forwarded);
 	void register_write(uint32_t chain);
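
Taken together, builtin_inputs/builtin_outputs come back from get_shader_resources(), and has_active_builtin() refines them once update_active_builtins() has run, which is exactly why the main.cpp change calls it before dumping. A minimal reflection sketch (SPIR-V loading elided):

#include "spirv_cross.hpp"

#include <utility>
#include <vector>

void dump_builtin_outputs(std::vector<uint32_t> words)
{
	spirv_cross::Compiler compiler(std::move(words));
	auto res = compiler.get_shader_resources();

	// Must run before has_active_builtin() returns meaningful results.
	compiler.update_active_builtins();

	for (auto &b : res.builtin_outputs)
	{
		bool active = compiler.has_active_builtin(b.builtin, spv::StorageClassOutput);
		(void)active;
		// b.value_type_id is the builtin's value type (e.g. float4 for Position);
		// b.resource identifies the variable or block carrying the builtin.
	}
}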

+ 100 - 0
3rdparty/spirv-cross/spirv_cross_c.cpp

@@ -197,8 +197,11 @@ struct spvc_resources_s : ScratchMemoryAllocation
 	SmallVector<spvc_reflected_resource> separate_images;
 	SmallVector<spvc_reflected_resource> separate_samplers;
 	SmallVector<spvc_reflected_resource> acceleration_structures;
+	SmallVector<spvc_reflected_builtin_resource> builtin_inputs;
+	SmallVector<spvc_reflected_builtin_resource> builtin_outputs;
 
 	bool copy_resources(SmallVector<spvc_reflected_resource> &outputs, const SmallVector<Resource> &inputs);
+	bool copy_resources(SmallVector<spvc_reflected_builtin_resource> &outputs, const SmallVector<BuiltInResource> &inputs);
 	bool copy_resources(const ShaderResources &resources);
 };
 
@@ -818,6 +821,44 @@ spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spv
 #endif
 }
 
+spvc_result spvc_compiler_mask_stage_output_by_location(spvc_compiler compiler,
+                                                        unsigned location, unsigned component)
+{
+#if SPIRV_CROSS_C_API_GLSL
+	if (compiler->backend == SPVC_BACKEND_NONE)
+	{
+		compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection.");
+		return SPVC_ERROR_INVALID_ARGUMENT;
+	}
+
+	static_cast<CompilerGLSL *>(compiler->compiler.get())->mask_stage_output_by_location(location, component);
+	return SPVC_SUCCESS;
+#else
+	(void)location;
+	(void)component;
+	compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection.");
+	return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
+spvc_result spvc_compiler_mask_stage_output_by_builtin(spvc_compiler compiler, SpvBuiltIn builtin)
+{
+#if SPIRV_CROSS_C_API_GLSL
+	if (compiler->backend == SPVC_BACKEND_NONE)
+	{
+		compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection.");
+		return SPVC_ERROR_INVALID_ARGUMENT;
+	}
+
+	static_cast<CompilerGLSL *>(compiler->compiler.get())->mask_stage_output_by_builtin(spv::BuiltIn(builtin));
+	return SPVC_SUCCESS;
+#else
+	(void)builtin;
+	compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection.");
+	return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
 spvc_result spvc_compiler_hlsl_set_root_constants_layout(spvc_compiler compiler,
                                                          const spvc_hlsl_root_constants *constant_info,
                                                          size_t count)
@@ -1551,6 +1592,30 @@ bool spvc_resources_s::copy_resources(SmallVector<spvc_reflected_resource> &outp
 	return true;
 }
 
+bool spvc_resources_s::copy_resources(SmallVector<spvc_reflected_builtin_resource> &outputs,
+                                      const SmallVector<BuiltInResource> &inputs)
+{
+	for (auto &i : inputs)
+	{
+		spvc_reflected_builtin_resource br;
+
+		br.value_type_id = i.value_type_id;
+		br.builtin = SpvBuiltIn(i.builtin);
+
+		auto &r = br.resource;
+		r.base_type_id = i.resource.base_type_id;
+		r.type_id = i.resource.type_id;
+		r.id = i.resource.id;
+		r.name = context->allocate_name(i.resource.name);
+		if (!r.name)
+			return false;
+
+		outputs.push_back(br);
+	}
+
+	return true;
+}
+
 bool spvc_resources_s::copy_resources(const ShaderResources &resources)
 {
 	if (!copy_resources(uniform_buffers, resources.uniform_buffers))
@@ -1577,6 +1642,10 @@ bool spvc_resources_s::copy_resources(const ShaderResources &resources)
 		return false;
 	if (!copy_resources(acceleration_structures, resources.acceleration_structures))
 		return false;
+	if (!copy_resources(builtin_inputs, resources.builtin_inputs))
+		return false;
+	if (!copy_resources(builtin_outputs, resources.builtin_outputs))
+		return false;
 
 	return true;
 }
@@ -1735,6 +1804,37 @@ spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources,
 	return SPVC_SUCCESS;
 }
 
+spvc_result spvc_resources_get_builtin_resource_list_for_type(
+		spvc_resources resources, spvc_builtin_resource_type type,
+		const spvc_reflected_builtin_resource **resource_list,
+		size_t *resource_size)
+{
+	const SmallVector<spvc_reflected_builtin_resource> *list = nullptr;
+	switch (type)
+	{
+	case SPVC_BUILTIN_RESOURCE_TYPE_STAGE_INPUT:
+		list = &resources->builtin_inputs;
+		break;
+
+	case SPVC_BUILTIN_RESOURCE_TYPE_STAGE_OUTPUT:
+		list = &resources->builtin_outputs;
+		break;
+
+	default:
+		break;
+	}
+
+	if (!list)
+	{
+		resources->context->report_error("Invalid argument.");
+		return SPVC_ERROR_INVALID_ARGUMENT;
+	}
+
+	*resource_size = list->size();
+	*resource_list = list->data();
+	return SPVC_SUCCESS;
+}
+
 void spvc_compiler_set_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration, unsigned argument)
 {
 	compiler->compiler->set_decoration(id, static_cast<spv::Decoration>(decoration), argument);

+ 25 - 1
3rdparty/spirv-cross/spirv_cross_c.h

@@ -40,7 +40,7 @@ extern "C" {
 /* Bumped if ABI or API breaks backwards compatibility. */
 #define SPVC_C_API_VERSION_MAJOR 0
 /* Bumped if APIs or enumerations are added in a backwards compatible way. */
-#define SPVC_C_API_VERSION_MINOR 46
+#define SPVC_C_API_VERSION_MINOR 47
 /* Bumped if internal implementation details change. */
 #define SPVC_C_API_VERSION_PATCH 0
 
@@ -99,6 +99,13 @@ typedef struct spvc_reflected_resource
 	const char *name;
 } spvc_reflected_resource;
 
+typedef struct spvc_reflected_builtin_resource
+{
+	SpvBuiltIn builtin;
+	spvc_type_id value_type_id;
+	spvc_reflected_resource resource;
+} spvc_reflected_builtin_resource;
+
 /* See C++ API. */
 typedef struct spvc_entry_point
 {
@@ -221,6 +228,14 @@ typedef enum spvc_resource_type
 	SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff
 } spvc_resource_type;
 
+typedef enum spvc_builtin_resource_type
+{
+	SPVC_BUILTIN_RESOURCE_TYPE_UNKNOWN = 0,
+	SPVC_BUILTIN_RESOURCE_TYPE_STAGE_INPUT = 1,
+	SPVC_BUILTIN_RESOURCE_TYPE_STAGE_OUTPUT = 2,
+	SPVC_BUILTIN_RESOURCE_TYPE_INT_MAX = 0x7fffffff
+} spvc_builtin_resource_type;
+
 /* Maps to spirv_cross::SPIRType::BaseType. */
 typedef enum spvc_basetype
 {
@@ -722,6 +737,10 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler com
 
 SPVC_PUBLIC_API spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spvc_variable_id id);
 
+SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_location(spvc_compiler compiler,
+                                                                        unsigned location, unsigned component);
+SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_builtin(spvc_compiler compiler, SpvBuiltIn builtin);
+
 /*
  * HLSL specifics.
  * Maps to C++ API.
@@ -805,6 +824,11 @@ SPVC_PUBLIC_API spvc_result spvc_resources_get_resource_list_for_type(spvc_resou
                                                                       const spvc_reflected_resource **resource_list,
                                                                       size_t *resource_size);
 
+SPVC_PUBLIC_API spvc_result spvc_resources_get_builtin_resource_list_for_type(
+		spvc_resources resources, spvc_builtin_resource_type type,
+		const spvc_reflected_builtin_resource **resource_list,
+		size_t *resource_size);
+
 /*
  * Decorations.
  * Maps to C++ API.
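
For C API consumers, the new builtin list is queried like the existing resource lists. A minimal sketch with all spvc_result error checking elided (words/word_count stand in for an already-loaded module):

#include "spirv_cross_c.h"

#include <stddef.h>

void dump_builtin_outputs_c(const SpvId *words, size_t word_count)
{
	spvc_context context = NULL;
	spvc_parsed_ir ir = NULL;
	spvc_compiler compiler = NULL;
	spvc_resources resources = NULL;
	const spvc_reflected_builtin_resource *list = NULL;
	size_t count = 0;

	spvc_context_create(&context);
	spvc_context_parse_spirv(context, words, word_count, &ir);
	spvc_context_create_compiler(context, SPVC_BACKEND_GLSL, ir,
	                             SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler);
	spvc_compiler_create_shader_resources(compiler, &resources);
	spvc_resources_get_builtin_resource_list_for_type(
	    resources, SPVC_BUILTIN_RESOURCE_TYPE_STAGE_OUTPUT, &list, &count);

	for (size_t i = 0; i < count; i++)
	{
		/* list[i].builtin, list[i].value_type_id, list[i].resource.name ... */
	}

	spvc_context_destroy(context);
}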

+ 389 - 195
3rdparty/spirv-cross/spirv_glsl.cpp

@@ -559,18 +559,19 @@ string CompilerGLSL::compile()
 {
 	ir.fixup_reserved_names();
 
-	if (options.vulkan_semantics)
-		backend.allow_precision_qualifiers = true;
-	else
+	if (!options.vulkan_semantics)
 	{
 		// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
 		backend.nonuniform_qualifier = "";
 		backend.needs_row_major_load_workaround = true;
 	}
+	backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
 	backend.force_gl_in_out_block = true;
 	backend.supports_extensions = true;
 	backend.use_array_constructor = true;
 
+	backend.support_precise_qualifier = (!options.es && options.version >= 400) || (options.es && options.version >= 320);
+
 	if (is_legacy_es())
 		backend.support_case_fallthrough = false;
 
@@ -764,6 +765,8 @@ void CompilerGLSL::emit_header()
 			{
 				statement("#if defined(GL_AMD_gpu_shader_int16)");
 				statement("#extension GL_AMD_gpu_shader_int16 : require");
+				statement("#elif defined(GL_NV_gpu_shader5)");
+				statement("#extension GL_NV_gpu_shader5 : require");
 				statement("#else");
 				statement("#error No extension available for Int16.");
 				statement("#endif");
@@ -4395,6 +4398,16 @@ string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool uses_
 		return to_unpacked_expression(id);
 }
 
+string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
+{
+	string expr = to_expression(id);
+
+	if (has_decoration(id, DecorationNonUniform))
+		convert_non_uniform_expression(expr, id);
+
+	return expr;
+}
+
 string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
 {
 	auto itr = invalid_expressions.find(id);
@@ -5533,7 +5546,12 @@ void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint3
 
 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
 {
-	bool forward = should_forward(op0) && should_forward(op1);
+	// Various FP arithmetic opcodes such as add, sub, mul will hit this.
+	bool force_temporary_precise = backend.support_precise_qualifier &&
+	                               has_decoration(result_id, DecorationNoContraction) &&
+	                               type_is_floating_point(get<SPIRType>(result_type));
+	bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise;
+
 	emit_op(result_type, result_id,
 	        join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
 
@@ -5712,6 +5730,27 @@ void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id,
 	inherit_expression_dependencies(result_id, op1);
 }
 
+void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
+                                       const char *op)
+{
+	forced_temporaries.insert(result_id);
+	emit_op(result_type, result_id,
+	        join(op, "(", to_non_uniform_aware_expression(op0), ", ",
+	             to_unpacked_expression(op1), ")"), false);
+	flush_all_atomic_capable_variables();
+}
+
+void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
+                                       uint32_t op0, uint32_t op1, uint32_t op2,
+                                       const char *op)
+{
+	forced_temporaries.insert(result_id);
+	emit_op(result_type, result_id,
+	        join(op, "(", to_non_uniform_aware_expression(op0), ", ",
+	             to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false);
+	flush_all_atomic_capable_variables();
+}
+
 void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
                                            SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
 {
@@ -6214,7 +6253,7 @@ string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID s
 {
 	// Keep track of the array indices we have used to load the image.
 	// We'll need to use the same array index into the combined image sampler array.
-	auto image_expr = to_expression(image_id);
+	auto image_expr = to_non_uniform_aware_expression(image_id);
 	string array_expr;
 	auto array_index = image_expr.find_first_of('[');
 	if (array_index != string::npos)
@@ -6442,20 +6481,8 @@ std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool
 	auto &result_type = get<SPIRType>(result_type_id);
 
 	inherited_expressions.push_back(coord);
-
-	// Make sure non-uniform decoration is back-propagated to where it needs to be.
-	if (has_decoration(img, DecorationNonUniformEXT))
-	{
-		// In Vulkan GLSL, we cannot back-propagate nonuniform qualifiers if we
-		// use a combined image sampler constructor.
-		// We're only interested in back-propagating if we can trace back through access chains.
-		// If not, we will apply nonuniform to the sampled image expression itself.
-		auto *backing = maybe_get_backing_variable(img);
-		if (backing)
-			propagate_nonuniform_qualifier(img);
-		else
-			nonuniform_expression = true;
-	}
+	if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img))
+		nonuniform_expression = true;
 
 	switch (op)
 	{
@@ -6794,7 +6821,7 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
 					// Don't need to consider Shadow state since the dummy sampler is always non-shadow.
 					auto sampled_type = type;
 					sampled_type.basetype = SPIRType::SampledImage;
-					return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ",
+					return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ",
 					            to_expression(dummy_sampler_id), ")");
 				}
 				else
@@ -6814,7 +6841,7 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
 		}
 	}
 
-	return to_expression(id);
+	return to_non_uniform_aware_expression(id);
 }
 
 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
@@ -6827,7 +6854,7 @@ string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool
 	if (args.base.is_fetch)
 		farg_str = convert_separate_image_to_expression(img);
 	else
-		farg_str = to_expression(img);
+		farg_str = to_non_uniform_aware_expression(img);
 
 	if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
 	{
@@ -8317,12 +8344,35 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
 			SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
 	}
 
+	case BuiltInPrimitiveShadingRateKHR:
+	{
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
+		require_extension_internal("GL_EXT_fragment_shading_rate");
+		return "gl_PrimitiveShadingRateEXT";
+	}
+
+	case BuiltInShadingRateKHR:
+	{
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
+		require_extension_internal("GL_EXT_fragment_shading_rate");
+		return "gl_ShadingRateEXT";
+	}
+
 	case BuiltInDeviceIndex:
 		if (!options.vulkan_semantics)
 			SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
 		require_extension_internal("GL_EXT_device_group");
 		return "gl_DeviceIndex";
 
+	case BuiltInFullyCoveredEXT:
+		if (!options.es)
+			require_extension_internal("GL_NV_conservative_raster_underestimation");
+		else
+			SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
+		return "gl_FragFullyCoveredNV";
+
 	default:
 		return join("gl_BuiltIn_", convert_to_string(builtin));
 	}
@@ -8345,7 +8395,7 @@ const char *CompilerGLSL::index_to_swizzle(uint32_t index)
 	}
 }
 
-void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType *type,
+void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
                                                       AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
                                                       uint32_t index)
 {
@@ -8354,27 +8404,19 @@ void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_
 
 	expr += "[";
 
-	// If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier.
-	bool nonuniform_index =
-	    has_decoration(index, DecorationNonUniformEXT) &&
-	    (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock));
-	if (nonuniform_index)
-	{
-		expr += backend.nonuniform_qualifier;
-		expr += "(";
-	}
-
 	if (index_is_literal)
 		expr += convert_to_string(index);
 	else
 		expr += to_expression(index, register_expression_read);
 
-	if (nonuniform_index)
-		expr += ")";
-
 	expr += "]";
 }
 
+bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
+{
+	return true;
+}
+
 string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
                                            AccessChainFlags flags, AccessChainMeta *meta)
 {
@@ -8584,7 +8626,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 				SPIRV_CROSS_THROW("Member index is out of bounds!");
 
 			BuiltIn builtin;
-			if (is_member_builtin(*type, index, &builtin))
+			if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base))
 			{
 				if (access_chain_is_arrayed)
 				{
@@ -8652,13 +8694,30 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 
 			// Internally, access chain implementation can also be used on composites,
 			// ignore scalar access workarounds in this case.
-			StorageClass effective_storage;
-			if (expression_type(base).pointer)
-				effective_storage = get_expression_effective_storage_class(base);
+			StorageClass effective_storage = StorageClassGeneric;
+			bool ignore_potential_sliced_writes = false;
+			if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
+			{
+				if (expression_type(base).pointer)
+					effective_storage = get_expression_effective_storage_class(base);
+
+				// Special consideration for control points.
+				// Control points can only be written by InvocationID, so there is no need
+				// to consider scalar access chains here.
+				// Cleans up some cases where it's very painful to determine the accurate storage class
+				// since blocks can be partially masked ...
+				auto *var = maybe_get_backing_variable(base);
+				if (var && var->storage == StorageClassOutput &&
+				    get_execution_model() == ExecutionModelTessellationControl &&
+				    !has_decoration(var->self, DecorationPatch))
+				{
+					ignore_potential_sliced_writes = true;
+				}
+			}
 			else
-				effective_storage = StorageClassGeneric;
+				ignore_potential_sliced_writes = true;
 
-			if (!row_major_matrix_needs_conversion)
+			if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
 			{
 				// On some backends, we might not be able to safely access individual scalars in a vector.
 				// To work around this, we might have to cast the access chain reference to something which can,
@@ -8698,7 +8757,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 				expr += "]";
 			}
 
-			if (row_major_matrix_needs_conversion)
+			if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
 			{
 				prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
 				                                       is_packed);
@@ -9682,6 +9741,8 @@ void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_ex
 		if (!unroll_array_to_complex_store(lhs_expression, rhs_expression))
 		{
 			auto lhs = to_dereferenced_expression(lhs_expression);
+			if (has_decoration(lhs_expression, DecorationNonUniform))
+				convert_non_uniform_expression(lhs, lhs_expression);
 
 			// We might need to cast in order to store to a builtin.
 			cast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
@@ -9857,12 +9918,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		// Also, loading from gl_SampleMask array needs special unroll.
 		unroll_array_from_complex_load(id, ptr, expr);
 
-		// Shouldn't need to check for ID, but current glslang codegen requires it in some cases
-		// when loading Image/Sampler descriptors. It does not hurt to check ID as well.
-		if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT))
+		if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform))
 		{
-			propagate_nonuniform_qualifier(ptr);
-			convert_non_uniform_expression(type, expr);
+			// If we're loading something non-opaque, we need to handle non-uniform descriptor access.
+			convert_non_uniform_expression(expr, ptr);
 		}
 
 		if (forward && ptr_expression)
@@ -9885,7 +9944,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			// it is an array, and our backend does not support arrays as value types.
 			// Emit the temporary, and copy it explicitly.
 			e = &emit_uninitialized_temporary_expression(result_type, id);
-			emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
+			emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
 		}
 		else
 			e = &emit_op(result_type, id, expr, forward, !usage_tracking);
@@ -9966,9 +10025,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (expr.expression_dependencies.empty())
 			forwarded_temporaries.erase(ops[1]);
 
-		if (has_decoration(ops[1], DecorationNonUniformEXT))
-			propagate_nonuniform_qualifier(ops[1]);
-
 		break;
 	}
 
@@ -10006,6 +10062,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
 		auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
+		if (has_decoration(ops[2], DecorationNonUniform))
+			convert_non_uniform_expression(e, ops[2]);
 		set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
 		                    true);
 		break;
@@ -10307,14 +10365,16 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			// Including the base will prevent this and would trigger multiple reads
 			// from expression causing it to be forced to an actual temporary in GLSL.
 			auto expr = access_chain_internal(ops[2], &ops[3], length,
-			                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta);
+			                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT |
+			                                  ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
 			e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
 			inherit_expression_dependencies(id, ops[2]);
 			e->base_expression = ops[2];
 		}
 		else
 		{
-			auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
+			auto expr = access_chain_internal(ops[2], &ops[3], length,
+			                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
 			e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
 			inherit_expression_dependencies(id, ops[2]);
 		}
@@ -11221,9 +11281,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		// Ignore semantics for now, probably only relevant to CL.
 		uint32_t val = ops[5];
 		const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
-		forced_temporaries.insert(id);
-		emit_binary_func_op(result_type, id, ptr, val, op);
-		flush_all_atomic_capable_variables();
+
+		emit_atomic_func_op(result_type, id, ptr, val, op);
 		break;
 	}
 
@@ -11236,9 +11295,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t comp = ops[7];
 		const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
 
-		forced_temporaries.insert(id);
-		emit_trinary_func_op(result_type, id, ptr, comp, val, op);
-		flush_all_atomic_capable_variables();
+		emit_atomic_func_op(result_type, id, ptr, comp, val, op);
 		break;
 	}
 
@@ -11253,7 +11310,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		                     (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
 		const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
 		const char *increment = unsigned_type ? "0u" : "0";
-		emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
+		emit_op(ops[0], ops[1],
+		        join(op, "(",
+		             to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
 		flush_all_atomic_capable_variables();
 		break;
 	}
@@ -11266,7 +11325,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		// Ignore semantics for now, probably only relevant to CL.
 		uint32_t val = ops[3];
 		const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
-		statement(op, "(", to_expression(ptr), ", ", to_expression(val), ");");
+		statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");");
 		flush_all_atomic_capable_variables();
 		break;
 	}
@@ -11301,7 +11360,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			else
 				increment = "-1";
 
-			emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
+			emit_op(ops[0], ops[1],
+			        join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
 		}
 
 		flush_all_atomic_capable_variables();
@@ -11311,9 +11371,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpAtomicIAdd:
 	{
 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
-		forced_temporaries.insert(ops[1]);
-		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
-		flush_all_atomic_capable_variables();
+		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
 		break;
 	}
 
@@ -11321,7 +11379,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	{
 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
 		forced_temporaries.insert(ops[1]);
-		auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
+		auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
 		emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
 		flush_all_atomic_capable_variables();
 		break;
@@ -11331,9 +11389,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpAtomicUMin:
 	{
 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
-		forced_temporaries.insert(ops[1]);
-		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
-		flush_all_atomic_capable_variables();
+		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
 		break;
 	}
 
@@ -11341,36 +11397,28 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpAtomicUMax:
 	{
 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
-		forced_temporaries.insert(ops[1]);
-		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
-		flush_all_atomic_capable_variables();
+		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
 		break;
 	}
 
 	case OpAtomicAnd:
 	{
 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
-		forced_temporaries.insert(ops[1]);
-		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
-		flush_all_atomic_capable_variables();
+		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
 		break;
 	}
 
 	case OpAtomicOr:
 	{
 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
-		forced_temporaries.insert(ops[1]);
-		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
-		flush_all_atomic_capable_variables();
+		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
 		break;
 	}
 
 	case OpAtomicXor:
 	{
 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
-		forced_temporaries.insert(ops[1]);
-		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
-		flush_all_atomic_capable_variables();
+		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
 		break;
 	}
 
@@ -11465,16 +11513,33 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 	case OpImageQueryLod:
 	{
+		const char *op = nullptr;
 		if (!options.es && options.version < 400)
 		{
 			require_extension_internal("GL_ARB_texture_query_lod");
 			// For some reason, the ARB spec is all-caps.
-			GLSL_BFOP(textureQueryLOD);
+			op = "textureQueryLOD";
 		}
 		else if (options.es)
 			SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
 		else
-			GLSL_BFOP(textureQueryLod);
+			op = "textureQueryLod";
+
+		auto sampler_expr = to_expression(ops[2]);
+		if (has_decoration(ops[2], DecorationNonUniform))
+		{
+			if (maybe_get_backing_variable(ops[2]))
+				convert_non_uniform_expression(sampler_expr, ops[2]);
+			else if (*backend.nonuniform_qualifier != '\0')
+				sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")");
+		}
+
+		bool forward = should_forward(ops[3]);
+		emit_op(ops[0], ops[1],
+		        join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"),
+		        forward);
+		inherit_expression_dependencies(ops[1], ops[2]);
+		inherit_expression_dependencies(ops[1], ops[3]);
 		register_control_dependent_expression(ops[1]);
 		break;
 	}
@@ -11504,7 +11569,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 		string expr;
 		if (type.image.sampled == 2)
-			expr = join("imageSamples(", to_expression(ops[2]), ")");
+			expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")");
 		else
 			expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
 
@@ -11615,10 +11680,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 						                  "operand mask was used.");
 
 					uint32_t samples = ops[5];
-					imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")");
+					imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")");
 				}
 				else
-					imgexpr = join("subpassLoad(", to_expression(ops[2]), ")");
+					imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")");
 			}
 			else
 			{
@@ -11630,13 +11695,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 						                  "operand mask was used.");
 
 					uint32_t samples = ops[5];
-					imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
+					imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
 					               to_expression(samples), ")");
 				}
 				else
 				{
 					// Implement subpass loads via texture barrier style sampling.
-					imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
+					imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
 				}
 			}
 			imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
@@ -11667,12 +11732,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 						                  "operand mask was used.");
 
 					uint32_t samples = ops[5];
-					statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ",
+					statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
 					          coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
 				}
 				else
 				{
-					statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ",
+					statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
 					          coord_expr, ", ", to_expression(sparse_texel_id), ");");
 				}
 				imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
@@ -11689,10 +11754,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 					uint32_t samples = ops[5];
 					imgexpr =
-					    join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
+					    join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
 				}
 				else
-					imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")");
+					imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")");
 			}
 
 			if (!sparse)
@@ -11733,9 +11798,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
 
 		auto expr = join(to_expression(ops[2]), ", ", coord_expr);
-		if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ops[2], DecorationNonUniformEXT))
-			convert_non_uniform_expression(expression_type(ops[2]), expr);
-
 		auto &e = set<SPIRExpression>(id, expr, result_type, true);
 
 		// When using the pointer, we need to know which variable it is actually loaded from.
@@ -11778,11 +11840,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			if (operands != ImageOperandsSampleMask || length != 5)
 				SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
 			uint32_t samples = ops[4];
-			statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
+			statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
 			          remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
 		}
 		else
-			statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ",
+			statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ",
 			          remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
 
 		if (var && variable_storage_is_aliased(*var))
@@ -11807,7 +11869,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 					SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
 
 				// The size of an image is always constant.
-				expr = join("imageSize(", to_expression(ops[2]), ")");
+				expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")");
 			}
 			else
 			{
@@ -12332,9 +12394,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		flush_control_dependent_expressions(current_emitting_block->self);
 		break;
 	case OpTraceNV:
-		if (has_decoration(ops[0], DecorationNonUniformEXT))
-			propagate_nonuniform_qualifier(ops[0]);
-		statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
+		statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
 		          to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
 		          to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
 		          to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
@@ -12343,9 +12403,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpTraceRayKHR:
 		if (!has_decoration(ops[10], DecorationLocation))
 			SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
-		if (has_decoration(ops[0], DecorationNonUniformEXT))
-			propagate_nonuniform_qualifier(ops[0]);
-		statement("traceRayEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
+		statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
 		          to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
 		          to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
 		          to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
@@ -12644,6 +12702,11 @@ string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uin
 	return join(type_name, " ", name, type_to_array_glsl(type));
 }
 
+bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
+{
+	return var.storage == storage;
+}
+
 // Emit a structure member. Subclasses may override to modify output,
 // or to dynamically add a padding member if needed.
 void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
@@ -12671,7 +12734,7 @@ void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
 {
 }
 
-const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
+string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
 {
 	// GL_EXT_buffer_reference variables can be marked as restrict.
 	if (flags.get(DecorationRestrictPointerEXT))
@@ -12683,6 +12746,11 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
 	    type.basetype != SPIRType::Sampler)
 		return "";
 
+	string qual;
+
+	if (flags.get(DecorationNoContraction) && backend.support_precise_qualifier)
+		qual = "precise ";
+
 	if (options.es)
 	{
 		auto &execution = get_entry_point();
@@ -12697,7 +12765,7 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
 			                        options.fragment.default_int_precision == Options::Mediump &&
 			                        execution.model == ExecutionModelFragment;
 
-			return implied_fmediump || implied_imediump ? "" : "mediump ";
+			qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
 		}
 		else
 		{
@@ -12711,7 +12779,7 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
 			                        execution.model == ExecutionModelFragment) ||
 			                       (execution.model != ExecutionModelFragment));
 
-			return implied_fhighp || implied_ihighp ? "" : "highp ";
+			qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
 		}
 	}
 	else if (backend.allow_precision_qualifiers)
@@ -12719,18 +12787,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
 		// Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
 		// The default is highp however, so only emit mediump in the rare case that a shader has these.
 		if (flags.get(DecorationRelaxedPrecision))
-			return "mediump ";
-		else
-			return "";
+			qual += "mediump ";
 	}
-	else
-		return "";
+
+	return qual;
 }
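
With the switch from const char * to string, flags_to_qualifiers_glsl() accumulates qualifiers rather than returning early, letting precise (from NoContraction) stack with a precision qualifier. A standalone sketch of that composition order, not the SPIRV-Cross function itself:

#include <string>

// Same accumulation order as above: precise first, then the precision qualifier.
static std::string qualifiers(bool no_contraction, bool supports_precise, bool relaxed_precision)
{
	std::string qual;
	if (no_contraction && supports_precise)
		qual += "precise ";
	if (relaxed_precision)
		qual += "mediump ";
	return qual; // e.g. "precise mediump " for a NoContraction + RelaxedPrecision value
}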
 
-const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
+string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
 {
 	auto &type = expression_type(id);
-	bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
+	bool use_precision_qualifiers = backend.allow_precision_qualifiers;
 	if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
 	{
 		// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
@@ -12787,10 +12853,24 @@ string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
 			res += "coherent ";
 		if (flags.get(DecorationRestrict))
 			res += "restrict ";
+
 		if (flags.get(DecorationNonWritable))
 			res += "readonly ";
+
+		bool formatted_load = type.image.format == ImageFormatUnknown;
 		if (flags.get(DecorationNonReadable))
+		{
 			res += "writeonly ";
+			formatted_load = false;
+		}
+
+		if (formatted_load)
+		{
+			if (!options.es)
+				require_extension_internal("GL_EXT_shader_image_load_formatted");
+			else
+				SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL.");
+		}
 	}
 
 	res += to_precision_qualifiers_glsl(id);
@@ -12869,7 +12949,7 @@ string CompilerGLSL::variable_decl(const SPIRVariable &variable)
 		else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
 			res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
 	}
-	else if (variable.initializer)
+	else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
 	{
 		uint32_t expr = variable.initializer;
 		if (ir.ids[expr].get_type() != TypeUndef)
@@ -13559,7 +13639,7 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
 		auto &var = get<SPIRVariable>(v);
 		var.deferred_declaration = false;
 
-		if (var.storage == StorageClassWorkgroup)
+		if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup))
 		{
 			// Special variable type which cannot have initializer,
 			// need to be declared as standalone variables.
@@ -14761,7 +14841,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 				// The backend is responsible for setting this up, and redirection the return values as appropriate.
 				if (ir.ids[block.return_value].get_type() != TypeUndef)
 				{
-					emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction,
+					emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction,
 					                get_expression_effective_storage_class(block.return_value));
 				}
 
@@ -14980,7 +15060,7 @@ uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
 	                    MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
 }
 
-void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass)
+void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass)
 {
 	statement(lhs, " = ", to_expression(rhs_id), ";");
 }
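
The added lhs_id parameter gives overriding backends access to the destination variable; the GLSL base implementation ignores it and keeps emitting a plain assignment. A hedged sketch of what an override might look like (MyBackend and spvArrayCopy are illustrative names, not part of the patch):

	void MyBackend::emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id,
	                                spv::StorageClass lhs_storage, spv::StorageClass rhs_storage)
	{
		// e.g. threadgroup destinations cannot be assigned as value types,
		// so fall back to an element-wise helper.
		if (lhs_storage == spv::StorageClassWorkgroup)
			statement("spvArrayCopy(", lhs, ", ", to_expression(rhs_id), ");"); // assumed helper
		else
			statement(lhs, " = ", to_expression(rhs_id), ";");
	}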
@@ -15120,6 +15200,8 @@ void CompilerGLSL::cast_from_builtin_load(uint32_t source_id, std::string &expr,
 	case BuiltInFragStencilRefEXT:
 	case BuiltInInstanceCustomIndexNV:
 	case BuiltInSampleMask:
+	case BuiltInPrimitiveShadingRateKHR:
+	case BuiltInShadingRateKHR:
 		expected_type = SPIRType::Int;
 		break;
 
@@ -15164,6 +15246,8 @@ void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr,
 	case BuiltInViewportIndex:
 	case BuiltInFragStencilRefEXT:
 	case BuiltInSampleMask:
+	case BuiltInPrimitiveShadingRateKHR:
+	case BuiltInShadingRateKHR:
 		expected_type = SPIRType::Int;
 		break;
 
@@ -15179,64 +15263,62 @@ void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr,
 	}
 }
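
Both cast helpers now treat the KHR shading-rate builtins as int, matching GL_EXT_fragment_shading_rate, where gl_ShadingRateEXT and gl_PrimitiveShadingRateEXT are declared as plain int. Illustrative effect on a load, assuming a SPIR-V module that typed the builtin as uint:

	// expected_type == SPIRType::Int, but the SPIR-V expression type is uint,
	// so the load expression gets a bitcast:
	//   "gl_ShadingRateEXT"  ->  "uint(gl_ShadingRateEXT)"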
 
-void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr)
+void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
 {
 	if (*backend.nonuniform_qualifier == '\0')
 		return;
 
-	// Handle SPV_EXT_descriptor_indexing.
-	if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage ||
-	    type.basetype == SPIRType::Image || type.basetype == SPIRType::AccelerationStructure)
-	{
-		// The image/sampler ID must be declared as non-uniform.
-		// However, it is not legal GLSL to have
-		// nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier
-		// to the array indexing, like
-		// samplers[nonuniformEXT(index)].
-		// While the access chain will generally be nonuniformEXT, it's not necessarily so,
-		// so we might have to fixup the OpLoad-ed expression late.
+	auto *var = maybe_get_backing_variable(ptr_id);
+	if (!var)
+		return;
 
-		auto start_array_index = expr.find_first_of('[');
+	if (var->storage != StorageClassUniformConstant &&
+	    var->storage != StorageClassStorageBuffer &&
+	    var->storage != StorageClassUniform)
+		return;
 
-		if (start_array_index == string::npos)
-			return;
+	auto &backing_type = get<SPIRType>(var->basetype);
+	if (backing_type.array.empty())
+		return;
 
-		// Check for the edge case that a non-arrayed resource was marked to be nonuniform,
-		// and the bracket we found is actually part of non-resource related data.
-		if (expr.find_first_of(',') < start_array_index)
-			return;
+	// If we get here, we know we're accessing an arrayed resource which
+	// might require nonuniform qualifier.
 
-		// We've opened a bracket, track expressions until we can close the bracket.
-		// This must be our image index.
-		size_t end_array_index = string::npos;
-		unsigned bracket_count = 1;
-		for (size_t index = start_array_index + 1; index < expr.size(); index++)
+	auto start_array_index = expr.find_first_of('[');
+
+	if (start_array_index == string::npos)
+		return;
+
+	// We've opened a bracket, track expressions until we can close the bracket.
+	// This must be our resource index.
+	size_t end_array_index = string::npos;
+	unsigned bracket_count = 1;
+	for (size_t index = start_array_index + 1; index < expr.size(); index++)
+	{
+		if (expr[index] == ']')
 		{
-			if (expr[index] == ']')
+			if (--bracket_count == 0)
 			{
-				if (--bracket_count == 0)
-				{
-					end_array_index = index;
-					break;
-				}
+				end_array_index = index;
+				break;
 			}
-			else if (expr[index] == '[')
-				bracket_count++;
 		}
+		else if (expr[index] == '[')
+			bracket_count++;
+	}
 
-		assert(bracket_count == 0);
+	assert(bracket_count == 0);
 
-		// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
-		// nothing we can do here to express that.
-		if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
-			return;
+	// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
+	// nothing we can do here to express that.
+	if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
+		return;
 
-		start_array_index++;
+	start_array_index++;
 
-		expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
-		            expr.substr(start_array_index, end_array_index - start_array_index), ")",
-		            expr.substr(end_array_index, string::npos));
-	}
+	expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
+	            expr.substr(start_array_index, end_array_index - start_array_index), ")",
+	            expr.substr(end_array_index, string::npos));
 }
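
The rewrite now keys off the backing variable's storage class and arrayness instead of pattern-matching on the expression type. nonuniformEXT() must wrap the index, not the whole resource expression, to be legal GLSL. Illustrative transform, with hypothetical names:

	// ptr_id is backed by an arrayed resource, e.g. uniform sampler2D uSamplers[];
	std::string expr = "uSamplers[index]";
	convert_non_uniform_expression(expr, ptr_id);
	// expr == "uSamplers[nonuniformEXT(index)]"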
 
 void CompilerGLSL::emit_block_hints(const SPIRBlock &)
@@ -15342,40 +15424,6 @@ void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
 	}
 }
 
-void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
-{
-	// SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen,
-	// we need to know NonUniformEXT a little earlier, when the resource is actually loaded.
-	// Back-propagate the qualifier based on the expression dependency chain.
-
-	if (!has_decoration(id, DecorationNonUniformEXT))
-	{
-		set_decoration(id, DecorationNonUniformEXT);
-		force_recompile();
-	}
-
-	auto *e = maybe_get<SPIRExpression>(id);
-	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
-	auto *chain = maybe_get<SPIRAccessChain>(id);
-	if (e)
-	{
-		for (auto &expr : e->expression_dependencies)
-			propagate_nonuniform_qualifier(expr);
-		for (auto &expr : e->implied_read_expressions)
-			propagate_nonuniform_qualifier(expr);
-	}
-	else if (combined)
-	{
-		propagate_nonuniform_qualifier(combined->image);
-		propagate_nonuniform_qualifier(combined->sampler);
-	}
-	else if (chain)
-	{
-		for (auto &expr : chain->implied_read_expressions)
-			propagate_nonuniform_qualifier(expr);
-	}
-}
-
 void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
                                           SmallVector<uint32_t> chain)
 {
@@ -15788,3 +15836,149 @@ void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID
 		expr = join("spvWorkaroundRowMajor(", expr, ")");
 	}
 }
+
+void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component)
+{
+	masked_output_locations.insert({ location, component });
+}
+
+void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin)
+{
+	masked_output_builtins.insert(builtin);
+}
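
Hypothetical client-side usage of the new masking API (declared in spirv_glsl.hpp below): hide a vertex output the next stage never consumes, so the MSL/HLSL backends emit it as a private variable:

	CompilerMSL compiler(std::move(spirv_binary));
	compiler.mask_stage_output_by_location(/*location*/ 1, /*component*/ 0);
	compiler.mask_stage_output_by_builtin(spv::BuiltInPointSize);
	std::string source = compiler.compile();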
+
+bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const
+{
+	auto &type = get<SPIRType>(var.basetype);
+	bool is_block = has_decoration(type.self, DecorationBlock);
+	// Blocks by themselves are never masked. Must be masked per-member.
+	if (is_block)
+		return false;
+
+	bool is_builtin = has_decoration(var.self, DecorationBuiltIn);
+
+	if (is_builtin)
+	{
+		return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn)));
+	}
+	else
+	{
+		if (!has_decoration(var.self, DecorationLocation))
+			return false;
+
+		return is_stage_output_location_masked(
+				get_decoration(var.self, DecorationLocation),
+				get_decoration(var.self, DecorationComponent));
+	}
+}
+
+bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
+{
+	auto &type = get<SPIRType>(var.basetype);
+	bool is_block = has_decoration(type.self, DecorationBlock);
+	if (!is_block)
+		return false;
+
+	BuiltIn builtin = BuiltInMax;
+	if (is_member_builtin(type, index, &builtin))
+	{
+		return is_stage_output_builtin_masked(builtin);
+	}
+	else
+	{
+		uint32_t location = get_declared_member_location(var, index, strip_array);
+		uint32_t component = get_member_decoration(type.self, index, DecorationComponent);
+		return is_stage_output_location_masked(location, component);
+	}
+}
+
+bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
+{
+	return masked_output_locations.count({ location, component }) != 0;
+}
+
+bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const
+{
+	return masked_output_builtins.count(builtin) != 0;
+}
+
+uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
+{
+	auto &block_type = get<SPIRType>(var.basetype);
+	if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation))
+		return get_member_decoration(block_type.self, mbr_idx, DecorationLocation);
+	else
+		return get_accumulated_member_location(var, mbr_idx, strip_array);
+}
+
+uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
+{
+	auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
+	uint32_t location = get_decoration(var.self, DecorationLocation);
+
+	for (uint32_t i = 0; i < mbr_idx; i++)
+	{
+		auto &mbr_type = get<SPIRType>(type.member_types[i]);
+
+		// Start counting from any place we have a new location decoration.
+		if (has_member_decoration(type.self, i, DecorationLocation))
+			location = get_member_decoration(type.self, i, DecorationLocation);
+
+		uint32_t location_count = type_to_location_count(mbr_type);
+		location += location_count;
+	}
+
+	return location;
+}
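
Worked example of the accumulation, assuming a block whose members carry no Location decorations of their own:

	// layout(location = 2) out Block { vec4 a; mat4 b; float c[3]; } blk;
	// a sits at 2, b at 3 (a takes 1 location), c at 7 (b takes 4 columns),
	// so get_accumulated_member_location(blk_var, /*mbr_idx*/ 2, false) == 7.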
+
+StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
+{
+	auto *var = maybe_get_backing_variable(ptr);
+
+	// If the expression has been lowered to a temporary, we need to use the Generic storage class.
+	// We're looking for the effective storage class of a given expression.
+	// An access chain or forwarded OpLoads from such access chains
+	// will generally have the storage class of the underlying variable, but if the load was not forwarded
+	// we have lost any address space qualifiers.
+	bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
+	                        (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);
+
+	if (var && !forced_temporary)
+	{
+		if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
+			return StorageClassWorkgroup;
+		if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
+			return StorageClassStorageBuffer;
+
+		// Normalize SSBOs to StorageBuffer here.
+		if (var->storage == StorageClassUniform &&
+		    has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+			return StorageClassStorageBuffer;
+		else
+			return var->storage;
+	}
+	else
+		return expression_type(ptr).storage;
+}
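
Illustration of the normalization (ssbo_ptr_id is a hypothetical id): both a SPIR-V 1.3 StorageBuffer variable and a legacy Uniform + BufferBlock SSBO report the same effective class, so callers such as the emit_array_copy path above see one canonical storage class:

	spv::StorageClass sc = get_expression_effective_storage_class(ssbo_ptr_id);
	// sc == spv::StorageClassStorageBuffer for either declaration style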
+
+uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
+{
+	uint32_t count;
+	if (type.basetype == SPIRType::Struct)
+	{
+		uint32_t mbr_count = uint32_t(type.member_types.size());
+		count = 0;
+		for (uint32_t i = 0; i < mbr_count; i++)
+			count += type_to_location_count(get<SPIRType>(type.member_types[i]));
+	}
+	else
+	{
+		count = type.columns > 1 ? type.columns : 1;
+	}
+
+	uint32_t dim_count = uint32_t(type.array.size());
+	for (uint32_t i = 0; i < dim_count; i++)
+		count *= to_array_size_literal(type, i);
+
+	return count;
+}
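
Worked examples of the location count (one location per column, multiplied by the array extent; structs sum their members):

	// float, vec4                      -> 1
	// mat4                             -> 4 (columns)
	// vec4 v[3]                        -> 1 * 3 = 3
	// struct { vec3 a; mat2 b; } s[2]  -> (1 + 2) * 2 = 6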

+ 35 - 8
3rdparty/spirv-cross/spirv_glsl.hpp

@@ -65,7 +65,8 @@ enum AccessChainFlagBits
 	ACCESS_CHAIN_PTR_CHAIN_BIT = 1 << 2,
 	ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3,
 	ACCESS_CHAIN_LITERAL_MSB_FORCE_ID = 1 << 4,
-	ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT = 1 << 5
+	ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT = 1 << 5,
+	ACCESS_CHAIN_FORCE_COMPOSITE_BIT = 1 << 6
 };
 typedef uint32_t AccessChainFlags;
 
@@ -250,6 +251,16 @@ public:
 	// - Images which are statically used at least once with Dref opcodes.
 	bool variable_is_depth_or_compare(VariableID id) const;
 
+	// If a shader output is active in this stage, but inactive in a subsequent stage,
+	// this can be signalled here. This can be used to work around certain cross-stage matching problems
+	// which plague MSL and HLSL in certain scenarios.
+	// An output which matches one of these will not be emitted in stage output interfaces, but rather treated as a private
+	// variable.
+	// This option is only meaningful for MSL and HLSL, since GLSL matches by location directly.
+	// Masking builtins only takes effect if the builtin in question is part of the stage output interface.
+	void mask_stage_output_by_location(uint32_t location, uint32_t component);
+	void mask_stage_output_by_builtin(spv::BuiltIn builtin);
+
 protected:
 	struct ShaderSubgroupSupportHelper
 	{
@@ -375,6 +386,7 @@ protected:
 	virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector);
 	virtual void emit_fixup();
 	virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0);
+	virtual bool variable_decl_is_remapped_storage(const SPIRVariable &var, spv::StorageClass storage) const;
 	virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id);
 
 	struct TextureFunctionBaseArguments
@@ -569,6 +581,7 @@ protected:
 		bool use_array_constructor = false;
 		bool needs_row_major_load_workaround = false;
 		bool support_pointer_to_pointer = false;
+		bool support_precise_qualifier = false;
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -616,6 +629,8 @@ protected:
 	void emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2,
 	                          const char *op);
 	void emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+	void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+	void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op);
 
 	void emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
 	                             SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type);
@@ -661,6 +676,9 @@ protected:
 	std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags,
 	                                  AccessChainMeta *meta);
 
+	spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
+	virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base);
+
 	virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
 	                                                    spv::StorageClass storage, bool &is_packed);
 
@@ -691,6 +709,7 @@ protected:
 	void emit_uninitialized_temporary(uint32_t type, uint32_t id);
 	SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id);
 	void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
+	std::string to_non_uniform_aware_expression(uint32_t id);
 	std::string to_expression(uint32_t id, bool register_expression_read = true);
 	std::string to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset);
 	std::string to_rerolled_array_expression(const std::string &expr, const SPIRType &type);
@@ -716,17 +735,17 @@ protected:
 	virtual std::string to_qualifiers_glsl(uint32_t id);
 	void fixup_io_block_patch_qualifiers(const SPIRVariable &var);
 	void emit_output_variable_initializer(const SPIRVariable &var);
-	const char *to_precision_qualifiers_glsl(uint32_t id);
+	std::string to_precision_qualifiers_glsl(uint32_t id);
 	virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);
-	const char *flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags);
+	std::string flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags);
 	const char *format_to_glsl(spv::ImageFormat format);
 	virtual std::string layout_for_member(const SPIRType &type, uint32_t index);
 	virtual std::string to_interpolation_qualifiers(const Bitset &flags);
 	std::string layout_for_variable(const SPIRVariable &variable);
 	std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id);
 	virtual bool skip_argument(uint32_t id) const;
-	virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
-	                             spv::StorageClass rhs_storage);
+	virtual void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id,
+	                             spv::StorageClass lhs_storage, spv::StorageClass rhs_storage);
 	virtual void emit_block_hints(const SPIRBlock &block);
 	virtual std::string to_initializer_expression(const SPIRVariable &var);
 	virtual std::string to_zero_initialized_expression(uint32_t type_id);
@@ -741,6 +760,7 @@ protected:
 	uint32_t type_to_packed_alignment(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing);
 	uint32_t type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing);
 	uint32_t type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing);
+	uint32_t type_to_location_count(const SPIRType &type) const;
 
 	std::string bitcast_glsl(const SPIRType &result_type, uint32_t arg);
 	virtual std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type);
@@ -881,7 +901,7 @@ protected:
 	virtual void cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type);
 	void unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr);
 	bool unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id);
-	void convert_non_uniform_expression(const SPIRType &type, std::string &expr);
+	void convert_non_uniform_expression(std::string &expr, uint32_t ptr_id);
 
 	void handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id);
 	void disallow_forwarding_in_expression_chain(const SPIRExpression &expr);
@@ -900,10 +920,17 @@ protected:
 	void fixup_type_alias();
 	void reorder_type_alias();
 
-	void propagate_nonuniform_qualifier(uint32_t id);
-
 	static const char *vector_swizzle(int vecsize, int index);
 
+	bool is_stage_output_location_masked(uint32_t location, uint32_t component) const;
+	bool is_stage_output_builtin_masked(spv::BuiltIn builtin) const;
+	bool is_stage_output_variable_masked(const SPIRVariable &var) const;
+	bool is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const;
+	uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const;
+	uint32_t get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const;
+	std::unordered_set<LocationComponentPair, InternalHasher> masked_output_locations;
+	std::unordered_set<uint32_t> masked_output_builtins;
+
 private:
 	void init();
 };

+ 82 - 66
3rdparty/spirv-cross/spirv_hlsl.cpp

@@ -570,7 +570,7 @@ void CompilerHLSL::emit_builtin_outputs_in_struct()
 		switch (builtin)
 		{
 		case BuiltInPosition:
-			type = "float4";
+			type = is_position_invariant() && backend.support_precise_qualifier ? "precise float4" : "float4";
 			semantic = legacy ? "POSITION" : "SV_Position";
 			break;
 
@@ -818,8 +818,8 @@ string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags)
 		res += "patch "; // Seems to be different in actual HLSL.
 	if (flags.get(DecorationSample))
 		res += "sample ";
-	if (flags.get(DecorationInvariant))
-		res += "invariant "; // Not supported?
+	if (flags.get(DecorationInvariant) && backend.support_precise_qualifier)
+		res += "precise "; // Not supported?
 
 	return res;
 }
@@ -865,24 +865,10 @@ void CompilerHLSL::emit_io_block(const SPIRVariable &var)
 	begin_scope();
 	type.member_name_cache.clear();
 
-	uint32_t base_location = get_decoration(var.self, DecorationLocation);
-
 	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
 	{
-		string semantic;
-		if (has_member_decoration(type.self, i, DecorationLocation))
-		{
-			uint32_t location = get_member_decoration(type.self, i, DecorationLocation);
-			semantic = join(" : ", to_semantic(location, execution.model, var.storage));
-		}
-		else
-		{
-			// If the block itself has a location, but not its members, use the implicit location.
-			// There could be a conflict if the block members partially specialize the locations.
-			// It is unclear how SPIR-V deals with this. Assume this does not happen for now.
-			uint32_t location = base_location + i;
-			semantic = join(" : ", to_semantic(location, execution.model, var.storage));
-		}
+		uint32_t location = get_accumulated_member_location(var, i, false);
+		string semantic = join(" : ", to_semantic(location, execution.model, var.storage));
 
 		add_member_name(type, i);
 
@@ -2249,7 +2235,7 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var)
 
 string CompilerHLSL::to_sampler_expression(uint32_t id)
 {
-	auto expr = join("_", to_expression(id));
+	auto expr = join("_", to_non_uniform_aware_expression(id));
 	auto index = expr.find_first_of('[');
 	if (index == string::npos)
 	{
@@ -2754,13 +2740,16 @@ void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse)
 	bool proj = false;
 	const uint32_t *opt = nullptr;
 	auto *combined_image = maybe_get<SPIRCombinedImageSampler>(img);
-	auto img_expr = to_expression(combined_image ? combined_image->image : img);
 
-	inherited_expressions.push_back(coord);
+	if (combined_image && has_decoration(img, DecorationNonUniform))
+	{
+		set_decoration(combined_image->image, DecorationNonUniform);
+		set_decoration(combined_image->sampler, DecorationNonUniform);
+	}
 
-	// Make sure non-uniform decoration is back-propagated to where it needs to be.
-	if (has_decoration(img, DecorationNonUniformEXT))
-		propagate_nonuniform_qualifier(img);
+	auto img_expr = to_non_uniform_aware_expression(combined_image ? combined_image->image : img);
+
+	inherited_expressions.push_back(coord);
 
 	switch (op)
 	{
@@ -3016,7 +3005,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse)
 	{
 		string sampler_expr;
 		if (combined_image)
-			sampler_expr = to_expression(combined_image->sampler);
+			sampler_expr = to_non_uniform_aware_expression(combined_image->sampler);
 		else
 			sampler_expr = to_sampler_expression(img);
 		expr += sampler_expr;
@@ -3812,6 +3801,10 @@ void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIR
 		SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported, unless SM 6.2 and "
 		                  "native 16-bit types are enabled.");
 
+	string base = chain.base;
+	if (has_decoration(chain.self, DecorationNonUniform))
+		convert_non_uniform_expression(base, chain.self);
+
 	bool templated_load = hlsl_options.shader_model >= 62;
 	string load_expr;
 
@@ -3844,7 +3837,7 @@ void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIR
 		if (templated_load)
 			load_op = "Load";
 
-		load_expr = join(chain.base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index, ")");
+		load_expr = join(base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index, ")");
 	}
 	else if (type.columns == 1)
 	{
@@ -3866,7 +3859,7 @@ void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIR
 
 		for (uint32_t r = 0; r < type.vecsize; r++)
 		{
-			load_expr += join(chain.base, ".Load", template_expr, "(", chain.dynamic_index,
+			load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index,
 			                  chain.static_index + r * chain.matrix_stride, ")");
 			if (r + 1 < type.vecsize)
 				load_expr += ", ";
@@ -3915,7 +3908,7 @@ void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIR
 
 		for (uint32_t c = 0; c < type.columns; c++)
 		{
-			load_expr += join(chain.base, ".", load_op, template_expr, "(", chain.dynamic_index,
+			load_expr += join(base, ".", load_op, template_expr, "(", chain.dynamic_index,
 			                  chain.static_index + c * chain.matrix_stride, ")");
 			if (c + 1 < type.columns)
 				load_expr += ", ";
@@ -3944,7 +3937,7 @@ void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIR
 		{
 			for (uint32_t r = 0; r < type.vecsize; r++)
 			{
-				load_expr += join(chain.base, ".Load", template_expr, "(", chain.dynamic_index,
+				load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index,
 				                  chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ")");
 
 				if ((r + 1 < type.vecsize) || (c + 1 < type.columns))
@@ -3981,9 +3974,6 @@ void CompilerHLSL::emit_load(const Instruction &instruction)
 		uint32_t id = ops[1];
 		uint32_t ptr = ops[2];
 
-		if (has_decoration(ptr, DecorationNonUniformEXT))
-			propagate_nonuniform_qualifier(ptr);
-
 		auto &type = get<SPIRType>(result_type);
 		bool composite_load = !type.array.empty() || type.basetype == SPIRType::Struct;
 
@@ -4122,9 +4112,6 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 	// Make sure we trigger a read of the constituents in the access chain.
 	track_expression_read(chain.self);
 
-	if (has_decoration(chain.self, DecorationNonUniformEXT))
-		propagate_nonuniform_qualifier(chain.self);
-
 	SPIRType target_type;
 	target_type.basetype = SPIRType::UInt;
 	target_type.vecsize = type.vecsize;
@@ -4148,6 +4135,10 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 
 	bool templated_store = hlsl_options.shader_model >= 62;
 
+	auto base = chain.base;
+	if (has_decoration(chain.self, DecorationNonUniform))
+		convert_non_uniform_expression(base, chain.self);
+
 	string template_expr;
 	if (templated_store)
 		template_expr = join("<", type_to_glsl(type), ">");
@@ -4183,7 +4174,7 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 		}
 		else
 			store_op = "Store";
-		statement(chain.base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index, ", ",
+		statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index, ", ",
 		          store_expr, ");");
 	}
 	else if (type.columns == 1)
@@ -4214,7 +4205,7 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 					store_expr = join(bitcast_op, "(", store_expr, ")");
 			}
 
-			statement(chain.base, ".Store", template_expr, "(", chain.dynamic_index,
+			statement(base, ".Store", template_expr, "(", chain.dynamic_index,
 			          chain.static_index + chain.matrix_stride * r, ", ", store_expr, ");");
 		}
 	}
@@ -4258,7 +4249,7 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 					store_expr = join(bitcast_op, "(", store_expr, ")");
 			}
 
-			statement(chain.base, ".", store_op, template_expr, "(", chain.dynamic_index,
+			statement(base, ".", store_op, template_expr, "(", chain.dynamic_index,
 			          chain.static_index + c * chain.matrix_stride, ", ", store_expr, ");");
 		}
 	}
@@ -4282,7 +4273,7 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 				auto bitcast_op = bitcast_glsl_op(target_type, type);
 				if (!bitcast_op.empty())
 					store_expr = join(bitcast_op, "(", store_expr, ")");
-				statement(chain.base, ".Store", template_expr, "(", chain.dynamic_index,
+				statement(base, ".Store", template_expr, "(", chain.dynamic_index,
 				          chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ", ", store_expr, ");");
 			}
 		}
@@ -4384,9 +4375,6 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction)
 			inherit_expression_dependencies(ops[1], ops[i]);
 			add_implied_read_expression(e, ops[i]);
 		}
-
-		if (has_decoration(ops[1], DecorationNonUniformEXT))
-			propagate_nonuniform_qualifier(ops[1]);
 	}
 	else
 	{
@@ -4486,13 +4474,16 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
 
 		if (data_type.storage == StorageClassImage || !chain)
 		{
-			statement(atomic_op, "(", to_expression(ops[0]), ", ", to_expression(ops[3]), ", ", to_expression(tmp_id),
-			          ");");
+			statement(atomic_op, "(", to_non_uniform_aware_expression(ops[0]), ", ",
+			          to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
 		}
 		else
 		{
+			string base = chain->base;
+			if (has_decoration(chain->self, DecorationNonUniform))
+				convert_non_uniform_expression(base, chain->self);
 			// RWByteAddress buffer is always uint in its underlying type.
-			statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ",
+			statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ",
 			          to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
 		}
 	}
@@ -4510,14 +4501,17 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
 		SPIRType::BaseType expr_type;
 		if (data_type.storage == StorageClassImage || !chain)
 		{
-			statement(atomic_op, "(", to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
+			statement(atomic_op, "(", to_non_uniform_aware_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
 			expr_type = data_type.basetype;
 		}
 		else
 		{
 			// RWByteAddress buffer is always uint in its underlying type.
+			string base = chain->base;
+			if (has_decoration(chain->self, DecorationNonUniform))
+				convert_non_uniform_expression(base, chain->self);
 			expr_type = SPIRType::UInt;
-			statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr,
+			statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr,
 			          ", ", to_name(id), ");");
 		}
 
@@ -4618,13 +4612,35 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i)
 	}
 
 	case OpGroupNonUniformShuffle:
-		SPIRV_CROSS_THROW("Cannot trivially implement Shuffle in HLSL.");
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt");
+		break;
 	case OpGroupNonUniformShuffleXor:
-		SPIRV_CROSS_THROW("Cannot trivially implement ShuffleXor in HLSL.");
+	{
+		bool forward = should_forward(ops[3]);
+		emit_op(ops[0], ops[1],
+		        join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ",
+		             "WaveGetLaneIndex() ^ ", to_enclosed_expression(ops[4]), ")"), forward);
+		inherit_expression_dependencies(ops[1], ops[3]);
+		break;
+	}
 	case OpGroupNonUniformShuffleUp:
-		SPIRV_CROSS_THROW("Cannot trivially implement ShuffleUp in HLSL.");
+	{
+		bool forward = should_forward(ops[3]);
+		emit_op(ops[0], ops[1],
+		        join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ",
+		             "WaveGetLaneIndex() - ", to_enclosed_expression(ops[4]), ")"), forward);
+		inherit_expression_dependencies(ops[1], ops[3]);
+		break;
+	}
 	case OpGroupNonUniformShuffleDown:
-		SPIRV_CROSS_THROW("Cannot trivially implement ShuffleDown in HLSL.");
+	{
+		bool forward = should_forward(ops[3]);
+		emit_op(ops[0], ops[1],
+		        join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ",
+		             "WaveGetLaneIndex() + ", to_enclosed_expression(ops[4]), ")"), forward);
+		inherit_expression_dependencies(ops[1], ops[3]);
+		break;
+	}
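
The shuffle ops that previously threw are now lowered onto WaveReadLaneAt. Sketch of the HLSL this emits for ShuffleXor with a mask of 3 (output paraphrased):

	// float shuffled = WaveReadLaneAt(value, WaveGetLaneIndex() ^ 3u);
	// Caveat (assumption): WaveReadLaneAt is documented for wave-uniform lane
	// indices, so this emulation leans on the relaxed behavior of real
	// SM 6.x implementations.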
 
 	case OpGroupNonUniformAll:
 		emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllTrue");
@@ -5150,7 +5166,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter");
 		statement("uint ", dummy_samples_levels, ";");
 
-		auto expr = join("spvTextureSize(", to_expression(ops[2]), ", ",
+		auto expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", ",
 		                 bitcast_expression(SPIRType::UInt, ops[3]), ", ", dummy_samples_levels, ")");
 
 		auto &restype = get<SPIRType>(ops[0]);
@@ -5176,9 +5192,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 		string expr;
 		if (uav)
-			expr = join("spvImageSize(", to_expression(ops[2]), ", ", dummy_samples_levels, ")");
+			expr = join("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", dummy_samples_levels, ")");
 		else
-			expr = join("spvTextureSize(", to_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")");
+			expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")");
 
 		auto &restype = get<SPIRType>(ops[0]);
 		expr = bitcast_expression(restype, SPIRType::UInt, expr);
@@ -5208,9 +5224,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		statement(variable_decl(type, to_name(id)), ";");
 
 		if (uav)
-			statement("spvImageSize(", to_expression(ops[2]), ", ", to_name(id), ");");
+			statement("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", to_name(id), ");");
 		else
-			statement("spvTextureSize(", to_expression(ops[2]), ", 0u, ", to_name(id), ");");
+			statement("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", to_name(id), ");");
 
 		auto &restype = get<SPIRType>(ops[0]);
 		auto expr = bitcast_expression(restype, SPIRType::UInt, to_name(id));
@@ -5241,16 +5257,16 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 				if (operands != ImageOperandsSampleMask || instruction.length != 6)
 					SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used.");
 				uint32_t sample = ops[5];
-				imgexpr = join(to_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")");
+				imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")");
 			}
 			else
-				imgexpr = join(to_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))");
+				imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))");
 
 			pure = true;
 		}
 		else
 		{
-			imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]");
+			imgexpr = join(to_non_uniform_aware_expression(ops[2]), "[", to_expression(ops[3]), "]");
 			// The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4",
 			// except that the underlying type changes how the data is interpreted.
 
@@ -5299,7 +5315,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 			value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr);
 		}
 
-		statement(to_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";");
+		statement(to_non_uniform_aware_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";");
 		if (var && variable_storage_is_aliased(*var))
 			flush_all_aliased_variables();
 		break;
@@ -5311,10 +5327,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 
 		auto expr = to_expression(ops[2]);
-		if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ops[2], DecorationNonUniformEXT))
-			convert_non_uniform_expression(expression_type(ops[2]), expr);
 		expr += join("[", to_expression(ops[3]), "]");
-
 		auto &e = set<SPIRExpression>(id, expr, result_type, true);
 
 		// When using the pointer, we need to know which variable it is actually loaded from.
@@ -5492,7 +5505,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 	case OpArrayLength:
 	{
-		auto *var = maybe_get<SPIRVariable>(ops[2]);
+		auto *var = maybe_get_backing_variable(ops[2]);
 		if (!var)
 			SPIRV_CROSS_THROW("Array length must point directly to an SSBO block.");
 
@@ -5502,7 +5515,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 		// This must be 32-bit uint, so we're good to go.
 		emit_uninitialized_temporary_expression(ops[0], ops[1]);
-		statement(to_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");");
+		statement(to_non_uniform_aware_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");");
 		uint32_t offset = type_struct_member_offset(type, ops[3]);
 		uint32_t stride = type_struct_member_array_stride(type, ops[3]);
 		statement(to_expression(ops[1]), " = (", to_expression(ops[1]), " - ", offset, ") / ", stride, ";");
@@ -5718,6 +5731,9 @@ string CompilerHLSL::compile()
 	backend.nonuniform_qualifier = "NonUniformResourceIndex";
 	backend.support_case_fallthrough = false;
 
+	// SM 4.1 does not support precise for some reason.
+	backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40;
+
 	fixup_type_alias();
 	reorder_type_alias();
 	build_function_control_flow_graphs_and_analyze();

File changes are not shown because the diff is too large
+ 470 - 187
3rdparty/spirv-cross/spirv_msl.cpp


+ 52 - 14
3rdparty/spirv-cross/spirv_msl.hpp

@@ -71,15 +71,23 @@ struct MSLShaderInput
 // resources consumed by this binding, if the binding represents an array of resources.
 // If the resource array is a run-time-sized array, which are legal in GLSL or SPIR-V, this value
 // will be used to declare the array size in MSL, which does not support run-time-sized arrays.
-// For resources that are not held in a run-time-sized array, the count field does not need to be populated.
+// If pad_argument_buffer_resources is enabled, the basetype and count values are used to
+// specify the base type and array size of the resource in the argument buffer, if that resource
+// is not defined and used by the shader. With pad_argument_buffer_resources enabled, this
+// information will be used to pad the argument buffer structure, in order to align that
+// structure consistently for all uses, across all shaders, of the descriptor set represented
+// by the argument buffer. If pad_argument_buffer_resources is disabled, basetype does not
+// need to be populated, and if the resource is also not a run-time-sized array, the count
+// field does not need to be populated.
 // If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set,
 // and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we
 // remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure.
-// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will become a
-// [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used.
+// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will
+// become a [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used.
 struct MSLResourceBinding
 {
 	spv::ExecutionModel stage = spv::ExecutionModelMax;
+	SPIRType::BaseType basetype = SPIRType::Unknown;
 	uint32_t desc_set = 0;
 	uint32_t binding = 0;
 	uint32_t count = 0;
@@ -346,6 +354,19 @@ public:
 		// and would otherwise declare a different IAB.
 		bool force_active_argument_buffer_resources = false;
 
+		// Aligns each resource in an argument buffer to its assigned index value, id(N),
+		// by adding synthetic padding members in the argument buffer struct for any resources
+		// in the argument buffer that are not defined and used by the shader. This allows
+		// the shader to index into the correct argument in a descriptor set argument buffer
+		// that is shared across shaders, where not all resources in the argument buffer are
+		// defined in each shader. For this to work, an MSLResourceBinding must be provided for
+		// all descriptors in any descriptor set held in an argument buffer in the shader, and
+		// that MSLResourceBinding must have the basetype and count members populated correctly.
+		// The implementation here assumes any inline blocks in the argument buffer are provided
+		// in a Metal buffer, and doesn't take into consideration inline blocks that are
+		// optionally embedded directly into the argument buffer via add_inline_uniform_block().
+		bool pad_argument_buffer_resources = false;
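
Hypothetical client-side sketch of the new option: every slot in the shared descriptor set gets a binding with basetype and count populated, including slots this particular shader never references (the binding values shown are illustrative):

	CompilerMSL::Options opts = msl.get_msl_options();
	opts.argument_buffers = true;
	opts.pad_argument_buffer_resources = true;
	msl.set_msl_options(opts);

	MSLResourceBinding pad = {};
	pad.stage = spv::ExecutionModelFragment;
	pad.desc_set = 0;
	pad.binding = 3;                       // slot unused by this shader
	pad.basetype = SPIRType::SampledImage; // assumed type of the shared slot
	pad.count = 1;
	msl.add_msl_resource_binding(pad);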
+
 		// Forces the use of plain arrays, which works around certain driver bugs on certain versions
 		// of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210.
 		// May reduce performance in scenarios where arrays are copied around as value-types.
@@ -634,6 +655,7 @@ protected:
 		SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations
 		SPVFuncImplFMul,
 		SPVFuncImplFAdd,
+		SPVFuncImplFSub,
 		SPVFuncImplCubemapTo2DArrayFace,
 		SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type
 		SPVFuncImplInverse4x4,
@@ -715,6 +737,8 @@ protected:
 	// Threadgroup arrays can't have a wrapper type
 	std::string variable_decl(const SPIRVariable &variable) override;
 
+	bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override;
+
 	// GCC workaround of lambdas calling protected functions (for older GCC versions)
 	std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override;
 
@@ -780,8 +804,11 @@ protected:
 		};
 		std::unordered_map<uint32_t, LocationMeta> location_meta;
 		bool strip_array = false;
+		bool allow_local_declaration = false;
 	};
 
+	std::string to_tesc_invocation_id();
+	void emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array);
 	void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type,
 	                                     SPIRVariable &var, InterfaceBlockMeta &meta);
 	void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
@@ -794,14 +821,15 @@ protected:
 	void add_composite_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
 	                                                      SPIRType &ib_type, SPIRVariable &var, uint32_t index,
 	                                                      InterfaceBlockMeta &meta);
-	uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array);
 	void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var);
 
 	void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id);
 
-	void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, spv::StorageClass storage);
+	void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type,
+	                                     spv::StorageClass storage, bool fallback = false);
 	uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin);
-	uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t num_components = 0);
+	uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location,
+	                                   uint32_t num_components, bool strip_array);
 
 	void emit_custom_templates();
 	void emit_custom_functions();
@@ -886,8 +914,8 @@ protected:
 	void add_pragma_line(const std::string &line);
 	void add_typedef_line(const std::string &line);
 	void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
-	void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
-	                     spv::StorageClass rhs_storage) override;
+	void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id,
+	                     spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override;
 	void build_implicit_builtins();
 	uint32_t build_constant_uint_array_pointer();
 	void emit_entry_point_declarations() override;
@@ -913,6 +941,9 @@ protected:
 	uint32_t view_mask_buffer_id = 0;
 	uint32_t dynamic_offsets_buffer_id = 0;
 	uint32_t uint_type_id = 0;
+	uint32_t argument_buffer_padding_buffer_type_id = 0;
+	uint32_t argument_buffer_padding_image_type_id = 0;
+	uint32_t argument_buffer_padding_sampler_type_id = 0;
 
 	bool does_shader_write_sample_mask = false;
 
@@ -922,6 +953,7 @@ protected:
 
 	void analyze_sampled_image_usage();
 
+	bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override;
 	void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage,
 	                                            bool &is_packed) override;
 	void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length);
@@ -941,6 +973,7 @@ protected:
 	std::map<uint32_t, MSLShaderInput> inputs_by_location;
 	std::unordered_map<uint32_t, MSLShaderInput> inputs_by_builtin;
 	std::unordered_set<uint32_t> location_inputs_in_use;
+	std::unordered_set<uint32_t> location_inputs_in_use_fallback;
 	std::unordered_map<uint32_t, uint32_t> fragment_output_components;
 	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location;
 	std::set<std::string> pragma_lines;
@@ -948,7 +981,7 @@ protected:
 	SmallVector<uint32_t> vars_needing_early_declaration;
 
 	std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings;
-	uint32_t type_to_location_count(const SPIRType &type) const;
+	std::unordered_map<StageSetBinding, uint32_t, InternalHasher> resource_arg_buff_idx_to_binding_number;
 
 	uint32_t next_metal_resource_index_buffer = 0;
 	uint32_t next_metal_resource_index_texture = 0;
@@ -962,6 +995,7 @@ protected:
 	VariableID patch_stage_out_var_id = 0;
 	VariableID stage_in_ptr_var_id = 0;
 	VariableID stage_out_ptr_var_id = 0;
+	VariableID stage_out_masked_builtin_type_id = 0;
 
 	// Handle HLSL-style 0-based vertex/instance index.
 	enum class TriState
@@ -1027,6 +1061,11 @@ protected:
 
 	void analyze_argument_buffers();
 	bool descriptor_set_is_argument_buffer(uint32_t desc_set) const;
+	MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx);
+	void add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
+	void add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
+	void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
+	void add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, uint32_t count);
 
 	uint32_t get_target_components_for_fragment_location(uint32_t location) const;
 	uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components,
@@ -1044,6 +1083,8 @@ protected:
 	bool type_is_pointer_to_pointer(const SPIRType &type) const;
 	bool is_supported_argument_buffer_type(const SPIRType &type) const;
 
+	bool variable_storage_requires_stage_io(spv::StorageClass storage) const;
+
 	// OpcodeHandler that handles several MSL preprocessing operations.
 	struct OpCodePreprocessor : OpcodeHandler
 	{
@@ -1087,11 +1128,8 @@ protected:
 	{
 		enum SortAspect
 		{
-			Location,
-			LocationReverse,
-			Offset,
-			OffsetThenLocationReverse,
-			Alphabetical
+			LocationThenBuiltInType,
+			Offset
 		};
 
 		void sort();

Some files were not shown because the diff is too large