
Merge pull request #99820 from stuartcarnie/issue_99682

Metal: Add support for 2017 era iOS devices
Thaddeus Crews committed 8 months ago · commit effea567a5

+ 13 - 12
drivers/metal/metal_device_properties.h

@@ -71,18 +71,19 @@ typedef NS_OPTIONS(NSUInteger, SampleCount) {
 };
 
 struct API_AVAILABLE(macos(11.0), ios(14.0)) MetalFeatures {
-	uint32_t mslVersion;
-	MTLGPUFamily highestFamily;
-	MTLLanguageVersion mslVersionEnum;
-	SampleCount supportedSampleCounts;
-	long hostMemoryPageSize;
-	bool layeredRendering;
-	bool multisampleLayeredRendering;
-	bool quadPermute; /**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */
-	bool simdPermute; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
-	bool simdReduction; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
-	bool tessellationShader; /**< If true, tessellation shaders are supported. */
-	bool imageCubeArray; /**< If true, image cube arrays are supported. */
+	uint32_t mslVersion = 0;
+	MTLGPUFamily highestFamily = MTLGPUFamilyApple4;
+	MTLLanguageVersion mslVersionEnum = MTLLanguageVersion1_2;
+	SampleCount supportedSampleCounts = SampleCount1;
+	long hostMemoryPageSize = 0;
+	bool layeredRendering = false;
+	bool multisampleLayeredRendering = false;
+	bool quadPermute = false; /**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */
+	bool simdPermute = false; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
+	bool simdReduction = false; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
+	bool tessellationShader = false; /**< If true, tessellation shaders are supported. */
+	bool imageCubeArray = false; /**< If true, image cube arrays are supported. */
+	MTLArgumentBuffersTier argument_buffers_tier = MTLArgumentBuffersTier1;
 };
 
 struct MetalLimits {

+ 1 - 0
drivers/metal/metal_device_properties.mm

@@ -98,6 +98,7 @@ void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
 	features.quadPermute = [p_device supportsFamily:MTLGPUFamilyApple4];
 	features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
 	features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
+	features.argument_buffers_tier = p_device.argumentBuffersSupport;
 
 	MTLCompileOptions *opts = [MTLCompileOptions new];
 	features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version.
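
Note: the new `argument_buffers_tier` field is filled from the standard `MTLDevice.argumentBuffersSupport` property, which reports an `MTLArgumentBuffersTier`. A minimal standalone query, as a sketch:

    id<MTLDevice> device = MTLCreateSystemDefaultDevice();
    if (device.argumentBuffersSupport >= MTLArgumentBuffersTier2) {
        // Tier 2 hardware: whole descriptor sets can be encoded into argument
        // buffers, including writable textures, which Tier 1 does not allow.
    }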

+ 16 - 5
drivers/metal/metal_objects.h

@@ -696,11 +696,12 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) MDShader {
 public:
 	CharString name;
 	Vector<UniformSet> sets;
+	bool uses_argument_buffers = true;
 
 	virtual void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) = 0;
 
-	MDShader(CharString p_name, Vector<UniformSet> p_sets) :
-			name(p_name), sets(p_sets) {}
+	MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :
+			name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {}
 	virtual ~MDShader() = default;
 };
 
@@ -719,7 +720,7 @@ public:
 
 	void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
 
-	MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_kernel);
+	MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel);
 };
 
 class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderShader final : public MDShader {
@@ -746,8 +747,9 @@ public:
 	void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
 
 	MDRenderShader(CharString p_name,
-			bool p_needs_view_mask_buffer,
 			Vector<UniformSet> p_sets,
+			bool p_needs_view_mask_buffer,
+			bool p_uses_argument_buffers,
 			MDLibrary *p_vert, MDLibrary *p_frag);
 };
 
@@ -783,12 +785,21 @@ struct BoundUniformSet {
 };
 
 class API_AVAILABLE(macos(11.0), ios(14.0)) MDUniformSet {
+private:
+	void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state);
+	void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state);
+	void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state);
+	void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state);
+
 public:
 	uint32_t index;
 	LocalVector<RDD::BoundUniform> uniforms;
 	HashMap<MDShader *, BoundUniformSet> bound_uniforms;
 
-	BoundUniformSet &boundUniformSetForShader(MDShader *p_shader, id<MTLDevice> p_device);
+	void bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state);
+	void bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state);
+
+	BoundUniformSet &bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage);
 };
 
 class API_AVAILABLE(macos(11.0), ios(14.0)) MDPipeline {

+ 384 - 65
drivers/metal/metal_objects.mm

@@ -249,7 +249,7 @@ void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear>
 	const MDSubpass &subpass = render.get_subpass();
 
 	uint32_t vertex_count = p_rects.size() * 6 * subpass.view_count;
-	simd::float4 vertices[vertex_count];
+	simd::float4 *vertices = ALLOCA_ARRAY(simd::float4, vertex_count);
 	simd::float4 clear_colors[ClearAttKey::ATTACHMENT_COUNT];
 
 	Size2i size = render.frameBuffer->size;
@@ -362,7 +362,7 @@ void MDCommandBuffer::_render_set_dirty_state() {
 
 	if (render.dirty.has_flag(RenderState::DIRTY_SCISSOR) && !render.scissors.is_empty()) {
 		size_t len = render.scissors.size();
-		MTLScissorRect rects[len];
+		MTLScissorRect *rects = ALLOCA_ARRAY(MTLScissorRect, len);
 		for (size_t i = 0; i < len; i++) {
 			rects[i] = render.clip_to_render_area(render.scissors[i]);
 		}
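
Both hunks above replace C-style variable-length arrays, which are a compiler extension in C++, with explicit stack allocation. A plausible expansion of Godot's `ALLOCA_ARRAY` macro, assuming it wraps `alloca` (a sketch, not the verbatim definition):

    #include <alloca.h>
    // Hypothetical expansion: allocate m_count elements of m_type on the stack.
    #define ALLOCA_ARRAY(m_type, m_count) ((m_type *)alloca(sizeof(m_type) * (m_count)))
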
@@ -466,9 +466,7 @@ void MDCommandBuffer::_render_bind_uniform_sets() {
 	uint64_t set_uniforms = render.uniform_set_mask;
 	render.uniform_set_mask = 0;
 
-	id<MTLRenderCommandEncoder> enc = render.encoder;
 	MDRenderShader *shader = render.pipeline->shader;
-	id<MTLDevice> device = enc.device;
 
 	while (set_uniforms != 0) {
 		// Find the index of the next set bit.
@@ -479,25 +477,7 @@ void MDCommandBuffer::_render_bind_uniform_sets() {
 		if (set == nullptr || set->index >= (uint32_t)shader->sets.size()) {
 			continue;
 		}
-		UniformSet const &set_info = shader->sets[set->index];
-
-		BoundUniformSet &bus = set->boundUniformSetForShader(shader, device);
-		bus.merge_into(render.resource_usage);
-
-		// Set the buffer for the vertex stage.
-		{
-			uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_VERTEX);
-			if (offset) {
-				[enc setVertexBuffer:bus.buffer offset:*offset atIndex:set->index];
-			}
-		}
-		// Set the buffer for the fragment stage.
-		{
-			uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_FRAGMENT);
-			if (offset) {
-				[enc setFragmentBuffer:bus.buffer offset:*offset atIndex:set->index];
-			}
-		}
+		set->bind_uniforms(shader, render);
 	}
 }
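
Uniform binding now delegates to `MDUniformSet::bind_uniforms`, which selects between the argument-buffer path and the new direct path added for Tier 1 devices. The two encoder-level styles differ as sketched below (`arg_buffer`, `tex`, `sampler`, and the indices are illustrative):

    // Argument-buffer path (Tier 2): one buffer carries the whole descriptor set.
    [enc setVertexBuffer:arg_buffer offset:set_offset atIndex:set_index];
    // Direct path (Tier 1): each resource is bound individually, per stage.
    [enc setVertexTexture:tex atIndex:texture_index];
    [enc setVertexSamplerState:sampler atIndex:sampler_index];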
 
@@ -968,54 +948,21 @@ void MDCommandBuffer::ComputeState::end_encoding() {
 void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
 	DEV_ASSERT(type == MDCommandBufferStateType::Compute);
 
-	id<MTLComputeCommandEncoder> enc = compute.encoder;
-	id<MTLDevice> device = enc.device;
-
 	MDShader *shader = (MDShader *)(p_shader.id);
-	UniformSet const &set_info = shader->sets[p_set_index];
-
 	MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id);
-	BoundUniformSet &bus = set->boundUniformSetForShader(shader, device);
-	bus.merge_into(compute.resource_usage);
-
-	uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE);
-	if (offset) {
-		[enc setBuffer:bus.buffer offset:*offset atIndex:p_set_index];
-	}
+	set->bind_uniforms(shader, compute);
 }
 
 void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
 	DEV_ASSERT(type == MDCommandBufferStateType::Compute);
 
-	id<MTLComputeCommandEncoder> enc = compute.encoder;
-	id<MTLDevice> device = enc.device;
-
 	MDShader *shader = (MDShader *)(p_shader.id);
 
-	thread_local LocalVector<__unsafe_unretained id<MTLBuffer>> buffers;
-	thread_local LocalVector<NSUInteger> offsets;
-
-	buffers.resize(p_set_count);
-	offsets.resize(p_set_count);
-
+	// TODO(sgc): Bind multiple buffers using [encoder setBuffers:offsets:withRange:]
 	for (size_t i = 0u; i < p_set_count; ++i) {
-		UniformSet const &set_info = shader->sets[p_first_set_index + i];
-
 		MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
-		BoundUniformSet &bus = set->boundUniformSetForShader(shader, device);
-		bus.merge_into(compute.resource_usage);
-
-		uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE);
-		if (offset) {
-			buffers[i] = bus.buffer;
-			offsets[i] = *offset;
-		} else {
-			buffers[i] = nullptr;
-			offsets[i] = 0u;
-		}
+		set->bind_uniforms(shader, compute);
 	}
-
-	[enc setBuffers:buffers.ptr() offsets:offsets.ptr() withRange:NSMakeRange(p_first_set_index, p_set_count)];
 }
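
The loop above now binds each set separately; the TODO notes that the batched encoder API could restore single-call binding for the argument-buffer case. A hedged sketch of that batched form (the buffer and offset values are illustrative):

    id<MTLBuffer> __unsafe_unretained bufs[2] = { bus_a.buffer, bus_b.buffer };
    NSUInteger offs[2] = { offset_a, offset_b };
    [enc setBuffers:bufs offsets:offs withRange:NSMakeRange(p_first_set_index, 2)];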
 
 void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
@@ -1052,8 +999,11 @@ void MDCommandBuffer::_end_blit() {
 	type = MDCommandBufferStateType::None;
 }
 
-MDComputeShader::MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_kernel) :
-		MDShader(p_name, p_sets), kernel(p_kernel) {
+MDComputeShader::MDComputeShader(CharString p_name,
+		Vector<UniformSet> p_sets,
+		bool p_uses_argument_buffers,
+		MDLibrary *p_kernel) :
+		MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(p_kernel) {
 }
 
 void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
@@ -1071,15 +1021,19 @@ void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDC
 }
 
 MDRenderShader::MDRenderShader(CharString p_name,
-		bool p_needs_view_mask_buffer,
 		Vector<UniformSet> p_sets,
+		bool p_needs_view_mask_buffer,
+		bool p_uses_argument_buffers,
 		MDLibrary *_Nonnull p_vert, MDLibrary *_Nonnull p_frag) :
-		MDShader(p_name, p_sets), needs_view_mask_buffer(p_needs_view_mask_buffer), vert(p_vert), frag(p_frag) {
+		MDShader(p_name, p_sets, p_uses_argument_buffers),
+		needs_view_mask_buffer(p_needs_view_mask_buffer),
+		vert(p_vert),
+		frag(p_frag) {
 }
 
 void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
 	DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Render);
-	id<MTLRenderCommandEncoder> enc = p_cb->render.encoder;
+	id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_cb->render.encoder;
 
 	void const *ptr = p_data.ptr();
 	size_t length = p_data.size() * sizeof(uint32_t);
@@ -1093,9 +1047,373 @@ void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCo
 	}
 }
 
-BoundUniformSet &MDUniformSet::boundUniformSetForShader(MDShader *p_shader, id<MTLDevice> p_device) {
+void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) {
+	DEV_ASSERT(p_shader->uses_argument_buffers);
+	DEV_ASSERT(p_state.encoder != nil);
+
+	UniformSet const &set_info = p_shader->sets[index];
+
+	id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_state.encoder;
+	id<MTLDevice> __unsafe_unretained device = enc.device;
+
+	BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage);
+
+	// Set the buffer for the vertex stage.
+	{
+		uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_VERTEX);
+		if (offset) {
+			[enc setVertexBuffer:bus.buffer offset:*offset atIndex:index];
+		}
+	}
+	// Set the buffer for the fragment stage.
+	{
+		uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_FRAGMENT);
+		if (offset) {
+			[enc setFragmentBuffer:bus.buffer offset:*offset atIndex:index];
+		}
+	}
+}
+
+void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) {
+	DEV_ASSERT(!p_shader->uses_argument_buffers);
+	DEV_ASSERT(p_state.encoder != nil);
+
+	id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_state.encoder;
+
+	UniformSet const &set = p_shader->sets[index];
+
+	for (uint32_t i = 0; i < uniforms.size(); i++) {
+		RDD::BoundUniform const &uniform = uniforms[i];
+		UniformInfo ui = set.uniforms[i];
+
+		static const RDC::ShaderStage stage_usages[2] = { RDC::ShaderStage::SHADER_STAGE_VERTEX, RDC::ShaderStage::SHADER_STAGE_FRAGMENT };
+		for (const RDC::ShaderStage stage : stage_usages) {
+			ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage);
+
+			BindingInfo *bi = ui.bindings.getptr(stage);
+			if (bi == nullptr) {
+				// No binding for this stage.
+				continue;
+			}
+
+			if ((ui.active_stages & stage_usage) == 0) {
+				// Not active for this stage, so don't bind anything.
+				continue;
+			}
+
+			switch (uniform.type) {
+				case RDD::UNIFORM_TYPE_SAMPLER: {
+					size_t count = uniform.ids.size();
+					id<MTLSamplerState> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count);
+					for (size_t j = 0; j < count; j += 1) {
+						objects[j] = rid::get(uniform.ids[j].id);
+					}
+					if (stage == RDD::SHADER_STAGE_VERTEX) {
+						[enc setVertexSamplerStates:objects withRange:NSMakeRange(bi->index, count)];
+					} else {
+						[enc setFragmentSamplerStates:objects withRange:NSMakeRange(bi->index, count)];
+					}
+				} break;
+				case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
+					size_t count = uniform.ids.size() / 2;
+					id<MTLTexture> __unsafe_unretained *textures = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+					id<MTLSamplerState> __unsafe_unretained *samplers = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count);
+					for (uint32_t j = 0; j < count; j += 1) {
+						id<MTLSamplerState> sampler = rid::get(uniform.ids[j * 2 + 0]);
+						id<MTLTexture> texture = rid::get(uniform.ids[j * 2 + 1]);
+						samplers[j] = sampler;
+						textures[j] = texture;
+					}
+					BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
+					if (sbi) {
+						if (stage == RDD::SHADER_STAGE_VERTEX) {
+							[enc setVertexSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)];
+						} else {
+							[enc setFragmentSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)];
+						}
+					}
+					if (stage == RDD::SHADER_STAGE_VERTEX) {
+						[enc setVertexTextures:textures withRange:NSMakeRange(bi->index, count)];
+					} else {
+						[enc setFragmentTextures:textures withRange:NSMakeRange(bi->index, count)];
+					}
+				} break;
+				case RDD::UNIFORM_TYPE_TEXTURE: {
+					size_t count = uniform.ids.size();
+					if (count == 1) {
+						id<MTLTexture> obj = rid::get(uniform.ids[0]);
+						if (stage == RDD::SHADER_STAGE_VERTEX) {
+							[enc setVertexTexture:obj atIndex:bi->index];
+						} else {
+							[enc setFragmentTexture:obj atIndex:bi->index];
+						}
+					} else {
+						id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+						for (size_t j = 0; j < count; j += 1) {
+							id<MTLTexture> obj = rid::get(uniform.ids[j]);
+							objects[j] = obj;
+						}
+						if (stage == RDD::SHADER_STAGE_VERTEX) {
+							[enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)];
+						} else {
+							[enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)];
+						}
+					}
+				} break;
+				case RDD::UNIFORM_TYPE_IMAGE: {
+					size_t count = uniform.ids.size();
+					if (count == 1) {
+						id<MTLTexture> obj = rid::get(uniform.ids[0]);
+						if (stage == RDD::SHADER_STAGE_VERTEX) {
+							[enc setVertexTexture:obj atIndex:bi->index];
+						} else {
+							[enc setFragmentTexture:obj atIndex:bi->index];
+						}
+
+						BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
+						if (sbi) {
+							id<MTLTexture> tex = obj.parentTexture ? obj.parentTexture : obj;
+							id<MTLBuffer> buf = tex.buffer;
+							if (buf) {
+								if (stage == RDD::SHADER_STAGE_VERTEX) {
+									[enc setVertexBuffer:buf offset:tex.bufferOffset atIndex:sbi->index];
+								} else {
+									[enc setFragmentBuffer:buf offset:tex.bufferOffset atIndex:sbi->index];
+								}
+							}
+						}
+					} else {
+						id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+						for (size_t j = 0; j < count; j += 1) {
+							id<MTLTexture> obj = rid::get(uniform.ids[j]);
+							objects[j] = obj;
+						}
+						if (stage == RDD::SHADER_STAGE_VERTEX) {
+							[enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)];
+						} else {
+							[enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)];
+						}
+					}
+				} break;
+				case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: {
+					ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER");
+				} break;
+				case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
+					ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER");
+				} break;
+				case RDD::UNIFORM_TYPE_IMAGE_BUFFER: {
+					CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");
+				} break;
+				case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: {
+					id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
+					if (stage == RDD::SHADER_STAGE_VERTEX) {
+						[enc setVertexBuffer:buffer offset:0 atIndex:bi->index];
+					} else {
+						[enc setFragmentBuffer:buffer offset:0 atIndex:bi->index];
+					}
+				} break;
+				case RDD::UNIFORM_TYPE_STORAGE_BUFFER: {
+					id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
+					if (stage == RDD::SHADER_STAGE_VERTEX) {
+						[enc setVertexBuffer:buffer offset:0 atIndex:bi->index];
+					} else {
+						[enc setFragmentBuffer:buffer offset:0 atIndex:bi->index];
+					}
+				} break;
+				case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: {
+					size_t count = uniform.ids.size();
+					if (count == 1) {
+						id<MTLTexture> obj = rid::get(uniform.ids[0]);
+						if (stage == RDD::SHADER_STAGE_VERTEX) {
+							[enc setVertexTexture:obj atIndex:bi->index];
+						} else {
+							[enc setFragmentTexture:obj atIndex:bi->index];
+						}
+					} else {
+						id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+						for (size_t j = 0; j < count; j += 1) {
+							id<MTLTexture> obj = rid::get(uniform.ids[j]);
+							objects[j] = obj;
+						}
+
+						if (stage == RDD::SHADER_STAGE_VERTEX) {
+							[enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)];
+						} else {
+							[enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)];
+						}
+					}
+				} break;
+				default: {
+					DEV_ASSERT(false);
+				}
+			}
+		}
+	}
+}
+
+void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) {
+	if (p_shader->uses_argument_buffers) {
+		bind_uniforms_argument_buffers(p_shader, p_state);
+	} else {
+		bind_uniforms_direct(p_shader, p_state);
+	}
+}
+
+void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) {
+	DEV_ASSERT(p_shader->uses_argument_buffers);
+	DEV_ASSERT(p_state.encoder != nil);
+
+	UniformSet const &set_info = p_shader->sets[index];
+
+	id<MTLComputeCommandEncoder> enc = p_state.encoder;
+	id<MTLDevice> device = enc.device;
+
+	BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage);
+
+	uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE);
+	if (offset) {
+		[enc setBuffer:bus.buffer offset:*offset atIndex:index];
+	}
+}
+
+void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) {
+	DEV_ASSERT(!p_shader->uses_argument_buffers);
+	DEV_ASSERT(p_state.encoder != nil);
+
+	id<MTLComputeCommandEncoder> __unsafe_unretained enc = p_state.encoder;
+
+	UniformSet const &set = p_shader->sets[index];
+
+	for (uint32_t i = 0; i < uniforms.size(); i++) {
+		RDD::BoundUniform const &uniform = uniforms[i];
+		UniformInfo ui = set.uniforms[i];
+
+		const RDC::ShaderStage stage = RDC::ShaderStage::SHADER_STAGE_COMPUTE;
+		const ShaderStageUsage stage_usage = ShaderStageUsage(1 << stage);
+
+		BindingInfo *bi = ui.bindings.getptr(stage);
+		if (bi == nullptr) {
+			// No binding for this stage.
+			continue;
+		}
+
+		if ((ui.active_stages & stage_usage) == 0) {
+			// Not active for this stage, so don't bind anything.
+			continue;
+		}
+
+		switch (uniform.type) {
+			case RDD::UNIFORM_TYPE_SAMPLER: {
+				size_t count = uniform.ids.size();
+				id<MTLSamplerState> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count);
+				for (size_t j = 0; j < count; j += 1) {
+					objects[j] = rid::get(uniform.ids[j].id);
+				}
+				[enc setSamplerStates:objects withRange:NSMakeRange(bi->index, count)];
+			} break;
+			case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
+				size_t count = uniform.ids.size() / 2;
+				id<MTLTexture> __unsafe_unretained *textures = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+				id<MTLSamplerState> __unsafe_unretained *samplers = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count);
+				for (uint32_t j = 0; j < count; j += 1) {
+					id<MTLSamplerState> sampler = rid::get(uniform.ids[j * 2 + 0]);
+					id<MTLTexture> texture = rid::get(uniform.ids[j * 2 + 1]);
+					samplers[j] = sampler;
+					textures[j] = texture;
+				}
+				BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
+				if (sbi) {
+					[enc setSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)];
+				}
+				[enc setTextures:textures withRange:NSMakeRange(bi->index, count)];
+			} break;
+			case RDD::UNIFORM_TYPE_TEXTURE: {
+				size_t count = uniform.ids.size();
+				if (count == 1) {
+					id<MTLTexture> obj = rid::get(uniform.ids[0]);
+					[enc setTexture:obj atIndex:bi->index];
+				} else {
+					id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+					for (size_t j = 0; j < count; j += 1) {
+						id<MTLTexture> obj = rid::get(uniform.ids[j]);
+						objects[j] = obj;
+					}
+					[enc setTextures:objects withRange:NSMakeRange(bi->index, count)];
+				}
+			} break;
+			case RDD::UNIFORM_TYPE_IMAGE: {
+				size_t count = uniform.ids.size();
+				if (count == 1) {
+					id<MTLTexture> obj = rid::get(uniform.ids[0]);
+					[enc setTexture:obj atIndex:bi->index];
+
+					BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
+					if (sbi) {
+						id<MTLTexture> tex = obj.parentTexture ? obj.parentTexture : obj;
+						id<MTLBuffer> buf = tex.buffer;
+						if (buf) {
+							[enc setBuffer:buf offset:tex.bufferOffset atIndex:sbi->index];
+						}
+					}
+				} else {
+					id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+					for (size_t j = 0; j < count; j += 1) {
+						id<MTLTexture> obj = rid::get(uniform.ids[j]);
+						objects[j] = obj;
+					}
+					[enc setTextures:objects withRange:NSMakeRange(bi->index, count)];
+				}
+			} break;
+			case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: {
+				ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER");
+			} break;
+			case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
+				ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER");
+			} break;
+			case RDD::UNIFORM_TYPE_IMAGE_BUFFER: {
+				CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");
+			} break;
+			case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: {
+				id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
+				[enc setBuffer:buffer offset:0 atIndex:bi->index];
+			} break;
+			case RDD::UNIFORM_TYPE_STORAGE_BUFFER: {
+				id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
+				[enc setBuffer:buffer offset:0 atIndex:bi->index];
+			} break;
+			case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: {
+				size_t count = uniform.ids.size();
+				if (count == 1) {
+					id<MTLTexture> obj = rid::get(uniform.ids[0]);
+					[enc setTexture:obj atIndex:bi->index];
+				} else {
+					id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+					for (size_t j = 0; j < count; j += 1) {
+						id<MTLTexture> obj = rid::get(uniform.ids[j]);
+						objects[j] = obj;
+					}
+					[enc setTextures:objects withRange:NSMakeRange(bi->index, count)];
+				}
+			} break;
+			default: {
+				DEV_ASSERT(false);
+			}
+		}
+	}
+}
+
+void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) {
+	if (p_shader->uses_argument_buffers) {
+		bind_uniforms_argument_buffers(p_shader, p_state);
+	} else {
+		bind_uniforms_direct(p_shader, p_state);
+	}
+}
+
+BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage) {
 	BoundUniformSet *sus = bound_uniforms.getptr(p_shader);
 	if (sus != nullptr) {
+		sus->merge_into(p_resource_usage);
 		return *sus;
 	}
 
@@ -1261,6 +1579,7 @@ BoundUniformSet &MDUniformSet::boundUniformSetForShader(MDShader *p_shader, id<M
 
 	BoundUniformSet bs = { .buffer = enc_buffer, .usage_to_resources = usage_to_resources };
 	bound_uniforms.insert(p_shader, bs);
+	bs.merge_into(p_resource_usage);
 	return bound_uniforms.get(p_shader);
 }
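
With the rename to `bound_uniform_set`, the resource-usage merge moves into the lookup itself, so cache hits and misses both record the set's resources into the encoder's `ResourceUsageMap`. A typical call site, mirroring the render path above (sketch):

    BoundUniformSet &bus = set->bound_uniform_set(shader, device, state.resource_usage);
    [enc setVertexBuffer:bus.buffer offset:*offset atIndex:set->index];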
 

+ 3 - 3
drivers/metal/rendering_device_driver_metal.h

@@ -61,7 +61,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public
 
 	uint32_t version_major = 2;
 	uint32_t version_minor = 0;
-	MetalDeviceProperties *metal_device_properties = nullptr;
+	MetalDeviceProperties *device_properties = nullptr;
 	PixelFormats *pixel_formats = nullptr;
 	std::unique_ptr<MDResourceCache> resource_cache;
 
@@ -431,10 +431,10 @@ public:
 	id<MTLDevice> get_device() const { return device; }
 	PixelFormats &get_pixel_formats() const { return *pixel_formats; }
 	MDResourceCache &get_resource_cache() const { return *resource_cache; }
-	MetalDeviceProperties const &get_device_properties() const { return *metal_device_properties; }
+	MetalDeviceProperties const &get_device_properties() const { return *device_properties; }
 
 	_FORCE_INLINE_ uint32_t get_metal_buffer_index_for_vertex_attribute_binding(uint32_t p_binding) {
-		return (metal_device_properties->limits.maxPerStageBufferCount - 1) - p_binding;
+		return (device_properties->limits.maxPerStageBufferCount - 1) - p_binding;
 	}
 
 	size_t get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const;

+ 115 - 46
drivers/metal/rendering_device_driver_metal.mm

@@ -218,7 +218,7 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
 	// desc.compressionType = MTLTextureCompressionTypeLossy;
 
 	if (p_format.samples > TEXTURE_SAMPLES_1) {
-		SampleCount supported = (*metal_device_properties).find_nearest_supported_sample_count(p_format.samples);
+		SampleCount supported = (*device_properties).find_nearest_supported_sample_count(p_format.samples);
 
 		if (supported > SampleCount1) {
 			bool ok = p_format.texture_type == TEXTURE_TYPE_2D || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY;
@@ -277,7 +277,7 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
 	// Usage.
 
 	MTLResourceOptions options = 0;
-	const bool supports_memoryless = (*metal_device_properties).features.highestFamily >= MTLGPUFamilyApple2 && (*metal_device_properties).features.highestFamily < MTLGPUFamilyMac1;
+	const bool supports_memoryless = (*device_properties).features.highestFamily >= MTLGPUFamilyApple2 && (*device_properties).features.highestFamily < MTLGPUFamilyMac1;
 	if (supports_memoryless && p_format.usage_bits & TEXTURE_USAGE_TRANSIENT_BIT) {
 		options = MTLResourceStorageModeMemoryless | MTLResourceHazardTrackingModeTracked;
 		desc.storageMode = MTLStorageModeMemoryless;
@@ -1058,7 +1058,7 @@ void RenderingDeviceDriverMetal::framebuffer_free(FramebufferID p_framebuffer) {
 
 #pragma mark - Shader
 
-const uint32_t SHADER_BINARY_VERSION = 3;
+const uint32_t SHADER_BINARY_VERSION = 4;
 
 // region Serialization
 
@@ -1503,6 +1503,9 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) UniformSetData {
 		p_reader.read(index);
 		p_reader.read(uniforms);
 	}
+	UniformSetData() = default;
+	UniformSetData(uint32_t p_index) :
+			index(p_index) {}
 };
 
 struct PushConstantData {
@@ -1536,6 +1539,11 @@ struct PushConstantData {
 };
 
 struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
+	enum Flags : uint32_t {
+		NONE = 0,
+		NEEDS_VIEW_MASK_BUFFER = 1 << 0,
+		USES_ARGUMENT_BUFFERS = 1 << 1,
+	};
 	CharString shader_name;
 	// The Metal language version specified when compiling SPIR-V to MSL.
 	// Format is major * 10000 + minor * 100 + patch.
@@ -1543,8 +1551,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
 	uint32_t vertex_input_mask = UINT32_MAX;
 	uint32_t fragment_output_mask = UINT32_MAX;
 	uint32_t spirv_specialization_constants_ids_mask = UINT32_MAX;
-	uint32_t is_compute = UINT32_MAX;
-	uint32_t needs_view_mask_buffer = UINT32_MAX;
+	uint32_t flags = NONE;
 	ComputeSize compute_local_size;
 	PushConstantData push_constant;
 	LocalVector<ShaderStageData> stages;
@@ -1557,17 +1564,46 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
 		return MTLLanguageVersion((major << 0x10) + minor);
 	}
 
+	bool is_compute() const {
+		return std::any_of(stages.begin(), stages.end(), [](ShaderStageData const &e) {
+			return e.stage == RD::ShaderStage::SHADER_STAGE_COMPUTE;
+		});
+	}
+
+	bool needs_view_mask_buffer() const {
+		return flags & NEEDS_VIEW_MASK_BUFFER;
+	}
+
+	void set_needs_view_mask_buffer(bool p_value) {
+		if (p_value) {
+			flags |= NEEDS_VIEW_MASK_BUFFER;
+		} else {
+			flags &= ~NEEDS_VIEW_MASK_BUFFER;
+		}
+	}
+
+	bool uses_argument_buffers() const {
+		return flags & USES_ARGUMENT_BUFFERS;
+	}
+
+	void set_uses_argument_buffers(bool p_value) {
+		if (p_value) {
+			flags |= USES_ARGUMENT_BUFFERS;
+		} else {
+			flags &= ~USES_ARGUMENT_BUFFERS;
+		}
+	}
+
 	size_t serialize_size() const {
 		size_t size = 0;
 		size += sizeof(uint32_t) + shader_name.length(); // shader_name
-		size += sizeof(uint32_t); // msl_version
-		size += sizeof(uint32_t); // vertex_input_mask
-		size += sizeof(uint32_t); // fragment_output_mask
-		size += sizeof(uint32_t); // spirv_specialization_constants_ids_mask
-		size += sizeof(uint32_t); // is_compute
-		size += sizeof(uint32_t); // needs_view_mask_buffer
-		size += compute_local_size.serialize_size(); // compute_local_size
-		size += push_constant.serialize_size(); // push_constant
+		size += sizeof(msl_version);
+		size += sizeof(vertex_input_mask);
+		size += sizeof(fragment_output_mask);
+		size += sizeof(spirv_specialization_constants_ids_mask);
+		size += sizeof(flags);
+		size += compute_local_size.serialize_size();
+		size += push_constant.serialize_size();
 		size += sizeof(uint32_t); // stages.size()
 		for (ShaderStageData const &e : stages) {
 			size += e.serialize_size();
@@ -1589,8 +1625,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
 		p_writer.write(vertex_input_mask);
 		p_writer.write(fragment_output_mask);
 		p_writer.write(spirv_specialization_constants_ids_mask);
-		p_writer.write(is_compute);
-		p_writer.write(needs_view_mask_buffer);
+		p_writer.write(flags);
 		p_writer.write(compute_local_size);
 		p_writer.write(push_constant);
 		p_writer.write(VectorView(stages));
@@ -1604,8 +1639,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
 		p_reader.read(vertex_input_mask);
 		p_reader.read(fragment_output_mask);
 		p_reader.read(spirv_specialization_constants_ids_mask);
-		p_reader.read(is_compute);
-		p_reader.read(needs_view_mask_buffer);
+		p_reader.read(flags);
 		p_reader.read(compute_local_size);
 		p_reader.read(push_constant);
 		p_reader.read(stages);
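
Collapsing the two `uint32_t` fields into a flags bitmask shrinks the serialized format (hence the `SHADER_BINARY_VERSION` bump from 3 to 4) and turns `is_compute` into a property derived from the stage list. Usage sketch:

    ShaderBinaryData data;
    data.set_needs_view_mask_buffer(true);
    data.set_uses_argument_buffers(false);
    bool compute = data.is_compute(); // Derived from stages; no longer stored.
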
@@ -1952,14 +1986,13 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
 		.y = spirv_data.compute_local_size[1],
 		.z = spirv_data.compute_local_size[2],
 	};
-	bin_data.is_compute = spirv_data.is_compute;
 	bin_data.push_constant.size = spirv_data.push_constant_size;
 	bin_data.push_constant.stages = (ShaderStageUsage)(uint8_t)spirv_data.push_constant_stages;
-	bin_data.needs_view_mask_buffer = shader_meta.has_multiview ? 1 : 0;
+	bin_data.set_needs_view_mask_buffer(shader_meta.has_multiview);
 
 	for (uint32_t i = 0; i < spirv_data.uniform_sets.size(); i++) {
 		const ::Vector<ShaderUniform> &spirv_set = spirv_data.uniform_sets[i];
-		UniformSetData set{ .index = i };
+		UniformSetData set(i);
 		for (const ShaderUniform &spirv_uniform : spirv_set) {
 			UniformData binding{};
 			binding.type = spirv_uniform.type;
@@ -1999,10 +2032,25 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
 #endif
 
 #if TARGET_OS_IOS
-	msl_options.ios_use_simdgroup_functions = (*metal_device_properties).features.simdPermute;
+	msl_options.ios_use_simdgroup_functions = (*device_properties).features.simdPermute;
 #endif
 
-	msl_options.argument_buffers = true;
+	bool disable_argument_buffers = false;
+	if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") {
+		disable_argument_buffers = true;
+	}
+
+	if (device_properties->features.argument_buffers_tier >= MTLArgumentBuffersTier2 && !disable_argument_buffers) {
+		msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2;
+		msl_options.argument_buffers = true;
+		bin_data.set_uses_argument_buffers(true);
+	} else {
+		msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier1;
+		// Tier 1 argument buffers don't support writable textures, so we disable them completely.
+		msl_options.argument_buffers = false;
+		bin_data.set_uses_argument_buffers(false);
+	}
+
 	msl_options.force_active_argument_buffer_resources = true; // Same as MoltenVK when using argument buffers.
 	// msl_options.pad_argument_buffer_resources = true; // Same as MoltenVK when using argument buffers.
 	msl_options.texture_buffer_native = true; // Enable texture buffer support.
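
Condensed, the SPIRV-Cross side of that decision: argument buffers are requested only when the device reports Tier 2 support and the `GODOT_DISABLE_ARGUMENT_BUFFERS` escape hatch is unset. A sketch using SPIRV-Cross's public options (variable names illustrative):

    using spirv_cross::CompilerMSL;
    CompilerMSL::Options msl_options;
    bool use_ab = tier >= MTLArgumentBuffersTier2 && !disable_argument_buffers;
    msl_options.argument_buffers = use_ab;
    msl_options.argument_buffers_tier = use_ab
            ? CompilerMSL::Options::ArgumentBuffersTier::Tier2
            : CompilerMSL::Options::ArgumentBuffersTier::Tier1;
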
@@ -2042,7 +2090,12 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
 		std::unordered_set<VariableID> active = compiler.get_active_interface_variables();
 		ShaderResources resources = compiler.get_shader_resources();
 
-		std::string source = compiler.compile();
+		std::string source;
+		try {
+			source = compiler.compile();
+		} catch (CompilerError &e) {
+			ERR_FAIL_V_MSG(Result(), "Failed to compile stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what());
+		}
 
 		ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, Result(), "Expected a single entry point and stage.");
 
@@ -2088,8 +2141,8 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
 			return res;
 		};
 
-		auto descriptor_bindings = [&compiler, &active, &uniform_sets, stage, &get_decoration](SmallVector<Resource> &resources, Writable writable) {
-			for (Resource const &res : resources) {
+		auto descriptor_bindings = [&compiler, &active, &uniform_sets, stage, &get_decoration](SmallVector<Resource> &p_resources, Writable p_writable) {
+			for (Resource const &res : p_resources) {
 				uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet);
 				uint32_t dbin = get_decoration(res.id, spv::DecorationBinding);
 				UniformData *found = nullptr;
@@ -2195,7 +2248,7 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
 				}
 
 				// Update writable.
-				if (writable == Writable::Maybe) {
+				if (p_writable == Writable::Maybe) {
 					if (basetype == BT::Struct) {
 						Bitset flags = compiler.get_buffer_block_flags(res.id);
 						if (!flags.get(spv::DecorationNonWritable)) {
@@ -2384,6 +2437,11 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
 			ERR_FAIL_V_MSG(ShaderID(), "Unexpected end of buffer");
 	}
 
+	// We need to regenerate the shader if the cache is moved to an incompatible device.
+	ERR_FAIL_COND_V_MSG(device_properties->features.argument_buffers_tier < MTLArgumentBuffersTier2 && binary_data.uses_argument_buffers(),
+			ShaderID(),
+			"Shader was generated with argument buffers, but the device has only limited support");
+
 	MTLCompileOptions *options = [MTLCompileOptions new];
 	options.languageVersion = binary_data.get_msl_version();
 	HashMap<ShaderStage, MDLibrary *> libraries;
@@ -2505,8 +2563,12 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
 	}
 
 	MDShader *shader = nullptr;
-	if (binary_data.is_compute) {
-		MDComputeShader *cs = new MDComputeShader(binary_data.shader_name, uniform_sets, libraries[ShaderStage::SHADER_STAGE_COMPUTE]);
+	if (binary_data.is_compute()) {
+		MDComputeShader *cs = new MDComputeShader(
+				binary_data.shader_name,
+				uniform_sets,
+				binary_data.uses_argument_buffers(),
+				libraries[ShaderStage::SHADER_STAGE_COMPUTE]);
 
 		uint32_t *binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_COMPUTE);
 		if (binding) {
@@ -2520,7 +2582,13 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
 #endif
 		shader = cs;
 	} else {
-		MDRenderShader *rs = new MDRenderShader(binary_data.shader_name, (bool)binary_data.needs_view_mask_buffer, uniform_sets, libraries[ShaderStage::SHADER_STAGE_VERTEX], libraries[ShaderStage::SHADER_STAGE_FRAGMENT]);
+		MDRenderShader *rs = new MDRenderShader(
+				binary_data.shader_name,
+				uniform_sets,
+				binary_data.needs_view_mask_buffer(),
+				binary_data.uses_argument_buffers(),
+				libraries[ShaderStage::SHADER_STAGE_VERTEX],
+				libraries[ShaderStage::SHADER_STAGE_FRAGMENT]);
 
 		uint32_t *vert_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_VERTEX);
 		if (vert_binding) {
@@ -2547,7 +2615,7 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
 
 	r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask;
 	r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask;
-	r_shader_desc.is_compute = binary_data.is_compute;
+	r_shader_desc.is_compute = binary_data.is_compute();
 	r_shader_desc.compute_local_size[0] = binary_data.compute_local_size.x;
 	r_shader_desc.compute_local_size[1] = binary_data.compute_local_size.y;
 	r_shader_desc.compute_local_size[2] = binary_data.compute_local_size.z;
@@ -2572,7 +2640,7 @@ void RenderingDeviceDriverMetal::shader_destroy_modules(ShaderID p_shader) {
 RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) {
 	//p_linear_pool_index = -1; // TODO:? Linear pools not implemented or not supported by API backend.
 
-	MDUniformSet *set = new MDUniformSet();
+	MDUniformSet *set = memnew(MDUniformSet);
 	Vector<BoundUniform> bound_uniforms;
 	bound_uniforms.resize(p_uniforms.size());
 	for (uint32_t i = 0; i < p_uniforms.size(); i += 1) {
@@ -2586,7 +2654,7 @@ RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView<Boun
 
 void RenderingDeviceDriverMetal::uniform_set_free(UniformSetID p_uniform_set) {
 	MDUniformSet *obj = (MDUniformSet *)p_uniform_set.id;
-	delete obj;
+	memdelete(obj);
 }
 
 void RenderingDeviceDriverMetal::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
@@ -2800,7 +2868,7 @@ void RenderingDeviceDriverMetal::command_clear_color_texture(CommandBufferID p_c
 		uint32_t layerCnt = p_subresources.layer_count;
 		uint32_t layerEnd = layerStart + layerCnt;
 
-		MetalFeatures const &features = (*metal_device_properties).features;
+		MetalFeatures const &features = (*device_properties).features;
 
		// Iterate across mipmap levels and layers, and perform an empty render to clear each.
 		for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) {
@@ -3057,7 +3125,7 @@ RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView<Atta
 		MTLPixelFormat format = pf.getMTLPixelFormat(a.format);
 		mda.format = format;
 		if (a.samples > TEXTURE_SAMPLES_1) {
-			mda.samples = (*metal_device_properties).find_nearest_supported_sample_count(a.samples);
+			mda.samples = (*device_properties).find_nearest_supported_sample_count(a.samples);
 		}
 		mda.loadAction = LOAD_ACTIONS[a.load_op];
 		mda.storeAction = STORE_ACTIONS[a.store_op];
@@ -3436,7 +3504,7 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create(
 	}
 
 	if (p_multisample_state.sample_count > TEXTURE_SAMPLES_1) {
-		pipeline->sample_count = (*metal_device_properties).find_nearest_supported_sample_count(p_multisample_state.sample_count);
+		pipeline->sample_count = (*device_properties).find_nearest_supported_sample_count(p_multisample_state.sample_count);
 	}
 	desc.rasterSampleCount = static_cast<NSUInteger>(pipeline->sample_count);
 	desc.alphaToCoverageEnabled = p_multisample_state.enable_alpha_to_coverage;
@@ -3815,7 +3883,7 @@ uint64_t RenderingDeviceDriverMetal::get_lazily_memory_used() {
 }
 
 uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
-	MetalDeviceProperties const &props = (*metal_device_properties);
+	MetalDeviceProperties const &props = (*device_properties);
 	MetalLimits const &limits = props.limits;
 
 #if defined(DEV_ENABLED)
@@ -3911,11 +3979,13 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
 		case LIMIT_SUBGROUP_MAX_SIZE:
 			return limits.maxSubgroupSize;
 		case LIMIT_SUBGROUP_IN_SHADERS:
-			return (int64_t)limits.subgroupSupportedShaderStages;
+			return (uint64_t)limits.subgroupSupportedShaderStages;
 		case LIMIT_SUBGROUP_OPERATIONS:
-			return (int64_t)limits.subgroupSupportedOperations;
+			return (uint64_t)limits.subgroupSupportedOperations;
 		UNKNOWN(LIMIT_VRS_TEXEL_WIDTH);
 		UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT);
+		UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_WIDTH);
+		UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_HEIGHT);
 		default:
 			ERR_FAIL_V(0);
 	}
@@ -4042,11 +4112,11 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p
 	// Set the pipeline cache ID based on the Metal version.
 	pipeline_cache_id = "metal-driver-" + get_api_version();
 
-	metal_device_properties = memnew(MetalDeviceProperties(device));
+	device_properties = memnew(MetalDeviceProperties(device));
 	pixel_formats = memnew(PixelFormats(device));
-	if (metal_device_properties->features.layeredRendering) {
+	if (device_properties->features.layeredRendering) {
 		multiview_capabilities.is_supported = true;
-		multiview_capabilities.max_view_count = metal_device_properties->limits.maxViewports;
+		multiview_capabilities.max_view_count = device_properties->limits.maxViewports;
 		// NOTE: I'm not sure what the limit is as I don't see it referenced anywhere
 		multiview_capabilities.max_instance_count = UINT32_MAX;
 
@@ -4057,11 +4127,10 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p
 		print_verbose("- Metal multiview not supported");
 	}
 
-	// Check required features and abort if any of them is missing.
-	if (!metal_device_properties->features.imageCubeArray) {
-		// NOTE: Apple A11 (Apple4) GPUs support image cube arrays, which are devices from 2017 and newer.
-		String error_string = vformat("Your Apple GPU does not support the following features which are required to use Metal-based renderers in Godot:\n\n");
-		if (!metal_device_properties->features.imageCubeArray) {
+	// The Metal renderer requires the Apple4 GPU family: 2017-era A11 chips and newer.
+	if (device_properties->features.highestFamily < MTLGPUFamilyApple4) {
+		String error_string = vformat("Your Apple GPU does not support the following features, which are required to use Metal-based renderers in Godot:\n\n");
+		if (!device_properties->features.imageCubeArray) {
 			error_string += "- No support for image cube arrays.\n";
 		}
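
The initialization gate now tests the GPU family directly rather than one derived feature, since Apple4 implies every capability the renderer needs (image cube arrays, quad permutation, and so on). Equivalent standalone check (sketch):

    if (![device supportsFamily:MTLGPUFamilyApple4]) {
        // Pre-A11 GPU (before 2017): the Metal renderer cannot run here.
    }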
 

+ 1 - 1
thirdparty/README.md

@@ -883,7 +883,7 @@ proposed by these libraries and better integrate them with Godot.
 ## spirv-cross
 
 - Upstream: https://github.com/KhronosGroup/SPIRV-Cross
-- Version: vulkan-sdk-1.3.290.0 (5d127b917f080c6f052553c47170ec0ba702e54f, 2024)
+- Version: git (6173e24b31f09a0c3217103a130e74c4ddec14a6, 2024)
 - License: Apache 2.0
 
 Files extracted from upstream source:

+ 11 - 1
thirdparty/spirv-cross/spirv.hpp

@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2020 The Khronos Group Inc.
+// Copyright (c) 2014-2024 The Khronos Group Inc.
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and/or associated documentation files (the "Materials"),
@@ -507,6 +507,7 @@ enum Decoration {
     DecorationNoUnsignedWrap = 4470,
     DecorationWeightTextureQCOM = 4487,
     DecorationBlockMatchTextureQCOM = 4488,
+    DecorationBlockMatchSamplerQCOM = 4499,
     DecorationExplicitInterpAMD = 4999,
     DecorationOverrideCoverageNV = 5248,
     DecorationPassthroughNV = 5250,
@@ -992,6 +993,7 @@ enum Capability {
     CapabilityTextureSampleWeightedQCOM = 4484,
     CapabilityTextureBoxFilterQCOM = 4485,
     CapabilityTextureBlockMatchQCOM = 4486,
+    CapabilityTextureBlockMatch2QCOM = 4498,
     CapabilityFloat16ImageAMD = 5008,
     CapabilityImageGatherBiasLodAMD = 5009,
     CapabilityFragmentMaskAMD = 5010,
@@ -1601,6 +1603,10 @@ enum Op {
     OpImageBoxFilterQCOM = 4481,
     OpImageBlockMatchSSDQCOM = 4482,
     OpImageBlockMatchSADQCOM = 4483,
+    OpImageBlockMatchWindowSSDQCOM = 4500,
+    OpImageBlockMatchWindowSADQCOM = 4501,
+    OpImageBlockMatchGatherSSDQCOM = 4502,
+    OpImageBlockMatchGatherSADQCOM = 4503,
     OpGroupIAddNonUniformAMD = 5000,
     OpGroupFAddNonUniformAMD = 5001,
     OpGroupFMinNonUniformAMD = 5002,
@@ -2280,6 +2286,10 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
     case OpImageBoxFilterQCOM: *hasResult = true; *hasResultType = true; break;
     case OpImageBlockMatchSSDQCOM: *hasResult = true; *hasResultType = true; break;
     case OpImageBlockMatchSADQCOM: *hasResult = true; *hasResultType = true; break;
+    case OpImageBlockMatchWindowSSDQCOM: *hasResult = true; *hasResultType = true; break;
+    case OpImageBlockMatchWindowSADQCOM: *hasResult = true; *hasResultType = true; break;
+    case OpImageBlockMatchGatherSSDQCOM: *hasResult = true; *hasResultType = true; break;
+    case OpImageBlockMatchGatherSADQCOM: *hasResult = true; *hasResultType = true; break;
     case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
     case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
     case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;

+ 9 - 1
thirdparty/spirv-cross/spirv_common.hpp

@@ -578,7 +578,9 @@ struct SPIRType : IVariant
 		// Keep internal types at the end.
 		ControlPointArray,
 		Interpolant,
-		Char
+		Char,
+		// MSL-specific type used by 'object' shaders (the analog of GLSL 'task' shaders).
+		MeshGridProperties
 	};
 
 	// Scalar/vector/matrix support.
@@ -746,6 +748,10 @@ struct SPIRExpression : IVariant
 	// A list of expressions which this expression depends on.
 	SmallVector<ID> expression_dependencies;
 
+	// Similar to expression dependencies, but tracking does not stop at forced-temporary variables.
+	// We need to know the full chain from store back to any SSA variable.
+	SmallVector<ID> invariance_dependencies;
+
 	// By reading this expression, we implicitly read these expressions as well.
 	// Used by access chain Store and Load since we read multiple expressions in this case.
 	SmallVector<ID> implied_read_expressions;
@@ -1598,6 +1604,8 @@ struct AccessChainMeta
 	bool flattened_struct = false;
 	bool relaxed_precision = false;
 	bool access_meshlet_position_y = false;
+	bool chain_is_builtin = false;
+	spv::BuiltIn builtin = {};
 };
 
 enum ExtendedDecorations

+ 15 - 1
thirdparty/spirv-cross/spirv_cross.cpp

@@ -1850,6 +1850,11 @@ const SmallVector<SPIRBlock::Case> &Compiler::get_case_list(const SPIRBlock &blo
 		const auto &type = get<SPIRType>(constant->constant_type);
 		width = type.width;
 	}
+	else if (const auto *op = maybe_get<SPIRConstantOp>(block.condition))
+	{
+		const auto &type = get<SPIRType>(op->basetype);
+		width = type.width;
+	}
 	else if (const auto *var = maybe_get<SPIRVariable>(block.condition))
 	{
 		const auto &type = get<SPIRType>(var->basetype);
@@ -2564,6 +2569,15 @@ void Compiler::add_active_interface_variable(uint32_t var_id)
 
 void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression)
 {
+	auto *ptr_e = maybe_get<SPIRExpression>(dst);
+
+	if (is_position_invariant() && ptr_e && maybe_get<SPIRExpression>(source_expression))
+	{
+		auto &deps = ptr_e->invariance_dependencies;
+		if (std::find(deps.begin(), deps.end(), source_expression) == deps.end())
+			deps.push_back(source_expression);
+	}
+
 	// Don't inherit any expression dependencies if the expression in dst
 	// is not a forwarded temporary.
 	if (forwarded_temporaries.find(dst) == end(forwarded_temporaries) ||
@@ -2572,7 +2586,7 @@ void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_exp
 		return;
 	}
 
-	auto &e = get<SPIRExpression>(dst);
+	auto &e = *ptr_e;
 	auto *phi = maybe_get<SPIRVariable>(source_expression);
 	if (phi && phi->phi_variable)
 	{

+ 6 - 1
thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp

@@ -564,7 +564,8 @@ Bitset ParsedIR::get_buffer_block_type_flags(const SPIRType &type) const
 Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const
 {
 	auto &type = get<SPIRType>(var.basetype);
-	assert(type.basetype == SPIRType::Struct);
+	if (type.basetype != SPIRType::Struct)
+		SPIRV_CROSS_THROW("Cannot get buffer block flags for non-buffer variable.");
 
 	// Some flags like non-writable, non-readable are actually found
 	// as member decorations. If all members have a decoration set, propagate
@@ -927,6 +928,8 @@ void ParsedIR::reset_all_of_type(Types type)
 
 void ParsedIR::add_typed_id(Types type, ID id)
 {
+	assert(id < ids.size());
+
 	if (loop_iteration_depth_hard != 0)
 		SPIRV_CROSS_THROW("Cannot add typed ID while looping over it.");
 
@@ -1029,6 +1032,8 @@ ParsedIR::LoopLock &ParsedIR::LoopLock::operator=(LoopLock &&other) SPIRV_CROSS_
 
 void ParsedIR::make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_id_set)
 {
+	assert(id < ids.size());
+
 	auto &constant_type = get<SPIRType>(type);
 
 	if (constant_type.pointer)

+ 98 - 18
thirdparty/spirv-cross/spirv_glsl.cpp

@@ -2764,6 +2764,8 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
 				block_qualifier = "patch ";
 			else if (has_decoration(var.self, DecorationPerPrimitiveEXT))
 				block_qualifier = "perprimitiveEXT ";
+			else if (has_decoration(var.self, DecorationPerVertexKHR))
+				block_qualifier = "pervertexEXT ";
 			else
 				block_qualifier = "";
 
@@ -3691,11 +3693,11 @@ void CompilerGLSL::emit_resources()
 				auto &type = this->get<SPIRType>(undef.basetype);
 				// OpUndef can be void for some reason ...
 				if (type.basetype == SPIRType::Void)
-					return;
+					continue;
 
 				// This will break. It is bogus and should not be legal.
 				if (type_is_top_level_block(type))
-					return;
+					continue;
 
 				string initializer;
 				if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
@@ -6436,7 +6438,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 		if (splat)
 		{
 			res += convert_to_string(c.scalar(vector, 0));
-			if (is_legacy())
+			if (is_legacy() && !has_extension("GL_EXT_gpu_shader4"))
 			{
 				// Fake unsigned constant literals with signed ones if possible.
 				// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
@@ -6455,7 +6457,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 				else
 				{
 					res += convert_to_string(c.scalar(vector, i));
-					if (is_legacy())
+					if (is_legacy() && !has_extension("GL_EXT_gpu_shader4"))
 					{
 						// Fake unsigned constant literals with signed ones if possible.
 						// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
@@ -10208,6 +10210,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 	bool pending_array_enclose = false;
 	bool dimension_flatten = false;
 	bool access_meshlet_position_y = false;
+	bool chain_is_builtin = false;
+	spv::BuiltIn chained_builtin = {};
 
 	if (auto *base_expr = maybe_get<SPIRExpression>(base))
 	{
@@ -10365,6 +10369,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 				auto builtin = ir.meta[base].decoration.builtin_type;
 				bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT;
 
+				chain_is_builtin = true;
+				chained_builtin = builtin;
+
 				switch (builtin)
 				{
 				case BuiltInCullDistance:
@@ -10500,6 +10507,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 					{
 						access_meshlet_position_y = true;
 					}
+
+					chain_is_builtin = true;
+					chained_builtin = builtin;
 				}
 				else
 				{
@@ -10719,6 +10729,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		meta->storage_physical_type = physical_type;
 		meta->relaxed_precision = relaxed_precision;
 		meta->access_meshlet_position_y = access_meshlet_position_y;
+		meta->chain_is_builtin = chain_is_builtin;
+		meta->builtin = chained_builtin;
 	}
 
 	return expr;
@@ -11764,13 +11776,13 @@ void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression
 	// Allow trivially forwarded expressions like OpLoad or trivial shuffles,
 	// these will be marked as having suppressed usage tracking.
 	// Our only concern is to make sure arithmetic operations are done in similar ways.
-	if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
-	    forced_invariant_temporaries.count(expr.self) == 0)
+	if (forced_invariant_temporaries.count(expr.self) == 0)
 	{
-		force_temporary_and_recompile(expr.self);
+		if (!expression_suppresses_usage_tracking(expr.self))
+			force_temporary_and_recompile(expr.self);
 		forced_invariant_temporaries.insert(expr.self);
 
-		for (auto &dependent : expr.expression_dependencies)
+		for (auto &dependent : expr.invariance_dependencies)
 			disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
 	}
 }
@@ -12334,6 +12346,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			flattened_structs[ops[1]] = true;
 		if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
 			set_decoration(ops[1], DecorationRelaxedPrecision);
+		if (meta.chain_is_builtin)
+			set_decoration(ops[1], DecorationBuiltIn, meta.builtin);
 
 		// If we have some expression dependencies in our access chain, this access chain is technically a forwarded
 		// temporary which could be subject to invalidation.
@@ -13227,13 +13241,24 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t op0 = ops[2];
 		uint32_t op1 = ops[3];
 
-		// Needs special handling.
+		auto &out_type = get<SPIRType>(result_type);
+
 		bool forward = should_forward(op0) && should_forward(op1);
-		auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
-		                 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
+		string cast_op0, cast_op1;
+		auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, int_type, op0, op1, false);
+
+		// Needs special handling.
+		auto expr = join(cast_op0, " - ", cast_op1, " * ", "(", cast_op0, " / ", cast_op1, ")");
 
 		if (implicit_integer_promotion)
+		{
 			expr = join(type_to_glsl(get<SPIRType>(result_type)), '(', expr, ')');
+		}
+		else if (out_type.basetype != int_type)
+		{
+			expected_type.basetype = int_type;
+			expr = join(bitcast_glsl_op(out_type, expected_type), '(', expr, ')');
+		}
 
 		emit_op(result_type, result_id, expr, forward);
 		inherit_expression_dependencies(result_id, op0);
@@ -14481,6 +14506,50 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
+	case OpImageBlockMatchWindowSSDQCOM:
+	case OpImageBlockMatchWindowSADQCOM:
+	case OpImageBlockMatchGatherSSDQCOM:
+	case OpImageBlockMatchGatherSADQCOM:
+	{
+		require_extension_internal("GL_QCOM_image_processing2");
+		uint32_t result_type_id = ops[0];
+		uint32_t id = ops[1];
+		string expr;
+		switch (opcode)
+		{
+		case OpImageBlockMatchWindowSSDQCOM:
+			expr = "textureBlockMatchWindowSSDQCOM";
+			break;
+		case OpImageBlockMatchWindowSADQCOM:
+			expr = "textureBlockMatchWindowSADQCOM";
+			break;
+		case OpImageBlockMatchGatherSSDQCOM:
+			expr = "textureBlockMatchGatherSSDQCOM";
+			break;
+		case OpImageBlockMatchGatherSADQCOM:
+			expr = "textureBlockMatchGatherSADQCOM";
+			break;
+		default:
+			SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing2.");
+		}
+		expr += "(";
+
+		bool forward = false;
+		expr += to_expression(ops[2]);
+		expr += ", " + to_expression(ops[3]);
+
+		expr += ", " + to_non_uniform_aware_expression(ops[4]);
+		expr += ", " + to_expression(ops[5]);
+		expr += ", " + to_expression(ops[6]);
+
+		expr += ")";
+		emit_op(result_type_id, id, expr, forward);
+
+		inherit_expression_dependencies(id, ops[3]);
+		inherit_expression_dependencies(id, ops[5]);
+		break;
+	}
+
 	// Compute
 	case OpControlBarrier:
 	case OpMemoryBarrier:
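
All four new opcodes funnel into GL_QCOM_image_processing2 builtins with the same argument shape: target image and coordinates, reference image and coordinates (the reference goes through to_non_uniform_aware_expression), and a block size. Roughly the GLSL this path emits, with made-up identifiers and the return type assumed to follow the vec4 convention of the earlier QCOM block-match builtins (see the extension spec for exact signatures):

    #include <cstdio>

    // Illustrative GLSL output only; not captured compiler output.
    static const char *emitted_glsl = R"GLSL(
    #extension GL_QCOM_image_processing2 : require

    vec4 ssd = textureBlockMatchWindowSSDQCOM(
        target_tex,  target_coord, // ops[2], ops[3]
        ref_tex,     ref_coord,    // ops[4], ops[5] (non-uniform aware)
        block_size);               // ops[6]
    )GLSL";

    int main() { puts(emitted_glsl); }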
@@ -15622,7 +15691,16 @@ string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
 
 	if (type.pointer)
 	{
-		if (arg.write_count && arg.read_count)
+		// If we're passing block types around to a function, we really mean a reference in the pointer sense,
+		// but DXC does not like inout for mesh blocks, so work around that. out is technically not correct,
+		// but it works in practice due to legalization. It's ... not great, but you gotta do what you gotta do.
+		// GLSL will never hit this case, since it's not valid GLSL to begin with.
+		if (type.storage == StorageClassOutput && get_execution_model() == ExecutionModelMeshEXT &&
+		    has_decoration(type.self, DecorationBlock) && is_builtin_type(type) && arg.write_count)
+		{
+			direction = "out ";
+		}
+		else if (arg.write_count && arg.read_count)
 			direction = "inout ";
 		else if (arg.write_count)
 			direction = "out ";
@@ -15899,7 +15977,7 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id, bool /*m
 	case DimBuffer:
 		if (options.es && options.version < 320)
 			require_extension_internal("GL_EXT_texture_buffer");
-		else if (!options.es && options.version < 300)
+		else if (!options.es && options.version < 140)
 			require_extension_internal("GL_EXT_texture_buffer_object");
 		res += "Buffer";
 		break;
@@ -16442,6 +16520,8 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
 	{
 		auto &var = get<SPIRVariable>(v);
 		var.deferred_declaration = false;
+		if (var.storage == StorageClassTaskPayloadWorkgroupEXT)
+			continue;
 
 		if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup))
 		{
@@ -17608,7 +17688,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 
 		if (!collapsed_switch)
 		{
-			if (block_like_switch || is_legacy_es())
+			if (block_like_switch || is_legacy())
 			{
 				// ESSL 1.0 is not guaranteed to support do/while.
 				if (is_legacy_es())
@@ -17638,7 +17718,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 				// Default case.
 				if (!block_like_switch)
 				{
-					if (is_legacy_es())
+					if (is_legacy())
 						statement("else");
 					else
 						statement("default:");
@@ -17646,7 +17726,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 			}
 			else
 			{
-				if (is_legacy_es())
+				if (is_legacy())
 				{
 					statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
 					          ")");
@@ -17698,7 +17778,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 
 			if (block.default_block == block.next_block)
 			{
-				if (is_legacy_es())
+				if (is_legacy())
 					statement("else");
 				else
 					statement("default:");
@@ -17712,7 +17792,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 
 		if (!collapsed_switch)
 		{
-			if (block_like_switch && !is_legacy_es())
+			if ((block_like_switch || is_legacy()) && !is_legacy_es())
 				end_scope_decl("while(false)");
 			else
 				end_scope();
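
Net effect: every legacy target now lowers switch to an if/else chain; legacy desktop GLSL additionally wraps it in do { ... } while(false) so emitted break statements keep working, while ESSL 1.0 (not guaranteed to support do/while) gets the bare chain. Roughly, for a two-case switch (both snippets are illustrative, not captured output):

    #include <cstdio>

    // Before lowering (modern GLSL) and after (legacy desktop GLSL).
    static const char *modern_glsl = R"(
    switch (x) {
    case 0:  y = 1.0; break;
    default: y = 0.0; break;
    }
    )";

    static const char *legacy_glsl = R"(
    do {
        if (x == 0) { y = 1.0; break; }
        y = 0.0;
    } while(false);
    )";

    int main() { printf("%s\n%s", modern_glsl, legacy_glsl); }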

File diff suppressed because it is too large
+ 426 - 74
thirdparty/spirv-cross/spirv_msl.cpp


+ 15 - 1
thirdparty/spirv-cross/spirv_msl.hpp

@@ -838,7 +838,9 @@ protected:
 		SPVFuncImplPaddedStd140,
 		SPVFuncImplReduceAdd,
 		SPVFuncImplImageFence,
-		SPVFuncImplTextureCast
+		SPVFuncImplTextureCast,
+		SPVFuncImplMulExtended,
+		SPVFuncImplSetMeshOutputsEXT,
 	};
 
 	// If the underlying resource has been used for comparison then duplicate loads of that resource must be too
@@ -867,6 +869,9 @@ protected:
 	std::string type_to_glsl(const SPIRType &type, uint32_t id, bool member);
 	std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
 	void emit_block_hints(const SPIRBlock &block) override;
+	void emit_mesh_entry_point();
+	void emit_mesh_outputs();
+	void emit_mesh_tasks(SPIRBlock &block) override;
 
 	// Allow Metal to use the array<T> template to make arrays a value type
 	std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id) override;
@@ -918,6 +923,7 @@ protected:
 
 	bool is_tesc_shader() const;
 	bool is_tese_shader() const;
+	bool is_mesh_shader() const;
 
 	void preprocess_op_codes();
 	void localize_global_variables();
@@ -932,6 +938,7 @@ protected:
 	                                            std::unordered_set<uint32_t> &processed_func_ids);
 	uint32_t add_interface_block(spv::StorageClass storage, bool patch = false);
 	uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage);
+	uint32_t add_meshlet_block(bool per_primitive);
 
 	struct InterfaceBlockMeta
 	{
@@ -1103,12 +1110,17 @@ protected:
 	uint32_t builtin_stage_input_size_id = 0;
 	uint32_t builtin_local_invocation_index_id = 0;
 	uint32_t builtin_workgroup_size_id = 0;
+	uint32_t builtin_mesh_primitive_indices_id = 0;
+	uint32_t builtin_mesh_sizes_id = 0;
+	uint32_t builtin_task_grid_id = 0;
 	uint32_t builtin_frag_depth_id = 0;
 	uint32_t swizzle_buffer_id = 0;
 	uint32_t buffer_size_buffer_id = 0;
 	uint32_t view_mask_buffer_id = 0;
 	uint32_t dynamic_offsets_buffer_id = 0;
 	uint32_t uint_type_id = 0;
+	uint32_t shared_uint_type_id = 0;
+	uint32_t meshlet_type_id = 0;
 	uint32_t argument_buffer_padding_buffer_type_id = 0;
 	uint32_t argument_buffer_padding_image_type_id = 0;
 	uint32_t argument_buffer_padding_sampler_type_id = 0;
@@ -1173,6 +1185,8 @@ protected:
 	VariableID stage_out_ptr_var_id = 0;
 	VariableID tess_level_inner_var_id = 0;
 	VariableID tess_level_outer_var_id = 0;
+	VariableID mesh_out_per_vertex = 0;
+	VariableID mesh_out_per_primitive = 0;
 	VariableID stage_out_masked_builtin_type_id = 0;
 
 	// Handle HLSL-style 0-based vertex/instance index.

+ 2 - 0
thirdparty/spirv-cross/spirv_reflect.cpp

@@ -637,6 +637,8 @@ void CompilerReflection::emit_resources(const char *tag, const SmallVector<Resou
 			json_stream->emit_json_key_value("WeightTextureQCOM", get_decoration(res.id, DecorationWeightTextureQCOM));
 		if (mask.get(DecorationBlockMatchTextureQCOM))
 			json_stream->emit_json_key_value("BlockMatchTextureQCOM", get_decoration(res.id, DecorationBlockMatchTextureQCOM));
+		if (mask.get(DecorationBlockMatchSamplerQCOM))
+			json_stream->emit_json_key_value("BlockMatchSamplerQCOM", get_decoration(res.id, DecorationBlockMatchSamplerQCOM));
 
 		// For images, the type itself adds a layout qualifier.
 		// Only emit the format for storage images.
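
With this, the reflection output gains one more key alongside the existing QCOM decorations. A minimal sketch of the emission pattern, where the writer function is a stand-in for spirv-cross's internal json_stream->emit_json_key_value:

    #include <cstdio>

    // Stand-in for json_stream->emit_json_key_value(...).
    static void emit_json_key_value(const char *key, unsigned value) {
    	printf("\"%s\": %u\n", key, value);
    }

    int main() {
    	// i.e. mask.get(DecorationBlockMatchSamplerQCOM) on the resource.
    	bool has_block_match_sampler = true;
    	if (has_block_match_sampler)
    		emit_json_key_value("BlockMatchSamplerQCOM", 1);
    }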

Some files were not shown because too many files changed in this diff