
Updated spirv-cross.

Бранимир Караџић, 3 months ago
commit 686190d7ff

+ 1 - 0
3rdparty/spirv-cross/main.cpp

@@ -536,6 +536,7 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res
 	print_resources(compiler, "push", res.push_constant_buffers);
 	print_resources(compiler, "counters", res.atomic_counters);
 	print_resources(compiler, "acceleration structures", res.acceleration_structures);
+	print_resources(compiler, "tensors", res.tensors);
 	print_resources(compiler, "record buffers", res.shader_record_buffers);
 	print_resources(compiler, spv::StorageClassInput, res.builtin_inputs);
 	print_resources(compiler, spv::StorageClassOutput, res.builtin_outputs);
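
For reference, the new tensors list added here and in spirv_cross.hpp is consumed like the other reflection vectors. A minimal consumer-side sketch (not part of the commit), assuming a SPIR-V module already loaded into spirv_words; the function and variable names are made up, the API calls are the existing SPIRV-Cross reflection interface:

#include <spirv_cross.hpp>
#include <cstdint>
#include <cstdio>
#include <vector>

void list_tensor_resources(const std::vector<uint32_t> &spirv_words)
{
	spirv_cross::Compiler compiler(spirv_words);
	spirv_cross::ShaderResources resources = compiler.get_shader_resources();

	// resources.tensors is the vector populated for SPIRType::Tensor variables.
	for (const spirv_cross::Resource &res : resources.tensors)
	{
		uint32_t set = compiler.get_decoration(res.id, spv::DecorationDescriptorSet);
		uint32_t binding = compiler.get_decoration(res.id, spv::DecorationBinding);
		printf("tensor %s: set=%u binding=%u\n", res.name.c_str(), set, binding);
	}
}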

+ 104 - 22
3rdparty/spirv-cross/spirv.h

@@ -1,27 +1,11 @@
 /*
-** Copyright (c) 2014-2024 The Khronos Group Inc.
+** Copyright: 2014-2024 The Khronos Group Inc.
+** License: MIT
 ** 
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and/or associated documentation files (the "Materials"),
-** to deal in the Materials without restriction, including without limitation
-** the rights to use, copy, modify, merge, publish, distribute, sublicense,
-** and/or sell copies of the Materials, and to permit persons to whom the
-** Materials are furnished to do so, subject to the following conditions:
-** 
-** The above copyright notice and this permission notice shall be included in
-** all copies or substantial portions of the Materials.
-** 
-** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
-** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
-** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
-** 
-** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
-** IN THE MATERIALS.
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+** KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+** SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+** https://www.khronos.org/registry/
 */
 
 /*
@@ -176,6 +160,8 @@ typedef enum SpvExecutionMode_ {
     SpvExecutionModeSignedZeroInfNanPreserve = 4461,
     SpvExecutionModeRoundingModeRTE = 4462,
     SpvExecutionModeRoundingModeRTZ = 4463,
+    SpvExecutionModeNonCoherentTileAttachmentReadQCOM = 4489,
+    SpvExecutionModeTileShadingRateQCOM = 4490,
     SpvExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
     SpvExecutionModeStencilRefReplacingEXT = 5027,
     SpvExecutionModeCoalescingAMDX = 5069,
@@ -245,6 +231,7 @@ typedef enum SpvStorageClass_ {
     SpvStorageClassImage = 11,
     SpvStorageClassStorageBuffer = 12,
     SpvStorageClassTileImageEXT = 4172,
+    SpvStorageClassTileAttachmentQCOM = 4491,
     SpvStorageClassNodePayloadAMDX = 5068,
     SpvStorageClassCallableDataKHR = 5328,
     SpvStorageClassCallableDataNV = 5328,
@@ -554,6 +541,7 @@ typedef enum SpvDecoration_ {
     SpvDecorationMaxByteOffset = 45,
     SpvDecorationAlignmentId = 46,
     SpvDecorationMaxByteOffsetId = 47,
+    SpvDecorationSaturatedToLargestFloat8NormalConversionEXT = 4216,
     SpvDecorationNoSignedWrap = 4469,
     SpvDecorationNoUnsignedWrap = 4470,
     SpvDecorationWeightTextureQCOM = 4487,
@@ -723,6 +711,9 @@ typedef enum SpvBuiltIn_ {
     SpvBuiltInDeviceIndex = 4438,
     SpvBuiltInViewIndex = 4440,
     SpvBuiltInShadingRateKHR = 4444,
+    SpvBuiltInTileOffsetQCOM = 4492,
+    SpvBuiltInTileDimensionQCOM = 4493,
+    SpvBuiltInTileApronSizeQCOM = 4494,
     SpvBuiltInBaryCoordNoPerspAMD = 4992,
     SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993,
     SpvBuiltInBaryCoordNoPerspSampleAMD = 4994,
@@ -1073,7 +1064,13 @@ typedef enum SpvCapability_ {
     SpvCapabilityTileImageColorReadAccessEXT = 4166,
     SpvCapabilityTileImageDepthReadAccessEXT = 4167,
     SpvCapabilityTileImageStencilReadAccessEXT = 4168,
+    SpvCapabilityTensorsARM = 4174,
+    SpvCapabilityStorageTensorArrayDynamicIndexingARM = 4175,
+    SpvCapabilityStorageTensorArrayNonUniformIndexingARM = 4176,
+    SpvCapabilityGraphARM = 4191,
     SpvCapabilityCooperativeMatrixLayoutsARM = 4201,
+    SpvCapabilityFloat8EXT = 4212,
+    SpvCapabilityFloat8CooperativeMatrixEXT = 4213,
     SpvCapabilityFragmentShadingRateKHR = 4422,
     SpvCapabilitySubgroupBallotKHR = 4423,
     SpvCapabilityDrawParameters = 4427,
@@ -1109,6 +1106,7 @@ typedef enum SpvCapability_ {
     SpvCapabilityTextureSampleWeightedQCOM = 4484,
     SpvCapabilityTextureBoxFilterQCOM = 4485,
     SpvCapabilityTextureBlockMatchQCOM = 4486,
+    SpvCapabilityTileShadingQCOM = 4495,
     SpvCapabilityTextureBlockMatch2QCOM = 4498,
     SpvCapabilityFloat16ImageAMD = 5008,
     SpvCapabilityImageGatherBiasLodAMD = 5009,
@@ -1119,6 +1117,8 @@ typedef enum SpvCapability_ {
     SpvCapabilityShaderClockKHR = 5055,
     SpvCapabilityShaderEnqueueAMDX = 5067,
     SpvCapabilityQuadControlKHR = 5087,
+    SpvCapabilityInt4TypeINTEL = 5112,
+    SpvCapabilityInt4CooperativeMatrixINTEL = 5114,
     SpvCapabilityBFloat16TypeKHR = 5116,
     SpvCapabilityBFloat16DotProductKHR = 5117,
     SpvCapabilityBFloat16CooperativeMatrixKHR = 5118,
@@ -1287,6 +1287,7 @@ typedef enum SpvCapability_ {
     SpvCapabilityMaskedGatherScatterINTEL = 6427,
     SpvCapabilityCacheControlsINTEL = 6441,
     SpvCapabilityRegisterLimitsINTEL = 6460,
+    SpvCapabilityBindlessImagesINTEL = 6528,
     SpvCapabilityMax = 0x7fffffff,
 } SpvCapability;
 
@@ -1463,6 +1464,24 @@ typedef enum SpvTensorAddressingOperandsMask_ {
     SpvTensorAddressingOperandsDecodeFuncMask = 0x00000002,
 } SpvTensorAddressingOperandsMask;
 
+typedef enum SpvTensorOperandsShift_ {
+    SpvTensorOperandsNontemporalARMShift = 0,
+    SpvTensorOperandsOutOfBoundsValueARMShift = 1,
+    SpvTensorOperandsMakeElementAvailableARMShift = 2,
+    SpvTensorOperandsMakeElementVisibleARMShift = 3,
+    SpvTensorOperandsNonPrivateElementARMShift = 4,
+    SpvTensorOperandsMax = 0x7fffffff,
+} SpvTensorOperandsShift;
+
+typedef enum SpvTensorOperandsMask_ {
+    SpvTensorOperandsMaskNone = 0,
+    SpvTensorOperandsNontemporalARMMask = 0x00000001,
+    SpvTensorOperandsOutOfBoundsValueARMMask = 0x00000002,
+    SpvTensorOperandsMakeElementAvailableARMMask = 0x00000004,
+    SpvTensorOperandsMakeElementVisibleARMMask = 0x00000008,
+    SpvTensorOperandsNonPrivateElementARMMask = 0x00000010,
+} SpvTensorOperandsMask;
+
 typedef enum SpvInitializationModeQualifier_ {
     SpvInitializationModeQualifierInitOnDeviceReprogramINTEL = 0,
     SpvInitializationModeQualifierInitOnDeviceResetINTEL = 1,
@@ -1549,6 +1568,8 @@ typedef enum SpvRawAccessChainOperandsMask_ {
 
 typedef enum SpvFPEncoding_ {
     SpvFPEncodingBFloat16KHR = 0,
+    SpvFPEncodingFloat8E4M3EXT = 4214,
+    SpvFPEncodingFloat8E5M2EXT = 4215,
     SpvFPEncodingMax = 0x7fffffff,
 } SpvFPEncoding;
 
@@ -1927,6 +1948,17 @@ typedef enum SpvOp_ {
     SpvOpColorAttachmentReadEXT = 4160,
     SpvOpDepthAttachmentReadEXT = 4161,
     SpvOpStencilAttachmentReadEXT = 4162,
+    SpvOpTypeTensorARM = 4163,
+    SpvOpTensorReadARM = 4164,
+    SpvOpTensorWriteARM = 4165,
+    SpvOpTensorQuerySizeARM = 4166,
+    SpvOpGraphConstantARM = 4181,
+    SpvOpGraphEntryPointARM = 4182,
+    SpvOpGraphARM = 4183,
+    SpvOpGraphInputARM = 4184,
+    SpvOpGraphSetOutputARM = 4185,
+    SpvOpGraphEndARM = 4186,
+    SpvOpTypeGraphARM = 4190,
     SpvOpTerminateInvocation = 4416,
     SpvOpTypeUntypedPointerKHR = 4417,
     SpvOpUntypedVariableKHR = 4418,
@@ -2385,6 +2417,9 @@ typedef enum SpvOp_ {
     SpvOpRoundFToTF32INTEL = 6426,
     SpvOpMaskedGatherINTEL = 6428,
     SpvOpMaskedScatterINTEL = 6429,
+    SpvOpConvertHandleToImageINTEL = 6529,
+    SpvOpConvertHandleToSamplerINTEL = 6530,
+    SpvOpConvertHandleToSampledImageINTEL = 6531,
     SpvOpMax = 0x7fffffff,
 } SpvOp;
 
@@ -2743,6 +2778,17 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy
     case SpvOpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
     case SpvOpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
     case SpvOpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
+    case SpvOpTypeTensorARM: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTensorReadARM: *hasResult = true; *hasResultType = true; break;
+    case SpvOpTensorWriteARM: *hasResult = false; *hasResultType = false; break;
+    case SpvOpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGraphConstantARM: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break;
+    case SpvOpGraphARM: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGraphInputARM: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break;
+    case SpvOpGraphEndARM: *hasResult = false; *hasResultType = false; break;
+    case SpvOpTypeGraphARM: *hasResult = true; *hasResultType = false; break;
     case SpvOpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
     case SpvOpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break;
     case SpvOpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break;
@@ -3190,6 +3236,9 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy
     case SpvOpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break;
     case SpvOpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break;
     case SpvOpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break;
+    case SpvOpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break;
     }
 }
 inline const char* SpvSourceLanguageToString(SpvSourceLanguage value) {
@@ -3305,6 +3354,8 @@ inline const char* SpvExecutionModeToString(SpvExecutionMode value) {
     case SpvExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve";
     case SpvExecutionModeRoundingModeRTE: return "RoundingModeRTE";
     case SpvExecutionModeRoundingModeRTZ: return "RoundingModeRTZ";
+    case SpvExecutionModeNonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM";
+    case SpvExecutionModeTileShadingRateQCOM: return "TileShadingRateQCOM";
     case SpvExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD";
     case SpvExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT";
     case SpvExecutionModeCoalescingAMDX: return "CoalescingAMDX";
@@ -3371,6 +3422,7 @@ inline const char* SpvStorageClassToString(SpvStorageClass value) {
     case SpvStorageClassImage: return "Image";
     case SpvStorageClassStorageBuffer: return "StorageBuffer";
     case SpvStorageClassTileImageEXT: return "TileImageEXT";
+    case SpvStorageClassTileAttachmentQCOM: return "TileAttachmentQCOM";
     case SpvStorageClassNodePayloadAMDX: return "NodePayloadAMDX";
     case SpvStorageClassCallableDataKHR: return "CallableDataKHR";
     case SpvStorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR";
@@ -3619,6 +3671,7 @@ inline const char* SpvDecorationToString(SpvDecoration value) {
     case SpvDecorationMaxByteOffset: return "MaxByteOffset";
     case SpvDecorationAlignmentId: return "AlignmentId";
     case SpvDecorationMaxByteOffsetId: return "MaxByteOffsetId";
+    case SpvDecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT";
     case SpvDecorationNoSignedWrap: return "NoSignedWrap";
     case SpvDecorationNoUnsignedWrap: return "NoUnsignedWrap";
     case SpvDecorationWeightTextureQCOM: return "WeightTextureQCOM";
@@ -3778,6 +3831,9 @@ inline const char* SpvBuiltInToString(SpvBuiltIn value) {
     case SpvBuiltInDeviceIndex: return "DeviceIndex";
     case SpvBuiltInViewIndex: return "ViewIndex";
     case SpvBuiltInShadingRateKHR: return "ShadingRateKHR";
+    case SpvBuiltInTileOffsetQCOM: return "TileOffsetQCOM";
+    case SpvBuiltInTileDimensionQCOM: return "TileDimensionQCOM";
+    case SpvBuiltInTileApronSizeQCOM: return "TileApronSizeQCOM";
     case SpvBuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD";
     case SpvBuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD";
     case SpvBuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD";
@@ -3958,7 +4014,13 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
     case SpvCapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT";
     case SpvCapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
     case SpvCapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
+    case SpvCapabilityTensorsARM: return "TensorsARM";
+    case SpvCapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM";
+    case SpvCapabilityStorageTensorArrayNonUniformIndexingARM: return "StorageTensorArrayNonUniformIndexingARM";
+    case SpvCapabilityGraphARM: return "GraphARM";
     case SpvCapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
+    case SpvCapabilityFloat8EXT: return "Float8EXT";
+    case SpvCapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT";
     case SpvCapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR";
     case SpvCapabilitySubgroupBallotKHR: return "SubgroupBallotKHR";
     case SpvCapabilityDrawParameters: return "DrawParameters";
@@ -3992,6 +4054,7 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
     case SpvCapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM";
     case SpvCapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM";
     case SpvCapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM";
+    case SpvCapabilityTileShadingQCOM: return "TileShadingQCOM";
     case SpvCapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM";
     case SpvCapabilityFloat16ImageAMD: return "Float16ImageAMD";
     case SpvCapabilityImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD";
@@ -4002,6 +4065,8 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
     case SpvCapabilityShaderClockKHR: return "ShaderClockKHR";
     case SpvCapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX";
     case SpvCapabilityQuadControlKHR: return "QuadControlKHR";
+    case SpvCapabilityInt4TypeINTEL: return "Int4TypeINTEL";
+    case SpvCapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL";
     case SpvCapabilityBFloat16TypeKHR: return "BFloat16TypeKHR";
     case SpvCapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR";
     case SpvCapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR";
@@ -4144,6 +4209,7 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
     case SpvCapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL";
     case SpvCapabilityCacheControlsINTEL: return "CacheControlsINTEL";
     case SpvCapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL";
+    case SpvCapabilityBindlessImagesINTEL: return "BindlessImagesINTEL";
     default: return "Unknown";
     }
 }
@@ -4299,6 +4365,8 @@ inline const char* SpvNamedMaximumNumberOfRegistersToString(SpvNamedMaximumNumbe
 inline const char* SpvFPEncodingToString(SpvFPEncoding value) {
     switch (value) {
     case SpvFPEncodingBFloat16KHR: return "BFloat16KHR";
+    case SpvFPEncodingFloat8E4M3EXT: return "Float8E4M3EXT";
+    case SpvFPEncodingFloat8E5M2EXT: return "Float8E5M2EXT";
     default: return "Unknown";
     }
 }
@@ -4683,6 +4751,17 @@ inline const char* SpvOpToString(SpvOp value) {
     case SpvOpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT";
     case SpvOpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT";
     case SpvOpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT";
+    case SpvOpTypeTensorARM: return "OpTypeTensorARM";
+    case SpvOpTensorReadARM: return "OpTensorReadARM";
+    case SpvOpTensorWriteARM: return "OpTensorWriteARM";
+    case SpvOpTensorQuerySizeARM: return "OpTensorQuerySizeARM";
+    case SpvOpGraphConstantARM: return "OpGraphConstantARM";
+    case SpvOpGraphEntryPointARM: return "OpGraphEntryPointARM";
+    case SpvOpGraphARM: return "OpGraphARM";
+    case SpvOpGraphInputARM: return "OpGraphInputARM";
+    case SpvOpGraphSetOutputARM: return "OpGraphSetOutputARM";
+    case SpvOpGraphEndARM: return "OpGraphEndARM";
+    case SpvOpTypeGraphARM: return "OpTypeGraphARM";
     case SpvOpTerminateInvocation: return "OpTerminateInvocation";
     case SpvOpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR";
     case SpvOpUntypedVariableKHR: return "OpUntypedVariableKHR";
@@ -5130,6 +5209,9 @@ inline const char* SpvOpToString(SpvOp value) {
     case SpvOpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL";
     case SpvOpMaskedGatherINTEL: return "OpMaskedGatherINTEL";
     case SpvOpMaskedScatterINTEL: return "OpMaskedScatterINTEL";
+    case SpvOpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL";
+    case SpvOpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL";
+    case SpvOpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL";
     default: return "Unknown";
     }
 }

+ 108 - 22
3rdparty/spirv-cross/spirv.hpp

@@ -1,26 +1,10 @@
-// Copyright (c) 2014-2024 The Khronos Group Inc.
+// Copyright: 2014-2024 The Khronos Group Inc.
+// License: MIT
 // 
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and/or associated documentation files (the "Materials"),
-// to deal in the Materials without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Materials, and to permit persons to whom the
-// Materials are furnished to do so, subject to the following conditions:
-// 
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Materials.
-// 
-// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
-// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
-// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
-// 
-// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
-// IN THE MATERIALS.
+// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+// https://www.khronos.org/registry/
 
 // This header is automatically generated by the same tool that creates
 // the Binary Section of the SPIR-V specification.
@@ -172,6 +156,8 @@ enum ExecutionMode {
     ExecutionModeSignedZeroInfNanPreserve = 4461,
     ExecutionModeRoundingModeRTE = 4462,
     ExecutionModeRoundingModeRTZ = 4463,
+    ExecutionModeNonCoherentTileAttachmentReadQCOM = 4489,
+    ExecutionModeTileShadingRateQCOM = 4490,
     ExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
     ExecutionModeStencilRefReplacingEXT = 5027,
     ExecutionModeCoalescingAMDX = 5069,
@@ -241,6 +227,7 @@ enum StorageClass {
     StorageClassImage = 11,
     StorageClassStorageBuffer = 12,
     StorageClassTileImageEXT = 4172,
+    StorageClassTileAttachmentQCOM = 4491,
     StorageClassNodePayloadAMDX = 5068,
     StorageClassCallableDataKHR = 5328,
     StorageClassCallableDataNV = 5328,
@@ -550,6 +537,7 @@ enum Decoration {
     DecorationMaxByteOffset = 45,
     DecorationAlignmentId = 46,
     DecorationMaxByteOffsetId = 47,
+    DecorationSaturatedToLargestFloat8NormalConversionEXT = 4216,
     DecorationNoSignedWrap = 4469,
     DecorationNoUnsignedWrap = 4470,
     DecorationWeightTextureQCOM = 4487,
@@ -719,6 +707,9 @@ enum BuiltIn {
     BuiltInDeviceIndex = 4438,
     BuiltInViewIndex = 4440,
     BuiltInShadingRateKHR = 4444,
+    BuiltInTileOffsetQCOM = 4492,
+    BuiltInTileDimensionQCOM = 4493,
+    BuiltInTileApronSizeQCOM = 4494,
     BuiltInBaryCoordNoPerspAMD = 4992,
     BuiltInBaryCoordNoPerspCentroidAMD = 4993,
     BuiltInBaryCoordNoPerspSampleAMD = 4994,
@@ -1069,7 +1060,13 @@ enum Capability {
     CapabilityTileImageColorReadAccessEXT = 4166,
     CapabilityTileImageDepthReadAccessEXT = 4167,
     CapabilityTileImageStencilReadAccessEXT = 4168,
+    CapabilityTensorsARM = 4174,
+    CapabilityStorageTensorArrayDynamicIndexingARM = 4175,
+    CapabilityStorageTensorArrayNonUniformIndexingARM = 4176,
+    CapabilityGraphARM = 4191,
     CapabilityCooperativeMatrixLayoutsARM = 4201,
+    CapabilityFloat8EXT = 4212,
+    CapabilityFloat8CooperativeMatrixEXT = 4213,
     CapabilityFragmentShadingRateKHR = 4422,
     CapabilitySubgroupBallotKHR = 4423,
     CapabilityDrawParameters = 4427,
@@ -1105,6 +1102,7 @@ enum Capability {
     CapabilityTextureSampleWeightedQCOM = 4484,
     CapabilityTextureBoxFilterQCOM = 4485,
     CapabilityTextureBlockMatchQCOM = 4486,
+    CapabilityTileShadingQCOM = 4495,
     CapabilityTextureBlockMatch2QCOM = 4498,
     CapabilityFloat16ImageAMD = 5008,
     CapabilityImageGatherBiasLodAMD = 5009,
@@ -1115,6 +1113,8 @@ enum Capability {
     CapabilityShaderClockKHR = 5055,
     CapabilityShaderEnqueueAMDX = 5067,
     CapabilityQuadControlKHR = 5087,
+    CapabilityInt4TypeINTEL = 5112,
+    CapabilityInt4CooperativeMatrixINTEL = 5114,
     CapabilityBFloat16TypeKHR = 5116,
     CapabilityBFloat16DotProductKHR = 5117,
     CapabilityBFloat16CooperativeMatrixKHR = 5118,
@@ -1283,6 +1283,7 @@ enum Capability {
     CapabilityMaskedGatherScatterINTEL = 6427,
     CapabilityCacheControlsINTEL = 6441,
     CapabilityRegisterLimitsINTEL = 6460,
+    CapabilityBindlessImagesINTEL = 6528,
     CapabilityMax = 0x7fffffff,
 };
 
@@ -1459,6 +1460,24 @@ enum TensorAddressingOperandsMask {
     TensorAddressingOperandsDecodeFuncMask = 0x00000002,
 };
 
+enum TensorOperandsShift {
+    TensorOperandsNontemporalARMShift = 0,
+    TensorOperandsOutOfBoundsValueARMShift = 1,
+    TensorOperandsMakeElementAvailableARMShift = 2,
+    TensorOperandsMakeElementVisibleARMShift = 3,
+    TensorOperandsNonPrivateElementARMShift = 4,
+    TensorOperandsMax = 0x7fffffff,
+};
+
+enum TensorOperandsMask {
+    TensorOperandsMaskNone = 0,
+    TensorOperandsNontemporalARMMask = 0x00000001,
+    TensorOperandsOutOfBoundsValueARMMask = 0x00000002,
+    TensorOperandsMakeElementAvailableARMMask = 0x00000004,
+    TensorOperandsMakeElementVisibleARMMask = 0x00000008,
+    TensorOperandsNonPrivateElementARMMask = 0x00000010,
+};
+
 enum InitializationModeQualifier {
     InitializationModeQualifierInitOnDeviceReprogramINTEL = 0,
     InitializationModeQualifierInitOnDeviceResetINTEL = 1,
@@ -1545,6 +1564,8 @@ enum RawAccessChainOperandsMask {
 
 enum FPEncoding {
     FPEncodingBFloat16KHR = 0,
+    FPEncodingFloat8E4M3EXT = 4214,
+    FPEncodingFloat8E5M2EXT = 4215,
     FPEncodingMax = 0x7fffffff,
 };
 
@@ -1923,6 +1944,17 @@ enum Op {
     OpColorAttachmentReadEXT = 4160,
     OpDepthAttachmentReadEXT = 4161,
     OpStencilAttachmentReadEXT = 4162,
+    OpTypeTensorARM = 4163,
+    OpTensorReadARM = 4164,
+    OpTensorWriteARM = 4165,
+    OpTensorQuerySizeARM = 4166,
+    OpGraphConstantARM = 4181,
+    OpGraphEntryPointARM = 4182,
+    OpGraphARM = 4183,
+    OpGraphInputARM = 4184,
+    OpGraphSetOutputARM = 4185,
+    OpGraphEndARM = 4186,
+    OpTypeGraphARM = 4190,
     OpTerminateInvocation = 4416,
     OpTypeUntypedPointerKHR = 4417,
     OpUntypedVariableKHR = 4418,
@@ -2381,6 +2413,9 @@ enum Op {
     OpRoundFToTF32INTEL = 6426,
     OpMaskedGatherINTEL = 6428,
     OpMaskedScatterINTEL = 6429,
+    OpConvertHandleToImageINTEL = 6529,
+    OpConvertHandleToSamplerINTEL = 6530,
+    OpConvertHandleToSampledImageINTEL = 6531,
     OpMax = 0x7fffffff,
 };
 
@@ -2739,6 +2774,17 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
     case OpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
     case OpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
     case OpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
+    case OpTypeTensorARM: *hasResult = true; *hasResultType = false; break;
+    case OpTensorReadARM: *hasResult = true; *hasResultType = true; break;
+    case OpTensorWriteARM: *hasResult = false; *hasResultType = false; break;
+    case OpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break;
+    case OpGraphConstantARM: *hasResult = true; *hasResultType = true; break;
+    case OpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break;
+    case OpGraphARM: *hasResult = true; *hasResultType = true; break;
+    case OpGraphInputARM: *hasResult = true; *hasResultType = true; break;
+    case OpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break;
+    case OpGraphEndARM: *hasResult = false; *hasResultType = false; break;
+    case OpTypeGraphARM: *hasResult = true; *hasResultType = false; break;
     case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
     case OpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break;
     case OpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break;
@@ -3186,6 +3232,9 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
     case OpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break;
     case OpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break;
     case OpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break;
+    case OpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break;
     }
 }
 inline const char* SourceLanguageToString(SourceLanguage value) {
@@ -3301,6 +3350,8 @@ inline const char* ExecutionModeToString(ExecutionMode value) {
     case ExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve";
     case ExecutionModeRoundingModeRTE: return "RoundingModeRTE";
     case ExecutionModeRoundingModeRTZ: return "RoundingModeRTZ";
+    case ExecutionModeNonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM";
+    case ExecutionModeTileShadingRateQCOM: return "TileShadingRateQCOM";
     case ExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD";
     case ExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT";
     case ExecutionModeCoalescingAMDX: return "CoalescingAMDX";
@@ -3367,6 +3418,7 @@ inline const char* StorageClassToString(StorageClass value) {
     case StorageClassImage: return "Image";
     case StorageClassStorageBuffer: return "StorageBuffer";
     case StorageClassTileImageEXT: return "TileImageEXT";
+    case StorageClassTileAttachmentQCOM: return "TileAttachmentQCOM";
     case StorageClassNodePayloadAMDX: return "NodePayloadAMDX";
     case StorageClassCallableDataKHR: return "CallableDataKHR";
     case StorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR";
@@ -3615,6 +3667,7 @@ inline const char* DecorationToString(Decoration value) {
     case DecorationMaxByteOffset: return "MaxByteOffset";
     case DecorationAlignmentId: return "AlignmentId";
     case DecorationMaxByteOffsetId: return "MaxByteOffsetId";
+    case DecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT";
     case DecorationNoSignedWrap: return "NoSignedWrap";
     case DecorationNoUnsignedWrap: return "NoUnsignedWrap";
     case DecorationWeightTextureQCOM: return "WeightTextureQCOM";
@@ -3774,6 +3827,9 @@ inline const char* BuiltInToString(BuiltIn value) {
     case BuiltInDeviceIndex: return "DeviceIndex";
     case BuiltInViewIndex: return "ViewIndex";
     case BuiltInShadingRateKHR: return "ShadingRateKHR";
+    case BuiltInTileOffsetQCOM: return "TileOffsetQCOM";
+    case BuiltInTileDimensionQCOM: return "TileDimensionQCOM";
+    case BuiltInTileApronSizeQCOM: return "TileApronSizeQCOM";
     case BuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD";
     case BuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD";
     case BuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD";
@@ -3954,7 +4010,13 @@ inline const char* CapabilityToString(Capability value) {
     case CapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT";
     case CapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
     case CapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
+    case CapabilityTensorsARM: return "TensorsARM";
+    case CapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM";
+    case CapabilityStorageTensorArrayNonUniformIndexingARM: return "StorageTensorArrayNonUniformIndexingARM";
+    case CapabilityGraphARM: return "GraphARM";
     case CapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
+    case CapabilityFloat8EXT: return "Float8EXT";
+    case CapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT";
     case CapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR";
     case CapabilitySubgroupBallotKHR: return "SubgroupBallotKHR";
     case CapabilityDrawParameters: return "DrawParameters";
@@ -3988,6 +4050,7 @@ inline const char* CapabilityToString(Capability value) {
     case CapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM";
     case CapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM";
     case CapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM";
+    case CapabilityTileShadingQCOM: return "TileShadingQCOM";
     case CapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM";
     case CapabilityFloat16ImageAMD: return "Float16ImageAMD";
     case CapabilityImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD";
@@ -3998,6 +4061,8 @@ inline const char* CapabilityToString(Capability value) {
     case CapabilityShaderClockKHR: return "ShaderClockKHR";
     case CapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX";
     case CapabilityQuadControlKHR: return "QuadControlKHR";
+    case CapabilityInt4TypeINTEL: return "Int4TypeINTEL";
+    case CapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL";
     case CapabilityBFloat16TypeKHR: return "BFloat16TypeKHR";
     case CapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR";
     case CapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR";
@@ -4140,6 +4205,7 @@ inline const char* CapabilityToString(Capability value) {
     case CapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL";
     case CapabilityCacheControlsINTEL: return "CacheControlsINTEL";
     case CapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL";
+    case CapabilityBindlessImagesINTEL: return "BindlessImagesINTEL";
     default: return "Unknown";
     }
 }
@@ -4295,6 +4361,8 @@ inline const char* NamedMaximumNumberOfRegistersToString(NamedMaximumNumberOfReg
 inline const char* FPEncodingToString(FPEncoding value) {
     switch (value) {
     case FPEncodingBFloat16KHR: return "BFloat16KHR";
+    case FPEncodingFloat8E4M3EXT: return "Float8E4M3EXT";
+    case FPEncodingFloat8E5M2EXT: return "Float8E5M2EXT";
     default: return "Unknown";
     }
 }
@@ -4679,6 +4747,17 @@ inline const char* OpToString(Op value) {
     case OpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT";
     case OpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT";
     case OpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT";
+    case OpTypeTensorARM: return "OpTypeTensorARM";
+    case OpTensorReadARM: return "OpTensorReadARM";
+    case OpTensorWriteARM: return "OpTensorWriteARM";
+    case OpTensorQuerySizeARM: return "OpTensorQuerySizeARM";
+    case OpGraphConstantARM: return "OpGraphConstantARM";
+    case OpGraphEntryPointARM: return "OpGraphEntryPointARM";
+    case OpGraphARM: return "OpGraphARM";
+    case OpGraphInputARM: return "OpGraphInputARM";
+    case OpGraphSetOutputARM: return "OpGraphSetOutputARM";
+    case OpGraphEndARM: return "OpGraphEndARM";
+    case OpTypeGraphARM: return "OpTypeGraphARM";
     case OpTerminateInvocation: return "OpTerminateInvocation";
     case OpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR";
     case OpUntypedVariableKHR: return "OpUntypedVariableKHR";
@@ -5126,6 +5205,9 @@ inline const char* OpToString(Op value) {
     case OpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL";
     case OpMaskedGatherINTEL: return "OpMaskedGatherINTEL";
     case OpMaskedScatterINTEL: return "OpMaskedScatterINTEL";
+    case OpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL";
+    case OpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL";
+    case OpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL";
     default: return "Unknown";
     }
 }
@@ -5186,6 +5268,10 @@ inline TensorAddressingOperandsMask operator|(TensorAddressingOperandsMask a, Te
 inline TensorAddressingOperandsMask operator&(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) & unsigned(b)); }
 inline TensorAddressingOperandsMask operator^(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) ^ unsigned(b)); }
 inline TensorAddressingOperandsMask operator~(TensorAddressingOperandsMask a) { return TensorAddressingOperandsMask(~unsigned(a)); }
+inline TensorOperandsMask operator|(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) | unsigned(b)); }
+inline TensorOperandsMask operator&(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) & unsigned(b)); }
+inline TensorOperandsMask operator^(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) ^ unsigned(b)); }
+inline TensorOperandsMask operator~(TensorOperandsMask a) { return TensorOperandsMask(~unsigned(a)); }
 inline MatrixMultiplyAccumulateOperandsMask operator|(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) | unsigned(b)); }
 inline MatrixMultiplyAccumulateOperandsMask operator&(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) & unsigned(b)); }
 inline MatrixMultiplyAccumulateOperandsMask operator^(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) ^ unsigned(b)); }
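
The new TensorOperandsMask enumerants compose with the operator overloads added just above, in the same way as the other *Mask enums. A small illustration (not from the commit; the helper and the flag choice are hypothetical, assuming spirv.hpp from this tree is on the include path):

#include <spirv.hpp>

// Hypothetical helper: check whether an extra OutOfBoundsValue operand is requested.
static bool needs_out_of_bounds_value(spv::TensorOperandsMask operands)
{
	return (operands & spv::TensorOperandsOutOfBoundsValueARMMask) !=
	       spv::TensorOperandsMaskNone;
}

// Flags combine type-safely through the overloaded operators.
static const spv::TensorOperandsMask example_operands =
    spv::TensorOperandsNontemporalARMMask | spv::TensorOperandsNonPrivateElementARMMask;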

+ 99 - 25
3rdparty/spirv-cross/spirv_cfg.cpp

@@ -81,31 +81,105 @@ bool CFG::is_back_edge(uint32_t to) const
 	// We have a back edge if the visit order is set with the temporary magic value 0.
 	// Crossing edges will have already been recorded with a visit order.
 	auto itr = visit_order.find(to);
-	return itr != end(visit_order) && itr->second.get() == 0;
+	return itr != end(visit_order) && itr->second.visited_branches && !itr->second.visited_resolve;
 }
 
-bool CFG::has_visited_forward_edge(uint32_t to) const
+bool CFG::has_visited_branch(uint32_t to) const
 {
-	// If > 0, we have visited the edge already, and this is not a back edge branch.
 	auto itr = visit_order.find(to);
-	return itr != end(visit_order) && itr->second.get() > 0;
+	return itr != end(visit_order) && itr->second.visited_branches;
 }
 
-bool CFG::post_order_visit(uint32_t block_id)
+void CFG::post_order_visit_entry(uint32_t block)
 {
-	// If we have already branched to this block (back edge), stop recursion.
-	// If our branches are back-edges, we do not record them.
-	// We have to record crossing edges however.
-	if (has_visited_forward_edge(block_id))
-		return true;
-	else if (is_back_edge(block_id))
-		return false;
+	visit_stack.push_back(block);
 
-	// Block back-edges from recursively revisiting ourselves.
-	visit_order[block_id].get() = 0;
+	while (!visit_stack.empty())
+	{
+		bool keep_iterating;
+		do
+		{
+			// Reverse the order to allow for stack-like behavior and preserves the visit order from recursive algorithm.
+			// Traverse depth first.
+			uint32_t to_visit = visit_stack.back();
+			last_visited_size = visit_stack.size();
+			post_order_visit_branches(to_visit);
+			keep_iterating = last_visited_size != visit_stack.size();
+			if (keep_iterating)
+				std::reverse(visit_stack.begin() + last_visited_size, visit_stack.end());
+		} while (keep_iterating);
+
+		// We've reached the end of some tree leaf. Resolve the stack.
+		// Any node which has been visited for real can be popped now.
+		while (!visit_stack.empty() && visit_order[visit_stack.back()].visited_branches)
+		{
+			post_order_visit_resolve(visit_stack.back());
+			visit_stack.pop_back();
+		}
+	}
+}
+
+void CFG::visit_branch(uint32_t block_id)
+{
+	// Prune obvious duplicates.
+	if (std::find(visit_stack.begin() + last_visited_size, visit_stack.end(), block_id) == visit_stack.end() &&
+	    !has_visited_branch(block_id))
+	{
+		visit_stack.push_back(block_id);
+	}
+}
+
+void CFG::post_order_visit_branches(uint32_t block_id)
+{
+	auto &block = compiler.get<SPIRBlock>(block_id);
+
+	auto &visit = visit_order[block_id];
+	if (visit.visited_branches)
+		return;
+	visit.visited_branches = true;
+
+	if (block.merge == SPIRBlock::MergeLoop)
+		visit_branch(block.merge_block);
+	else if (block.merge == SPIRBlock::MergeSelection)
+		visit_branch(block.next_block);
+
+	// First visit our branch targets.
+	switch (block.terminator)
+	{
+	case SPIRBlock::Direct:
+		visit_branch(block.next_block);
+		break;
+
+	case SPIRBlock::Select:
+		visit_branch(block.true_block);
+		visit_branch(block.false_block);
+		break;
 
+	case SPIRBlock::MultiSelect:
+	{
+		const auto &cases = compiler.get_case_list(block);
+		for (const auto &target : cases)
+			visit_branch(target.block);
+		if (block.default_block)
+			visit_branch(block.default_block);
+		break;
+	}
+
+	default:
+		break;
+	}
+}
+
+void CFG::post_order_visit_resolve(uint32_t block_id)
+{
 	auto &block = compiler.get<SPIRBlock>(block_id);
 
+	auto &visit_block = visit_order[block_id];
+	assert(visit_block.visited_branches);
+	auto &visited = visit_order[block_id].visited_resolve;
+	if (visited)
+		return;
+
 	// If this is a loop header, add an implied branch to the merge target.
 	// This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners.
 	// To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block.
@@ -116,21 +190,21 @@ bool CFG::post_order_visit(uint32_t block_id)
 	// is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis.
 	// For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine,
 	// but for loops, only the header might end up actually branching to merge block.
-	if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block))
+	if (block.merge == SPIRBlock::MergeLoop && !is_back_edge(block.merge_block))
 		add_branch(block_id, block.merge_block);
 
 	// First visit our branch targets.
 	switch (block.terminator)
 	{
 	case SPIRBlock::Direct:
-		if (post_order_visit(block.next_block))
+		if (!is_back_edge(block.next_block))
 			add_branch(block_id, block.next_block);
 		break;
 
 	case SPIRBlock::Select:
-		if (post_order_visit(block.true_block))
+		if (!is_back_edge(block.true_block))
 			add_branch(block_id, block.true_block);
-		if (post_order_visit(block.false_block))
+		if (!is_back_edge(block.false_block))
 			add_branch(block_id, block.false_block);
 		break;
 
@@ -139,10 +213,10 @@ bool CFG::post_order_visit(uint32_t block_id)
 		const auto &cases = compiler.get_case_list(block);
 		for (const auto &target : cases)
 		{
-			if (post_order_visit(target.block))
+			if (!is_back_edge(target.block))
 				add_branch(block_id, target.block);
 		}
-		if (block.default_block && post_order_visit(block.default_block))
+		if (block.default_block && !is_back_edge(block.default_block))
 			add_branch(block_id, block.default_block);
 		break;
 	}
@@ -157,7 +231,7 @@ bool CFG::post_order_visit(uint32_t block_id)
 	// We can use the variable without a Phi since there is only one possible parent here.
 	// However, in this case, we need to hoist out the inner variable to outside the branch.
 	// Use same strategy as loops.
-	if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block))
+	if (block.merge == SPIRBlock::MergeSelection && !is_back_edge(block.next_block))
 	{
 		// If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup.
 		// Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement
@@ -201,10 +275,9 @@ bool CFG::post_order_visit(uint32_t block_id)
 		}
 	}
 
-	// Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges.
-	visit_order[block_id].get() = ++visit_count;
+	visited = true;
+	visit_block.order = ++visit_count;
 	post_order.push_back(block_id);
-	return true;
 }
 
 void CFG::build_post_order_visit_order()
@@ -213,11 +286,12 @@ void CFG::build_post_order_visit_order()
 	visit_count = 0;
 	visit_order.clear();
 	post_order.clear();
-	post_order_visit(block);
+	post_order_visit_entry(block);
 }
 
 void CFG::add_branch(uint32_t from, uint32_t to)
 {
+	assert(from && to);
 	const auto add_unique = [](SmallVector<uint32_t> &l, uint32_t value) {
 		auto itr = find(begin(l), end(l), value);
 		if (itr == end(l))

+ 12 - 14
3rdparty/spirv-cross/spirv_cfg.hpp

@@ -68,7 +68,7 @@ public:
 	{
 		auto itr = visit_order.find(block);
 		assert(itr != std::end(visit_order));
-		int v = itr->second.get();
+		int v = itr->second.order;
 		assert(v > 0);
 		return uint32_t(v);
 	}
@@ -114,17 +114,9 @@ public:
 private:
 	struct VisitOrder
 	{
-		int &get()
-		{
-			return v;
-		}
-
-		const int &get() const
-		{
-			return v;
-		}
-
-		int v = -1;
+		int order = -1;
+		bool visited_resolve = false;
+		bool visited_branches = false;
 	};
 
 	Compiler &compiler;
@@ -139,11 +131,17 @@ private:
 	void add_branch(uint32_t from, uint32_t to);
 	void build_post_order_visit_order();
 	void build_immediate_dominators();
-	bool post_order_visit(uint32_t block);
+	void post_order_visit_branches(uint32_t block);
+	void post_order_visit_resolve(uint32_t block);
+	void post_order_visit_entry(uint32_t block);
 	uint32_t visit_count = 0;
 
 	bool is_back_edge(uint32_t to) const;
-	bool has_visited_forward_edge(uint32_t to) const;
+	bool has_visited_branch(uint32_t to) const;
+	void visit_branch(uint32_t block_id);
+
+	SmallVector<uint32_t> visit_stack;
+	size_t last_visited_size = 0;
 };
 
 class DominatorBuilder
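
The CFG change above replaces the recursive post_order_visit() with an explicit visit_stack and a two-phase VisitOrder (visited_branches, then visited_resolve), presumably so recursion depth no longer limits very deep control-flow graphs. For orientation, this is the same shape as a generic iterative post-order DFS; a minimal sketch, not SPIRV-Cross code, with a made-up successor map:

#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

// Generic iterative post-order: first queue a node's successors ("branch" phase),
// then emit the node itself once they have all been handled ("resolve" phase).
static void post_order(uint32_t entry,
                       const std::unordered_map<uint32_t, std::vector<uint32_t>> &successors,
                       std::vector<uint32_t> &out)
{
	std::unordered_map<uint32_t, bool> discovered;
	std::vector<std::pair<uint32_t, bool>> stack;
	stack.push_back({ entry, false });

	while (!stack.empty())
	{
		uint32_t node = stack.back().first;
		bool resolve = stack.back().second;
		stack.pop_back();

		if (resolve)
		{
			out.push_back(node); // All successors done; emit in post-order.
			continue;
		}

		if (discovered[node])
			continue; // Back or cross edge; already queued for resolution.
		discovered[node] = true;

		stack.push_back({ node, true }); // Resolve after the successors pushed below.
		auto itr = successors.find(node);
		if (itr != successors.end())
			for (uint32_t succ : itr->second)
				if (!discovered[succ])
					stack.push_back({ succ, false });
	}
}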

+ 78 - 9
3rdparty/spirv-cross/spirv_common.hpp

@@ -574,6 +574,7 @@ struct SPIRType : IVariant
 		Sampler,
 		AccelerationStructure,
 		RayQuery,
+		CoopVecNV,
 
 		// Keep internal types at the end.
 		ControlPointArray,
@@ -581,7 +582,11 @@ struct SPIRType : IVariant
 		Char,
 		// MSL specific type, that is used by 'object'(analog of 'task' from glsl) shader.
 		MeshGridProperties,
-		BFloat16
+		BFloat16,
+		FloatE4M3,
+		FloatE5M2,
+
+		Tensor
 	};
 
 	// Scalar/vector/matrix support.
@@ -606,13 +611,29 @@ struct SPIRType : IVariant
 	bool pointer = false;
 	bool forward_pointer = false;
 
-	struct
+	union
 	{
-		uint32_t use_id = 0;
-		uint32_t rows_id = 0;
-		uint32_t columns_id = 0;
-		uint32_t scope_id = 0;
-	} cooperative;
+		struct
+		{
+			uint32_t use_id;
+			uint32_t rows_id;
+			uint32_t columns_id;
+			uint32_t scope_id;
+		} cooperative;
+
+		struct
+		{
+			uint32_t component_type_id;
+			uint32_t component_count_id;
+		} coopVecNV;
+
+		struct
+		{
+			uint32_t type;
+			uint32_t rank;
+			uint32_t shape;
+		} tensor;
+	} ext;
 
 	spv::StorageClass storage = spv::StorageClassGeneric;
 
@@ -670,6 +691,12 @@ struct SPIRExtension : IVariant
 		NonSemanticGeneric
 	};
 
+	enum ShaderDebugInfoOps
+	{
+		DebugLine = 103,
+		DebugSource = 35
+	};
+
 	explicit SPIRExtension(Extension ext_)
 	    : ext(ext_)
 	{
@@ -695,6 +722,11 @@ struct SPIREntryPoint
 	FunctionID self = 0;
 	std::string name;
 	std::string orig_name;
+	std::unordered_map<uint32_t, uint32_t> fp_fast_math_defaults;
+	bool signed_zero_inf_nan_preserve_8 = false;
+	bool signed_zero_inf_nan_preserve_16 = false;
+	bool signed_zero_inf_nan_preserve_32 = false;
+	bool signed_zero_inf_nan_preserve_64 = false;
 	SmallVector<VariableID> interface_variables;
 
 	Bitset flags;
@@ -927,6 +959,7 @@ struct SPIRBlock : IVariant
 	// All access to these variables are dominated by this block,
 	// so before branching anywhere we need to make sure that we declare these variables.
 	SmallVector<VariableID> dominated_variables;
+	SmallVector<bool> rearm_dominated_variables;
 
 	// These are variables which should be declared in a for loop header, if we
 	// fail to use a classic for-loop,
@@ -1238,6 +1271,26 @@ struct SPIRConstant : IVariant
 		return u.f32;
 	}
 
+	static inline float fe4m3_to_f32(uint8_t v)
+	{
+		if ((v & 0x7f) == 0x7f)
+		{
+			union
+			{
+				float f32;
+				uint32_t u32;
+			} u;
+
+			u.u32 = (v & 0x80) ? 0xffffffffu : 0x7fffffffu;
+			return u.f32;
+		}
+		else
+		{
+			// Reuse the FP16 to FP32 code. Cute bit-hackery.
+			return f16_to_f32((int16_t(int8_t(v)) << 7) & (0xffff ^ 0x4000)) * 256.0f;
+		}
+	}
+
 	inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const
 	{
 		return m.c[col].id[row];
@@ -1286,6 +1339,16 @@ struct SPIRConstant : IVariant
 		return fp32;
 	}
 
+	inline float scalar_floate4m3(uint32_t col = 0, uint32_t row = 0) const
+	{
+		return fe4m3_to_f32(scalar_u8(col, row));
+	}
+
+	inline float scalar_bf8(uint32_t col = 0, uint32_t row = 0) const
+	{
+		return f16_to_f32(scalar_u8(col, row) << 8);
+	}
+
 	inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const
 	{
 		return m.c[col].r[row].f32;
@@ -1356,9 +1419,10 @@ struct SPIRConstant : IVariant
 
 	SPIRConstant() = default;
 
-	SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
+	SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized, bool replicated_ = false)
 	    : constant_type(constant_type_)
 	    , specialization(specialized)
+	    , replicated(replicated_)
 	{
 		subconstants.reserve(num_elements);
 		for (uint32_t i = 0; i < num_elements; i++)
@@ -1437,6 +1501,9 @@ struct SPIRConstant : IVariant
 	// For composites which are constant arrays, etc.
 	SmallVector<ConstantID> subconstants;
 
+	// Whether the subconstants are intended to be replicated (e.g. OpConstantCompositeReplicateEXT)
+	bool replicated = false;
+
 	// Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant,
 	// and uses them to initialize the constant. This allows the user
 	// to still be able to specialize the value by supplying corresponding
@@ -1732,6 +1799,7 @@ struct Meta
 		uint32_t spec_id = 0;
 		uint32_t index = 0;
 		spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax;
+		spv::FPFastMathModeMask fp_fast_math_mode = spv::FPFastMathModeMaskNone;
 		bool builtin = false;
 		bool qualified_alias_explicit_override = false;
 
@@ -1787,7 +1855,8 @@ private:
 
 static inline bool type_is_floating_point(const SPIRType &type)
 {
-	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double;
+	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double ||
+	       type.basetype == SPIRType::BFloat16 || type.basetype == SPIRType::FloatE5M2 || type.basetype == SPIRType::FloatE4M3;
 }
 
 static inline bool type_is_integral(const SPIRType &type)
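
The fe4m3_to_f32() helper above widens FP8 values by reusing the FP16 path: the 8-bit pattern is shifted into an FP16 layout and the result is rescaled by 256 to correct the exponent bias (7 for E4M3 versus 15 for FP16). A standalone reference decoder, assuming the usual OCP FP8 E4M3 encoding (bias 7, no infinities, exponent and mantissa all ones meaning NaN), is useful for sanity-checking; it is illustrative only and not part of the commit:

#include <cmath>
#include <cstdint>
#include <limits>

static float e4m3_reference(uint8_t v)
{
	const float sign = (v & 0x80) ? -1.0f : 1.0f;
	const uint32_t exponent = (v >> 3) & 0xfu;
	const uint32_t mantissa = v & 0x7u;

	if (exponent == 0xfu && mantissa == 0x7u)
		return std::numeric_limits<float>::quiet_NaN(); // E4M3 has NaN but no Inf.

	if (exponent == 0)
		return sign * std::ldexp(mantissa / 8.0f, -6); // Denormal range.

	return sign * std::ldexp(1.0f + mantissa / 8.0f, int(exponent) - 7);
}

// e4m3_reference(0x38) == 1.0f and e4m3_reference(0x40) == 2.0f,
// matching fe4m3_to_f32(0x38) and fe4m3_to_f32(0x40) above.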

+ 26 - 5
3rdparty/spirv-cross/spirv_cross.cpp

@@ -280,6 +280,9 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
 			// This is a global side effect of the function.
 			return false;
 
+		case OpTensorReadARM:
+			return false;
+
 		case OpExtInst:
 		{
 			uint32_t extension_set = ops[2];
@@ -373,6 +376,7 @@ void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_
 
 		case OpLoad:
 		case OpCooperativeMatrixLoadKHR:
+		case OpCooperativeVectorLoadNV:
 		case OpImageRead:
 		{
 			// If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
@@ -1152,6 +1156,11 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *
 			{
 				res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
 			}
+			// Tensors
+			else if (type.basetype == SPIRType::Tensor)
+			{
+				res.tensors.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
+			}
 			else
 			{
 				res.gl_plain_uniforms.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
@@ -1169,11 +1178,8 @@ bool Compiler::type_is_top_level_block(const SPIRType &type) const
 	return has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
 }
 
-bool Compiler::type_is_block_like(const SPIRType &type) const
+bool Compiler::type_is_explicit_layout(const SPIRType &type) const
 {
-	if (type_is_top_level_block(type))
-		return true;
-
 	if (type.basetype == SPIRType::Struct)
 	{
 		// Block-like types may have Offset decorations.
@@ -1185,6 +1191,14 @@ bool Compiler::type_is_block_like(const SPIRType &type) const
 	return false;
 }
 
+bool Compiler::type_is_block_like(const SPIRType &type) const
+{
+	if (type_is_top_level_block(type))
+		return true;
+	else
+		return type_is_explicit_layout(type);
+}
+
 void Compiler::parse_fixup()
 {
 	// Figure out specialization constants for work group sizes.
@@ -2370,6 +2384,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
 		execution.output_primitives = arg0;
 		break;
 
+	case ExecutionModeFPFastMathDefault:
+		execution.fp_fast_math_defaults[arg0] = arg1;
+		break;
+
 	default:
 		break;
 	}
@@ -4334,6 +4352,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
 
 		case OpCopyObject:
 		case OpLoad:
+		case OpCooperativeVectorLoadNV:
 		case OpCooperativeMatrixLoadKHR:
 			if (ops[2] == var)
 				return true;
@@ -5151,7 +5170,7 @@ bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const
 bool Compiler::type_is_opaque_value(const SPIRType &type) const
 {
 	return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image ||
-	                         type.basetype == SPIRType::Sampler);
+	                         type.basetype == SPIRType::Sampler || type.basetype == SPIRType::Tensor);
 }
 
 // Make these member functions so we can easily break on any force_recompile events.
@@ -5469,6 +5488,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
 	{
 	case OpLoad:
 	case OpCooperativeMatrixLoadKHR:
+	case OpCooperativeVectorLoadNV:
 	{
 		if (length < 3)
 			return false;
@@ -5547,6 +5567,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
 	case OpImageWrite:
 	case OpAtomicStore:
 	case OpCooperativeMatrixStoreKHR:
+	case OpCooperativeVectorStoreNV:
 	{
 		if (length < 1)
 			return false;

+ 2 - 0
3rdparty/spirv-cross/spirv_cross.hpp

@@ -95,6 +95,7 @@ struct ShaderResources
 	SmallVector<Resource> atomic_counters;
 	SmallVector<Resource> acceleration_structures;
 	SmallVector<Resource> gl_plain_uniforms;
+	SmallVector<Resource> tensors;
 
 	// There can only be one push constant block,
 	// but keep the vector in case this restriction is lifted in the future.
@@ -1171,6 +1172,7 @@ protected:
 	bool type_contains_recursion(const SPIRType &type);
 	bool type_is_array_of_pointers(const SPIRType &type) const;
 	bool type_is_block_like(const SPIRType &type) const;
+	bool type_is_explicit_layout(const SPIRType &type) const;
 	bool type_is_top_level_block(const SPIRType &type) const;
 	bool type_is_opaque_value(const SPIRType &type) const;
 

+ 8 - 0
3rdparty/spirv-cross/spirv_cross_c.cpp

@@ -200,6 +200,7 @@ struct spvc_resources_s : ScratchMemoryAllocation
 	SmallVector<spvc_reflected_resource> separate_samplers;
 	SmallVector<spvc_reflected_resource> acceleration_structures;
 	SmallVector<spvc_reflected_resource> gl_plain_uniforms;
+	SmallVector<spvc_reflected_resource> tensors;
 
 	SmallVector<spvc_reflected_builtin_resource> builtin_inputs;
 	SmallVector<spvc_reflected_builtin_resource> builtin_outputs;
@@ -1872,6 +1873,8 @@ bool spvc_resources_s::copy_resources(const ShaderResources &resources)
 		return false;
 	if (!copy_resources(gl_plain_uniforms, resources.gl_plain_uniforms))
 		return false;
+	if (!copy_resources(tensors, resources.tensors))
+		return false;
 	if (!copy_resources(builtin_inputs, resources.builtin_inputs))
 		return false;
 	if (!copy_resources(builtin_outputs, resources.builtin_outputs))
@@ -2025,6 +2028,11 @@ spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources,
 
 	case SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM:
 		list = &resources->gl_plain_uniforms;
+		break;
+
+	case SPVC_RESOURCE_TYPE_TENSOR:
+		list = &resources->tensors;
+		break;
 
 	default:
 		break;

+ 2 - 1
3rdparty/spirv-cross/spirv_cross_c.h

@@ -40,7 +40,7 @@ extern "C" {
 /* Bumped if ABI or API breaks backwards compatibility. */
 #define SPVC_C_API_VERSION_MAJOR 0
 /* Bumped if APIs or enumerations are added in a backwards compatible way. */
-#define SPVC_C_API_VERSION_MINOR 66
+#define SPVC_C_API_VERSION_MINOR 67
 /* Bumped if internal implementation details change. */
 #define SPVC_C_API_VERSION_PATCH 0
 
@@ -227,6 +227,7 @@ typedef enum spvc_resource_type
 	SPVC_RESOURCE_TYPE_RAY_QUERY = 13,
 	SPVC_RESOURCE_TYPE_SHADER_RECORD_BUFFER = 14,
 	SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM = 15,
+	SPVC_RESOURCE_TYPE_TENSOR = 16,
 	SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff
 } spvc_resource_type;
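
On the C API side, the new SPVC_RESOURCE_TYPE_TENSOR value plugs into the existing spvc_resources_get_resource_list_for_type() query. A hedged sketch (not from the commit), assuming a spvc_resources handle already obtained through the usual spvc_context/spvc_compiler calls, with error handling reduced to an early return:

#include <spirv_cross_c.h>
#include <cstdio>

void print_tensors(spvc_resources resources)
{
	const spvc_reflected_resource *list = nullptr;
	size_t count = 0;

	if (spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_TENSOR,
	                                              &list, &count) != SPVC_SUCCESS)
		return;

	for (size_t i = 0; i < count; i++)
		printf("tensor: %s (id %u)\n", list[i].name, list[i].id);
}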
 

+ 30 - 1
3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp

@@ -452,6 +452,10 @@ void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument)
 		dec.fp_rounding_mode = static_cast<FPRoundingMode>(argument);
 		break;
 
+	case DecorationFPFastMathMode:
+		dec.fp_fast_math_mode = static_cast<FPFastMathModeMask>(argument);
+		break;
+
 	default:
 		break;
 	}
@@ -523,8 +527,27 @@ void ParsedIR::mark_used_as_array_length(ID id)
 	switch (ids[id].get_type())
 	{
 	case TypeConstant:
-		get<SPIRConstant>(id).is_used_as_array_length = true;
+	{
+		auto &c = get<SPIRConstant>(id);
+		c.is_used_as_array_length = true;
+
+		// Mark composite dependencies as well.
+		for (auto &sub_id: c.m.id)
+			if (sub_id)
+				mark_used_as_array_length(sub_id);
+
+		for (uint32_t col = 0; col < c.m.columns; col++)
+		{
+			for (auto &sub_id : c.m.c[col].id)
+				if (sub_id)
+					mark_used_as_array_length(sub_id);
+		}
+
+		for (auto &sub_id : c.subconstants)
+			if (sub_id)
+				mark_used_as_array_length(sub_id);
 		break;
+	}
 
 	case TypeConstantOp:
 	{
@@ -643,6 +666,8 @@ uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const
 		return dec.index;
 	case DecorationFPRoundingMode:
 		return dec.fp_rounding_mode;
+	case DecorationFPFastMathMode:
+		return dec.fp_fast_math_mode;
 	default:
 		return 1;
 	}
@@ -730,6 +755,10 @@ void ParsedIR::unset_decoration(ID id, Decoration decoration)
 		dec.fp_rounding_mode = FPRoundingModeMax;
 		break;
 
+	case DecorationFPFastMathMode:
+		dec.fp_fast_math_mode = FPFastMathModeMaskNone;
+		break;
+
 	case DecorationHlslCounterBufferGOOGLE:
 	{
 		auto &counter = meta[id].hlsl_magic_counter_buffer;

+ 1 - 0
3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp

@@ -111,6 +111,7 @@ public:
 
 	struct Source
 	{
+		spv::SourceLanguage lang = spv::SourceLanguageUnknown;
 		uint32_t version = 0;
 		bool es = false;
 		bool known = false;

The diff for this file was not shown because it is too large
+ 565 - 78
3rdparty/spirv-cross/spirv_glsl.cpp


+ 17 - 1
3rdparty/spirv-cross/spirv_glsl.hpp

@@ -32,6 +32,8 @@
 
 namespace SPIRV_CROSS_NAMESPACE
 {
+struct GlslConstantNameMapping;
+
 enum PlsFormat
 {
 	PlsNone = 0,
@@ -426,6 +428,8 @@ protected:
 	                                                   const uint32_t *args, uint32_t count);
 	virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
 	                                        uint32_t count);
+	void emit_non_semantic_shader_debug_info(uint32_t result_type, uint32_t result_id, uint32_t op,
+	                                         const uint32_t *args, uint32_t count);
 	virtual void emit_header();
 	void emit_line_directive(uint32_t file_id, uint32_t line_literal);
 	void build_workgroup_size(SmallVector<std::string> &arguments, const SpecializationConstant &x,
@@ -662,6 +666,7 @@ protected:
 		bool workgroup_size_is_hidden = false;
 		bool requires_relaxed_precision_analysis = false;
 		bool implicit_c_integer_promotion_rules = false;
+		bool supports_spec_constant_array_size = true;
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -685,6 +690,8 @@ protected:
 	void emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
 	                                    const SmallVector<uint32_t> &indices);
 	void emit_block_chain(SPIRBlock &block);
+	BlockID emit_block_chain_inner(SPIRBlock &block);
+	void emit_block_chain_cleanup(SPIRBlock &block);
 	void emit_hoisted_temporaries(SmallVector<std::pair<TypeID, ID>> &temporaries);
 	int get_constant_mapping_to_workgroup_component(const SPIRConstant &constant) const;
 	void emit_constant(const SPIRConstant &constant);
@@ -802,6 +809,10 @@ protected:
 	virtual void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
 	std::string to_non_uniform_aware_expression(uint32_t id);
 	std::string to_atomic_ptr_expression(uint32_t id);
+	std::string to_pretty_expression_if_int_constant(
+			uint32_t id,
+			const GlslConstantNameMapping *mapping_start, const GlslConstantNameMapping *mapping_end,
+			bool register_expression_read = true);
 	std::string to_expression(uint32_t id, bool register_expression_read = true);
 	std::string to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type);
 	std::string to_rerolled_array_expression(const SPIRType &parent_type, const std::string &expr, const SPIRType &type);
@@ -830,7 +841,7 @@ protected:
 	void emit_output_variable_initializer(const SPIRVariable &var);
 	std::string to_precision_qualifiers_glsl(uint32_t id);
 	virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);
-	std::string flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags);
+	std::string flags_to_qualifiers_glsl(const SPIRType &type, uint32_t id, const Bitset &flags);
 	const char *format_to_glsl(spv::ImageFormat format);
 	virtual std::string layout_for_member(const SPIRType &type, uint32_t index);
 	virtual std::string to_interpolation_qualifiers(const Bitset &flags);
@@ -1017,6 +1028,8 @@ protected:
 	const Instruction *get_next_instruction_in_block(const Instruction &instr);
 	static uint32_t mask_relevant_memory_semantics(uint32_t semantics);
 
+	std::string convert_floate4m3_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
+	std::string convert_floate5m2_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
@@ -1068,6 +1081,9 @@ protected:
 	std::string format_float(float value) const;
 	std::string format_double(double value) const;
 
+	uint32_t get_fp_fast_math_flags_for_op(uint32_t result_type, uint32_t id) const;
+	bool has_legacy_nocontract(uint32_t result_type, uint32_t id) const;
+
 private:
 	void init();
 

+ 2 - 2
3rdparty/spirv-cross/spirv_hlsl.cpp

@@ -3029,7 +3029,7 @@ string CompilerHLSL::get_inner_entry_point_name() const
 		SPIRV_CROSS_THROW("Unsupported execution model.");
 }
 
-uint32_t CompilerHLSL::input_vertices_from_execution_mode(spirv_cross::SPIREntryPoint &execution) const
+uint32_t CompilerHLSL::input_vertices_from_execution_mode(SPIREntryPoint &execution) const
 {
 	uint32_t input_vertices = 1;
 
@@ -3061,7 +3061,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret
 	auto &type = get<SPIRType>(func.return_type);
 	if (type.array.empty())
 	{
-		decl += flags_to_qualifiers_glsl(type, return_flags);
+		decl += flags_to_qualifiers_glsl(type, 0, return_flags);
 		decl += type_to_glsl(type);
 		decl += " ";
 	}

+ 291 - 97
3rdparty/spirv-cross/spirv_msl.cpp

@@ -510,7 +510,7 @@ void CompilerMSL::build_implicit_builtins()
 				has_local_invocation_index = true;
 			}
 
-			if (need_workgroup_size && builtin == BuiltInLocalInvocationId)
+			if (need_workgroup_size && builtin == BuiltInWorkgroupSize)
 			{
 				builtin_workgroup_size_id = var.self;
 				mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self);
@@ -684,28 +684,6 @@ void CompilerMSL::build_implicit_builtins()
 				mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id);
 			}
 
-			if (need_multiview)
-			{
-				// Multiview shaders are not allowed to write to gl_Layer, ostensibly because
-				// it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
-				// Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
-				// gl_Layer is an output in vertex-pipeline shaders.
-				uint32_t type_ptr_out_id = ir.increase_bound_by(2);
-				SPIRType uint_type_ptr_out = get_uint_type();
-				uint_type_ptr.op = OpTypePointer;
-				uint_type_ptr_out.pointer = true;
-				uint_type_ptr_out.pointer_depth++;
-				uint_type_ptr_out.parent_type = get_uint_type_id();
-				uint_type_ptr_out.storage = StorageClassOutput;
-				auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
-				ptr_out_type.self = get_uint_type_id();
-				uint32_t var_id = type_ptr_out_id + 1;
-				set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
-				set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
-				builtin_layer_id = var_id;
-				mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
-			}
-
 			if (need_multiview && !has_view_idx)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);
@@ -718,6 +696,28 @@ void CompilerMSL::build_implicit_builtins()
 			}
 		}
 
+		if (need_multiview)
+		{
+			// Multiview shaders are not allowed to write to gl_Layer, ostensibly because
+			// it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
+			// Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
+			// gl_Layer is an output in vertex-pipeline shaders.
+			uint32_t type_ptr_out_id = ir.increase_bound_by(2);
+			SPIRType uint_type_ptr_out = get_uint_type();
+			uint_type_ptr_out.op = OpTypePointer;
+			uint_type_ptr_out.pointer = true;
+			uint_type_ptr_out.pointer_depth++;
+			uint_type_ptr_out.parent_type = get_uint_type_id();
+			uint_type_ptr_out.storage = StorageClassOutput;
+			auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
+			ptr_out_type.self = get_uint_type_id();
+			uint32_t var_id = type_ptr_out_id + 1;
+			set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
+			set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
+			builtin_layer_id = var_id;
+			mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
+		}
+
 		if ((need_tesc_params && (msl_options.multi_patch_workgroup || !has_invocation_id || !has_primitive_id)) ||
 		    (need_tese_params && !has_primitive_id) || need_grid_params)
 		{
@@ -932,25 +932,55 @@ void CompilerMSL::build_implicit_builtins()
 
 		if (need_workgroup_size && !has_workgroup_size)
 		{
-			uint32_t offset = ir.increase_bound_by(2);
-			uint32_t type_ptr_id = offset;
-			uint32_t var_id = offset + 1;
-
-			// Create gl_WorkgroupSize.
-			uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
-			SPIRType uint_type_ptr = get<SPIRType>(type_id);
-			uint_type_ptr.op = OpTypePointer;
-			uint_type_ptr.pointer = true;
-			uint_type_ptr.pointer_depth++;
-			uint_type_ptr.parent_type = type_id;
-			uint_type_ptr.storage = StorageClassInput;
+			auto &execution = get_entry_point();
+			// First, check if the workgroup size _constant_ was already defined.
+			// If it was, we don't need to do anything; in fact, we shouldn't.
+			builtin_workgroup_size_id = execution.workgroup_size.constant;
+			if (builtin_workgroup_size_id == 0)
+			{
+				uint32_t var_id = ir.increase_bound_by(1);
 
-			auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
-			ptr_type.self = type_id;
-			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
-			set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
-			builtin_workgroup_size_id = var_id;
-			mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
+				// Create gl_WorkgroupSize.
+				uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
+				// If we have LocalSize or LocalSizeId, use those to define the workgroup size.
+				if (execution.flags.get(ExecutionModeLocalSizeId))
+				{
+					const SPIRConstant *init[] = { &get<SPIRConstant>(execution.workgroup_size.id_x),
+						                           &get<SPIRConstant>(execution.workgroup_size.id_y),
+						                           &get<SPIRConstant>(execution.workgroup_size.id_z) };
+					bool specialized = init[0]->specialization || init[1]->specialization || init[2]->specialization;
+					set<SPIRConstant>(var_id, type_id, init, 3, specialized);
+					execution.workgroup_size.constant = var_id;
+				}
+				else if (execution.flags.get(ExecutionModeLocalSize))
+				{
+					uint32_t offset = ir.increase_bound_by(3);
+					const SPIRConstant *init[] = {
+						&set<SPIRConstant>(offset, get_uint_type_id(), execution.workgroup_size.x, false),
+						&set<SPIRConstant>(offset + 1, get_uint_type_id(), execution.workgroup_size.y, false),
+						&set<SPIRConstant>(offset + 2, get_uint_type_id(), execution.workgroup_size.z, false)
+					};
+					set<SPIRConstant>(var_id, type_id, init, 3, false);
+					execution.workgroup_size.constant = var_id;
+				}
+				else
+				{
+					uint32_t type_ptr_id = ir.increase_bound_by(1);
+					SPIRType uint_type_ptr = get<SPIRType>(type_id);
+					uint_type_ptr.op = OpTypePointer;
+					uint_type_ptr.pointer = true;
+					uint_type_ptr.pointer_depth++;
+					uint_type_ptr.parent_type = type_id;
+					uint_type_ptr.storage = StorageClassInput;
+
+					auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
+					ptr_type.self = type_id;
+					set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+					mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
+				}
+				set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
+				builtin_workgroup_size_id = var_id;
+			}
 		}
 
 		if (!has_frag_depth && force_frag_depth_passthrough)
@@ -1681,6 +1711,7 @@ string CompilerMSL::compile()
 	backend.array_is_value_type_in_buffer_blocks = false;
 	backend.support_pointer_to_pointer = true;
 	backend.implicit_c_integer_promotion_rules = true;
+	backend.supports_spec_constant_array_size = false;
 
 	capture_output_to_buffer = msl_options.capture_output_to_buffer;
 	is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
@@ -1841,7 +1872,7 @@ void CompilerMSL::preprocess_op_codes()
 	if (preproc.uses_atomics)
 	{
 		add_header_line("#include <metal_atomic>");
-		add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"");
+		add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"", false);
 	}
 
 	// Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to
@@ -1850,6 +1881,10 @@ void CompilerMSL::preprocess_op_codes()
 	    (preproc.uses_image_write && !msl_options.supports_msl_version(2, 2)))
 		is_rasterization_disabled = true;
 
+	// FIXME: This currently does not consider BDA side effects, so we cannot deduce const device for BDA.
+	if (preproc.uses_buffer_write || preproc.uses_image_write)
+		has_descriptor_side_effects = true;
+
 	// Tessellation control shaders are run as compute functions in Metal, and so
 	// must capture their output to a buffer.
 	if (is_tesc_shader() || (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation))
@@ -5720,22 +5755,44 @@ void CompilerMSL::emit_header()
 {
 	// This particular line can be overridden during compilation, so make it a flag and not a pragma line.
 	if (suppress_missing_prototypes)
-		statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
+		add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"", false);
 	if (suppress_incompatible_pointer_types_discard_qualifiers)
-		statement("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"");
+		add_pragma_line("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"", false);
 
 	// Disable warning about "sometimes unitialized" when zero-initializing simple threadgroup variables
 	if (suppress_sometimes_unitialized)
-		statement("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\"");
+		add_pragma_line("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\"", false);
 
 	// Disable warning about missing braces for array<T> template to make arrays a value type
 	if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0)
-		statement("#pragma clang diagnostic ignored \"-Wmissing-braces\"");
+		add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-braces\"", false);
+
+	// Floating point fast math compile declarations
+	if (msl_options.use_fast_math_pragmas && msl_options.supports_msl_version(3, 2))
+	{
+		uint32_t contract_mask = FPFastMathModeAllowContractMask;
+		uint32_t relax_mask = (FPFastMathModeNSZMask | FPFastMathModeAllowRecipMask | FPFastMathModeAllowReassocMask);
+		uint32_t fast_mask = (relax_mask | FPFastMathModeNotNaNMask | FPFastMathModeNotInfMask);
+
+		// FP math mode
+		uint32_t fp_flags = get_fp_fast_math_flags(true);
+		const char *math_mode = "safe";
+		if ((fp_flags & fast_mask) == fast_mask)		// Must have all flags
+			math_mode = "fast";
+		else if ((fp_flags & relax_mask) == relax_mask)	// Must have all flags
+			math_mode = "relaxed";
+
+		add_pragma_line(join("#pragma metal fp math_mode(", math_mode, ")"), false);
+
+		// FP contraction
+		const char *contract_mode = ((fp_flags & contract_mask) == contract_mask) ?  "fast" : "off";
+		add_pragma_line(join("#pragma metal fp contract(", contract_mode, ")"), false);
+	}
 
 	for (auto &pragma : pragma_lines)
 		statement(pragma);
 
-	if (!pragma_lines.empty() || suppress_missing_prototypes)
+	if (!pragma_lines.empty())
 		statement("");
 
 	statement("#include <metal_stdlib>");
@@ -5755,18 +5812,23 @@ void CompilerMSL::emit_header()
 		statement("");
 }
 
-void CompilerMSL::add_pragma_line(const string &line)
+void CompilerMSL::add_pragma_line(const string &line, bool recompile_on_unique)
 {
-	auto rslt = pragma_lines.insert(line);
-	if (rslt.second)
-		force_recompile();
+	if (std::find(pragma_lines.begin(), pragma_lines.end(), line) == pragma_lines.end())
+	{
+		pragma_lines.push_back(line);
+		if (recompile_on_unique)
+			force_recompile();
+	}
 }
 
 void CompilerMSL::add_typedef_line(const string &line)
 {
-	auto rslt = typedef_lines.insert(line);
-	if (rslt.second)
+	if (std::find(typedef_lines.begin(), typedef_lines.end(), line) == typedef_lines.end())
+	{
+		typedef_lines.push_back(line);
 		force_recompile();
+	}
 }
 
 // Template struct like spvUnsafeArray<> need to be declared *before* any resources are declared
@@ -8353,9 +8415,22 @@ void CompilerMSL::emit_specialization_constants_and_structs()
 					if (unique_func_constants[constant_id] == c.self)
 						statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id,
 						          ")]];");
-					statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name,
-					          ") ? ", bitcast_expression(type, sc_tmp_type, sc_tmp_name), " : ", constant_expression(c),
-					          ";");
+					// RenderDoc and other instrumentation may reuse the same SpecId with different base types.
+					// We deduplicate to one [[function_constant(id)]] temp and then initialize all variants from it.
+					// Metal forbids as_type to/from 'bool', so if either side is Boolean, avoid bitcasting here and
+					// prefer a value cast via a constructor instead (e.g. uint(tmp) / float(tmp) / bool(tmp)).
+					// This preserves expected toggle semantics and prevents illegal MSL like as_type<uint>(bool_tmp).
+					{
+						string sc_true_expr;
+						if (sc_tmp_type == type.basetype)
+							sc_true_expr = sc_tmp_name;
+						else if (sc_tmp_type == SPIRType::Boolean || type.basetype == SPIRType::Boolean)
+							sc_true_expr = join(sc_type_name, "(", sc_tmp_name, ")");
+						else
+							sc_true_expr = bitcast_expression(type, sc_tmp_type, sc_tmp_name);
+						statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name,
+						          ") ? ", sc_true_expr, " : ", constant_expression(c), ";");
+					}
 				}
 				else if (has_decoration(c.self, DecorationSpecId))
 				{
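
A hedged sketch of the MSL this path is aiming for when instrumentation reuses SpecId 0 for both a uint and a bool constant (identifier names here are hypothetical, not the generator's actual temporaries):

constant uint spvSpecId0Tmp [[function_constant(0)]];
// Same base type: use the temp directly (or bitcast for non-bool type mismatches).
constant uint opt_level = is_function_constant_defined(spvSpecId0Tmp) ? spvSpecId0Tmp : 2u;
// Boolean involved: value-cast via a constructor instead of as_type<>, which Metal rejects for bool.
constant bool use_fast_path = is_function_constant_defined(spvSpecId0Tmp) ? bool(spvSpecId0Tmp) : false;
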
@@ -9161,7 +9236,12 @@ bool CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, cons
 	// and there is a risk of concurrent write access to other components,
 	// we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect.
 	// The MSL compiler refuses to allow component-level access for any non-packed vector types.
-	if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup))
+	// MSL refuses to take the address of or a reference to a vector component, even for packed types, so just
+	// force through the pointer cast. Not much we can do, sadly.
+	// For packed types, we could technically omit this if we know the reference does not have to turn into a pointer
+	// of some kind, but that requires external analysis passes to figure out, and
+	// this case is likely rare enough that we don't need to bother.
+	if (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup)
 	{
 		const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device";
 		expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")");
@@ -9485,21 +9565,21 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		break;
 
 	case OpFMul:
-		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
+		if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
 			MSL_BFOP(spvFMul);
 		else
 			MSL_BOP(*);
 		break;
 
 	case OpFAdd:
-		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
+		if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
 			MSL_BFOP(spvFAdd);
 		else
 			MSL_BOP(+);
 		break;
 
 	case OpFSub:
-		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
+		if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
 			MSL_BFOP(spvFSub);
 		else
 			MSL_BOP(-);
@@ -9997,7 +10077,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 	case OpVectorTimesMatrix:
 	case OpMatrixTimesVector:
 	{
-		if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
+		if (!msl_options.invariant_float_math && !has_legacy_nocontract(ops[0], ops[1]))
 		{
 			CompilerGLSL::emit_instruction(instruction);
 			break;
@@ -10039,7 +10119,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 
 	case OpMatrixTimesMatrix:
 	{
-		if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
+		if (!msl_options.invariant_float_math && !has_legacy_nocontract(ops[0], ops[1]))
 		{
 			CompilerGLSL::emit_instruction(instruction);
 			break;
@@ -10725,13 +10805,13 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh
 	auto *lhs_var = maybe_get_backing_variable(lhs_id);
 	if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
 		lhs_is_array_template = true;
-	else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(lhs_var->basetype)))
+	else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_explicit_layout(get<SPIRType>(lhs_var->basetype)))
 		lhs_is_array_template = false;
 
 	auto *rhs_var = maybe_get_backing_variable(rhs_id);
 	if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
 		rhs_is_array_template = true;
-	else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(rhs_var->basetype)))
+	else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_explicit_layout(get<SPIRType>(rhs_var->basetype)))
 		rhs_is_array_template = false;
 
 	// If threadgroup storage qualifiers are *not* used:
@@ -11182,35 +11262,76 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 
 	auto &restype = get<SPIRType>(result_type);
 
+	// Only precise:: preserves NaN in transcendentals (supposedly; cannot find documentation for this).
+	const auto drop_nan_inf = FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask;
+	bool preserve_nan = (get_fp_fast_math_flags_for_op(result_type, id) & drop_nan_inf) != drop_nan_inf;
+	const char *preserve_str = preserve_nan ? "precise" : "fast";
+
+	// TODO: Emit the default behavior to match existing code. Might need to be revisited.
+	// Only fp32 has the precise:: override.
+#define EMIT_PRECISE_OVERRIDE(glsl_op, op) \
+	case GLSLstd450##glsl_op: \
+		if (restype.basetype == SPIRType::Float && preserve_nan) \
+			emit_unary_func_op(result_type, id, args[0], "precise::" op); \
+		else \
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); \
+		break
+
 	switch (op)
 	{
+	EMIT_PRECISE_OVERRIDE(Cos, "cos");
+	EMIT_PRECISE_OVERRIDE(Sin, "sin");
+	EMIT_PRECISE_OVERRIDE(Tan, "tan");
+	EMIT_PRECISE_OVERRIDE(Acos, "acos");
+	EMIT_PRECISE_OVERRIDE(Asin, "asin");
+	EMIT_PRECISE_OVERRIDE(Atan, "atan");
+	EMIT_PRECISE_OVERRIDE(Exp, "exp");
+	EMIT_PRECISE_OVERRIDE(Exp2, "exp2");
+	EMIT_PRECISE_OVERRIDE(Log, "log");
+	EMIT_PRECISE_OVERRIDE(Log2, "log2");
+	EMIT_PRECISE_OVERRIDE(Sqrt, "sqrt");
+#undef EMIT_PRECISE_OVERRIDE
+
 	case GLSLstd450Sinh:
 		if (restype.basetype == SPIRType::Half)
 		{
+			auto ftype = restype;
+			ftype.basetype = SPIRType::Float;
+
 			// MSL does not have overload for half. Force-cast back to half.
-			auto expr = join("half(fast::sinh(", to_unpacked_expression(args[0]), "))");
+			auto expr = join(type_to_glsl(restype), "(", preserve_str, "::sinh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
 			emit_op(result_type, id, expr, should_forward(args[0]));
 			inherit_expression_dependencies(id, args[0]);
 		}
+		else if (preserve_nan)
+			emit_unary_func_op(result_type, id, args[0], "precise::sinh");
 		else
 			emit_unary_func_op(result_type, id, args[0], "fast::sinh");
 		break;
 	case GLSLstd450Cosh:
 		if (restype.basetype == SPIRType::Half)
 		{
+			auto ftype = restype;
+			ftype.basetype = SPIRType::Float;
+
 			// MSL does not have overload for half. Force-cast back to half.
-			auto expr = join("half(fast::cosh(", to_unpacked_expression(args[0]), "))");
+			auto expr = join(type_to_glsl(restype), "(", preserve_str, "::cosh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
 			emit_op(result_type, id, expr, should_forward(args[0]));
 			inherit_expression_dependencies(id, args[0]);
 		}
+		else if (preserve_nan)
+			emit_unary_func_op(result_type, id, args[0], "precise::cosh");
 		else
 			emit_unary_func_op(result_type, id, args[0], "fast::cosh");
 		break;
 	case GLSLstd450Tanh:
 		if (restype.basetype == SPIRType::Half)
 		{
+			auto ftype = restype;
+			ftype.basetype = SPIRType::Float;
+
 			// MSL does not have overload for half. Force-cast back to half.
-			auto expr = join("half(fast::tanh(", to_unpacked_expression(args[0]), "))");
+			auto expr = join(type_to_glsl(restype), "(", preserve_str, "::tanh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
 			emit_op(result_type, id, expr, should_forward(args[0]));
 			inherit_expression_dependencies(id, args[0]);
 		}
@@ -11221,7 +11342,13 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		if (restype.basetype == SPIRType::Half)
 		{
 			// MSL does not have overload for half. Force-cast back to half.
-			auto expr = join("half(fast::atan2(", to_unpacked_expression(args[0]), ", ", to_unpacked_expression(args[1]), "))");
+			auto ftype = restype;
+			ftype.basetype = SPIRType::Float;
+
+			auto expr = join(type_to_glsl(restype),
+			                 "(", preserve_str, "::atan2(",
+			                 type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), "), ",
+			                 type_to_glsl(ftype), "(", to_unpacked_expression(args[1]), ")))");
 			emit_op(result_type, id, expr, should_forward(args[0]) && should_forward(args[1]));
 			inherit_expression_dependencies(id, args[0]);
 			inherit_expression_dependencies(id, args[1]);
@@ -11230,7 +11357,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 			emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2");
 		break;
 	case GLSLstd450InverseSqrt:
-		emit_unary_func_op(result_type, id, args[0], "rsqrt");
+		if (restype.basetype == SPIRType::Float && preserve_nan)
+			emit_unary_func_op(result_type, id, args[0], "precise::rsqrt");
+		else
+			emit_unary_func_op(result_type, id, args[0], "rsqrt");
 		break;
 	case GLSLstd450RoundEven:
 		emit_unary_func_op(result_type, id, args[0], "rint");
@@ -11462,11 +11592,14 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	{
 		auto &exp_type = expression_type(args[0]);
 		// MSL does not support scalar versions here.
-		// MSL has no implementation for normalize in the fast:: namespace for half2 and half3
+		// MSL has no implementation for normalize in the fast:: namespace for half
 		// Returns -1 or 1 for valid input, sign() does the job.
+
+		// precise::normalize asm looks ridiculous.
+		// Don't think this actually matters unless proven otherwise.
 		if (exp_type.vecsize == 1)
 			emit_unary_func_op(result_type, id, args[0], "sign");
-		else if (exp_type.vecsize <= 3 && exp_type.basetype == SPIRType::Half)
+		else if (exp_type.basetype == SPIRType::Half)
 			emit_unary_func_op(result_type, id, args[0], "normalize");
 		else
 			emit_unary_func_op(result_type, id, args[0], "fast::normalize");
@@ -11529,7 +11662,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 
 	case GLSLstd450Pow:
 		// powr makes x < 0.0 undefined, just like SPIR-V.
-		emit_binary_func_op(result_type, id, args[0], args[1], "powr");
+		if (restype.basetype == SPIRType::Float && preserve_nan)
+			emit_binary_func_op(result_type, id, args[0], args[1], "precise::powr");
+		else
+			emit_binary_func_op(result_type, id, args[0], args[1], "powr");
 		break;
 
 	default:
@@ -13753,6 +13889,24 @@ uint32_t CompilerMSL::get_or_allocate_builtin_output_member_location(spv::BuiltI
 	return loc;
 }
 
+bool CompilerMSL::entry_point_returns_stage_output() const
+{
+	if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)
+		return false;
+	bool ep_should_return_output = !get_is_rasterization_disabled();
+	return stage_out_var_id && ep_should_return_output;
+}
+
+bool CompilerMSL::entry_point_requires_const_device_buffers() const
+{
+	// For fragment, we don't really need it, but it might help avoid pessimization
+	// if the compiler deduces it needs to use late-Z for whatever reason.
+	return (get_execution_model() == ExecutionModelFragment && !has_descriptor_side_effects) ||
+	       (entry_point_returns_stage_output() &&
+	        (get_execution_model() == ExecutionModelVertex ||
+	         get_execution_model() == ExecutionModelTessellationEvaluation));
+}
+
 // Returns the type declaration for a function, including the
 // entry type if the current function is the entry point function
 string CompilerMSL::func_type_decl(SPIRType &type)
@@ -13763,8 +13917,7 @@ string CompilerMSL::func_type_decl(SPIRType &type)
 		return return_type;
 
 	// If an outgoing interface block has been defined, and it should be returned, override the entry point return type
-	bool ep_should_return_output = !get_is_rasterization_disabled();
-	if (stage_out_var_id && ep_should_return_output)
+	if (entry_point_returns_stage_output())
 		return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type, 0);
 
 	// Prepend a entry type, based on the execution model
@@ -13888,16 +14041,14 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo
 
 	case StorageClassStorageBuffer:
 	{
-		// For arguments from variable pointers, we use the write count deduction, so
-		// we should not assume any constness here. Only for global SSBOs.
-		bool readonly = false;
-		if (!var || has_decoration(type.self, DecorationBlock))
-			readonly = flags.get(DecorationNonWritable);
-
-		if (decoration_flags_signal_coherent(flags))
-			readonly = false;
-
-		addr_space = readonly ? "const device" : "device";
+		// When dealing with descriptor aliasing, it becomes very problematic to make use of
+		// readonly qualifiers.
+		// If rasterization is not disabled in vertex/tese, Metal does not allow side effects and refuses to compile "device",
+		// even if there are no writes. Just force const device.
+		if (entry_point_requires_const_device_buffers())
+			addr_space = "const device";
+		else
+			addr_space = "device";
 		break;
 	}
 
@@ -13914,7 +14065,12 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo
 		{
 			bool ssbo = has_decoration(type.self, DecorationBufferBlock);
 			if (ssbo)
-				addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
+			{
+				if (entry_point_requires_const_device_buffers())
+					addr_space = "const device";
+				else
+					addr_space = "device";
+			}
 			else
 				addr_space = "constant";
 		}
@@ -16553,7 +16709,7 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type, uint32_t variable_i
 	default:
 		if (type_is_array_of_pointers(type) || using_builtin_array())
 		{
-			const SPIRVariable *var = variable_id ? &get<SPIRVariable>(variable_id) : nullptr;
+			const SPIRVariable *var = variable_id ? maybe_get<SPIRVariable>(variable_id) : nullptr;
 			if (var && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer) &&
 			    is_array(get_variable_data_type(*var)))
 			{
@@ -16854,6 +17010,8 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb
 		if (p_var && p_var->basevariable)
 			p_var = maybe_get<SPIRVariable>(p_var->basevariable);
 
+		bool has_access_qualifier = true;
+
 		switch (img_type.access)
 		{
 		case AccessQualifierReadOnly:
@@ -16879,12 +17037,21 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb
 
 				img_type_name += "write";
 			}
+			else
+			{
+				has_access_qualifier = false;
+			}
 			break;
 		}
 		}
 
 		if (p_var && has_decoration(p_var->self, DecorationCoherent) && msl_options.supports_msl_version(3, 2))
+		{
+			// Cannot declare memory_coherence_device without access qualifier.
+			if (!has_access_qualifier)
+				img_type_name += ", access::read";
 			img_type_name += ", memory_coherence_device";
+		}
 	}
 
 	img_type_name += ">";
@@ -17317,13 +17484,18 @@ void CompilerMSL::emit_subgroup_cluster_op_cast(uint32_t result_type, uint32_t r
 	inherit_expression_dependencies(result_id, op0);
 }
 
+// Note: Metal forbids bitcasting to/from 'bool' using as_type. This function is used widely
+// for generating casts in the backend. To avoid generating illegal MSL when the canonical
+// function constant type (from deduplicated SpecId) is Boolean, fall back to value-cast in
+// that case by returning type_to_glsl(out_type) instead of as_type<...>.
 string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
 {
 	if (out_type.basetype == in_type.basetype)
 		return "";
 
-	assert(out_type.basetype != SPIRType::Boolean);
-	assert(in_type.basetype != SPIRType::Boolean);
+	// Avoid bitcasting to/from booleans in MSL; use value cast instead.
+	if (out_type.basetype == SPIRType::Boolean || in_type.basetype == SPIRType::Boolean)
+		return type_to_glsl(out_type);
 
 	bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type) && (out_type.vecsize == in_type.vecsize);
 	bool same_size_cast = (out_type.width * out_type.vecsize) == (in_type.width * in_type.vecsize);
@@ -17679,6 +17851,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 	case BuiltInGlobalInvocationId:
 		return "thread_position_in_grid";
 
+	case BuiltInWorkgroupSize:
+		return "threads_per_threadgroup";
+
 	case BuiltInWorkgroupId:
 		return "threadgroup_position_in_grid";
 
@@ -17864,6 +18039,7 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id)
 	case BuiltInLocalInvocationId:
 	case BuiltInNumWorkgroups:
 	case BuiltInWorkgroupId:
+	case BuiltInWorkgroupSize:
 		return "uint3";
 	case BuiltInLocalInvocationIndex:
 	case BuiltInNumSubgroups:
@@ -18531,11 +18707,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 
 	case OpFAdd:
 	case OpFSub:
-		if (compiler.msl_options.invariant_float_math ||
-		    compiler.has_decoration(args[1], DecorationNoContraction))
-		{
+		if (compiler.msl_options.invariant_float_math || compiler.has_legacy_nocontract(args[0], args[1]))
 			return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub;
-		}
 		break;
 
 	case OpFMul:
@@ -18543,11 +18716,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 	case OpMatrixTimesVector:
 	case OpVectorTimesMatrix:
 	case OpMatrixTimesMatrix:
-		if (compiler.msl_options.invariant_float_math ||
-		    compiler.has_decoration(args[1], DecorationNoContraction))
-		{
+		if (compiler.msl_options.invariant_float_math || compiler.has_legacy_nocontract(args[0], args[1]))
 			return SPVFuncImplFMul;
-		}
 		break;
 
 	case OpQuantizeToF16:
@@ -19572,7 +19742,7 @@ void CompilerMSL::analyze_argument_buffers()
 				SetBindingPair pair = { desc_set, binding };
 
 				if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler ||
-				    resource.basetype == SPIRType::SampledImage)
+				    resource.basetype == SPIRType::SampledImage || resource.basetype == SPIRType::AccelerationStructure)
 				{
 					// Drop pointer information when we emit the resources into a struct.
 					buffer_type.member_types.push_back(get_variable_data_type_id(var));
@@ -19827,6 +19997,30 @@ bool CompilerMSL::specialization_constant_is_macro(uint32_t const_id) const
 	return constant_macro_ids.find(const_id) != constant_macro_ids.end();
 }
 
+// Start with all fast math flags enabled, and selectively disable them based on execution modes and float controls.
+uint32_t CompilerMSL::get_fp_fast_math_flags(bool incl_ops) const
+{
+	uint32_t fp_flags = ~0;
+	auto &ep = get_entry_point();
+
+	if (ep.flags.get(ExecutionModeSignedZeroInfNanPreserve))
+		fp_flags &= ~(FPFastMathModeNSZMask | FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask);
+
+	if (ep.flags.get(ExecutionModeContractionOff))
+		fp_flags &= ~(FPFastMathModeAllowContractMask);
+
+	for (auto &fp_pair : ep.fp_fast_math_defaults)
+		if (fp_pair.second)
+			fp_flags &= get<SPIRConstant>(fp_pair.second).scalar();
+
+	if (incl_ops)
+		for (auto &p_m : ir.meta)
+			if (p_m.second.decoration.decoration_flags.get(DecorationFPFastMathMode))
+				fp_flags &= p_m.second.decoration.fp_fast_math_mode;
+
+	return fp_flags;
+}
+
 void CompilerMSL::emit_block_hints(const SPIRBlock &)
 {
 }

+ 18 - 3
3rdparty/spirv-cross/spirv_msl.hpp

@@ -542,6 +542,10 @@ public:
 		// The result can be queried with get_is_rasterization_disabled.
 		bool auto_disable_rasterization = false;
 
+		// Use Fast Math pragmas in MSL code, based on SPIR-V float controls and FP ExecutionModes.
+		// Requires MSL 3.2 or above, and has no effect with earlier MSL versions.
+		bool use_fast_math_pragmas = false;
+
 		bool is_ios() const
 		{
 			return platform == iOS;
@@ -767,6 +771,14 @@ public:
 	// These must only be called after a successful call to CompilerMSL::compile().
 	bool specialization_constant_is_macro(uint32_t constant_id) const;
 
+	// Returns a mask of SPIR-V FP Fast Math Mode flags that represents the set of flags that can be applied
+	// across all floating-point types. Each FPFastMathDefault execution mode operation identifies the flags
+	// for one floating-point type, and the value returned here is a bitwise-AND combination across all types.
+	// If incl_ops is enabled, the FPFastMathMode of any SPIR-V operation is also included in the bitwise-AND
+	// to determine the minimal fast-math that applies to all default execution modes and all operations.
+	// The returned value is also affected by execution modes SignedZeroInfNanPreserve and ContractionOff.
+	uint32_t get_fp_fast_math_flags(bool incl_ops) const;
+
 protected:
 	// An enum of SPIR-V functions that are implemented in additional
 	// source code that is added to the shader if necessary.
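
A minimal C++ sketch of how a client might opt into the new pragma emission and query the merged mask; the function and variable names are assumptions and error handling is omitted.

#include "spirv_msl.hpp"

std::string compile_with_fast_math_pragmas(std::vector<uint32_t> spirv_binary)
{
	spirv_cross::CompilerMSL msl(std::move(spirv_binary));

	auto opts = msl.get_msl_options();
	opts.set_msl_version(3, 2);          // Pragmas require MSL 3.2 or later.
	opts.use_fast_math_pragmas = true;   // Option added in this change.
	msl.set_msl_options(opts);

	std::string source = msl.compile();

	// Bitwise-AND of all FPFastMathDefault modes, plus per-op FPFastMathMode decorations when true is passed.
	uint32_t fast_math_flags = msl.get_fp_fast_math_flags(true);
	(void)fast_math_flags;
	return source;
}
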
@@ -1047,6 +1059,8 @@ protected:
 
 	void fix_up_shader_inputs_outputs();
 
+	bool entry_point_returns_stage_output() const;
+	bool entry_point_requires_const_device_buffers() const;
 	std::string func_type_decl(SPIRType &type);
 	std::string entry_point_args_classic(bool append_comma);
 	std::string entry_point_args_argument_buffer(bool append_comma);
@@ -1126,7 +1140,7 @@ protected:
 	                         uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
 	                         bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0);
 	const char *get_memory_order(uint32_t spv_mem_sem);
-	void add_pragma_line(const std::string &line);
+	void add_pragma_line(const std::string &line, bool recompile_on_unique);
 	void add_typedef_line(const std::string &line);
 	void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
 	bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id,
@@ -1209,8 +1223,8 @@ protected:
 	std::unordered_map<uint32_t, uint32_t> fragment_output_components;
 	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location;
 	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_output_location;
-	std::set<std::string> pragma_lines;
-	std::set<std::string> typedef_lines;
+	std::vector<std::string> pragma_lines;
+	std::vector<std::string> typedef_lines;
 	SmallVector<uint32_t> vars_needing_early_declaration;
 	std::unordered_set<uint32_t> constant_macro_ids;
 
@@ -1252,6 +1266,7 @@ protected:
 	bool using_builtin_array() const;
 
 	bool is_rasterization_disabled = false;
+	bool has_descriptor_side_effects = false;
 	bool capture_output_to_buffer = false;
 	bool needs_swizzle_buffer_def = false;
 	bool used_swizzle_buffer = false;

+ 112 - 14
3rdparty/spirv-cross/spirv_parser.cpp

@@ -213,8 +213,8 @@ void Parser::parse(const Instruction &instruction)
 
 	case OpSource:
 	{
-		auto lang = static_cast<SourceLanguage>(ops[0]);
-		switch (lang)
+		ir.source.lang = static_cast<SourceLanguage>(ops[0]);
+		switch (ir.source.lang)
 		{
 		case SourceLanguageESSL:
 			ir.source.es = true;
@@ -318,6 +318,19 @@ void Parser::parse(const Instruction &instruction)
 					ir.load_type_width.insert({ ops[1], type->width });
 			}
 		}
+		else if (op == OpExtInst)
+		{
+			// Don't want to deal with ForwardRefs here.
+
+			auto &ext = get<SPIRExtension>(ops[2]);
+			if (ext.ext == SPIRExtension::NonSemanticShaderDebugInfo)
+			{
+				// Parse global ShaderDebugInfo we care about.
+				// Just forward the string information.
+				if (ops[3] == SPIRExtension::DebugSource)
+					set<SPIRString>(ops[1], get<SPIRString>(ops[4]).str);
+			}
+		}
 		break;
 	}
 
@@ -369,6 +382,30 @@ void Parser::parse(const Instruction &instruction)
 			execution.output_primitives = ops[2];
 			break;
 
+		case ExecutionModeSignedZeroInfNanPreserve:
+			switch (ops[2])
+			{
+			case 8:
+				execution.signed_zero_inf_nan_preserve_8 = true;
+				break;
+
+			case 16:
+				execution.signed_zero_inf_nan_preserve_16 = true;
+				break;
+
+			case 32:
+				execution.signed_zero_inf_nan_preserve_32 = true;
+				break;
+
+			case 64:
+				execution.signed_zero_inf_nan_preserve_64 = true;
+				break;
+
+			default:
+				SPIRV_CROSS_THROW("Invalid bit-width for SignedZeroInfNanPreserve.");
+			}
+			break;
+
 		default:
 			break;
 		}
@@ -381,13 +418,21 @@ void Parser::parse(const Instruction &instruction)
 		auto mode = static_cast<ExecutionMode>(ops[1]);
 		execution.flags.set(mode);
 
-		if (mode == ExecutionModeLocalSizeId)
+		switch (mode)
 		{
+		case ExecutionModeLocalSizeId:
 			execution.workgroup_size.id_x = ops[2];
 			execution.workgroup_size.id_y = ops[3];
 			execution.workgroup_size.id_z = ops[4];
-		}
+			break;
+
+		case ExecutionModeFPFastMathDefault:
+			execution.fp_fast_math_defaults[ops[2]] = ops[3];
+			break;
 
+		default:
+			break;
+		}
 		break;
 	}
 
@@ -538,7 +583,7 @@ void Parser::parse(const Instruction &instruction)
 		uint32_t width = ops[1];
 		auto &type = set<SPIRType>(id, op);
 
-		if (width != 16 && length > 2)
+		if (width != 16 && width != 8 && length > 2)
 			SPIRV_CROSS_THROW("Unrecognized FP encoding mode for OpTypeFloat.");
 
 		if (width == 64)
@@ -557,6 +602,17 @@ void Parser::parse(const Instruction &instruction)
 			else
 				type.basetype = SPIRType::Half;
 		}
+		else if (width == 8)
+		{
+			if (length < 2)
+				SPIRV_CROSS_THROW("Missing encoding for OpTypeFloat 8.");
+			else if (ops[2] == spv::FPEncodingFloat8E4M3EXT)
+				type.basetype = SPIRType::FloatE4M3;
+			else if (ops[2] == spv::FPEncodingFloat8E5M2EXT)
+				type.basetype = SPIRType::FloatE5M2;
+			else
+				SPIRV_CROSS_THROW("Invalid encoding for OpTypeFloat 8.");
+		}
 		else
 			SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type.");
 		type.width = width;
@@ -614,15 +670,33 @@ void Parser::parse(const Instruction &instruction)
 		auto &matrixbase = set<SPIRType>(id, base);
 
 		matrixbase.op = op;
-		matrixbase.cooperative.scope_id = ops[2];
-		matrixbase.cooperative.rows_id = ops[3];
-		matrixbase.cooperative.columns_id = ops[4];
-		matrixbase.cooperative.use_id = ops[5];
+		matrixbase.ext.cooperative.scope_id = ops[2];
+		matrixbase.ext.cooperative.rows_id = ops[3];
+		matrixbase.ext.cooperative.columns_id = ops[4];
+		matrixbase.ext.cooperative.use_id = ops[5];
 		matrixbase.self = id;
 		matrixbase.parent_type = ops[1];
 		break;
 	}
 
+	case OpTypeCooperativeVectorNV:
+	{
+		uint32_t id = ops[0];
+		auto &type = set<SPIRType>(id, op);
+
+		type.basetype = SPIRType::CoopVecNV;
+		type.op = op;
+		type.ext.coopVecNV.component_type_id = ops[1];
+		type.ext.coopVecNV.component_count_id = ops[2];
+		type.parent_type = ops[1];
+
+		// CoopVecNV can be used with integer operations like SMax, where
+		// spirv-opt does explicit checks on integer bit-width.
+		auto component_type = get<SPIRType>(type.ext.coopVecNV.component_type_id);
+		type.width = component_type.width;
+		break;
+	}
+
 	case OpTypeArray:
 	{
 		uint32_t id = ops[0];
@@ -820,6 +894,20 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}
 
+	case OpTypeTensorARM:
+	{
+		uint32_t id = ops[0];
+		auto &type = set<SPIRType>(id, op);
+		type.basetype = SPIRType::Tensor;
+		type.ext.tensor = {};
+		type.ext.tensor.type = ops[1];
+		if (length >= 3)
+			type.ext.tensor.rank = ops[2];
+		if (length >= 4)
+			type.ext.tensor.shape = ops[3];
+		break;
+	}
+
 	// Variable declaration
 	// All variables are essentially pointers with a storage qualifier.
 	case OpVariable:
@@ -866,17 +954,27 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}
 
-		// Constants
+	// Constants
 	case OpSpecConstant:
 	case OpConstant:
+	case OpConstantCompositeReplicateEXT:
+	case OpSpecConstantCompositeReplicateEXT:
 	{
 		uint32_t id = ops[1];
 		auto &type = get<SPIRType>(ops[0]);
-
-		if (type.width > 32)
-			set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant);
+		if (op == OpConstantCompositeReplicateEXT || op == OpSpecConstantCompositeReplicateEXT)
+		{
+			auto subconstant = uint32_t(ops[2]);
+			set<SPIRConstant>(id, ops[0], &subconstant, 1, op == OpSpecConstantCompositeReplicateEXT, true);
+		}
 		else
-			set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant);
+		{
+
+			if (type.width > 32)
+				set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant);
+			else
+				set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant);
+		}
 		break;
 	}
 

+ 10 - 1
3rdparty/spirv-cross/spirv_reflect.cpp

@@ -477,6 +477,12 @@ string CompilerReflection::execution_model_to_str(spv::ExecutionModel model)
 		return "rmiss";
 	case ExecutionModelCallableNV:
 		return "rcall";
+	case ExecutionModelMeshNV:
+	case ExecutionModelMeshEXT:
+		return "mesh";
+	case ExecutionModelTaskNV:
+	case ExecutionModelTaskEXT:
+		return "task";
 	default:
 		return "???";
 	}
@@ -504,7 +510,9 @@ void CompilerReflection::emit_entry_points()
 			json_stream->begin_json_object();
 			json_stream->emit_json_key_value("name", e.name);
 			json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model));
-			if (e.execution_model == ExecutionModelGLCompute)
+			if (e.execution_model == ExecutionModelGLCompute || e.execution_model == spv::ExecutionModelMeshEXT ||
+			    e.execution_model == spv::ExecutionModelMeshNV || e.execution_model == spv::ExecutionModelTaskEXT ||
+			    e.execution_model == spv::ExecutionModelTaskNV)
 			{
 				const auto &spv_entry = get_entry_point(e.name, e.execution_model);
 
@@ -547,6 +555,7 @@ void CompilerReflection::emit_resources()
 	emit_resources("push_constants", res.push_constant_buffers);
 	emit_resources("counters", res.atomic_counters);
 	emit_resources("acceleration_structures", res.acceleration_structures);
+	emit_resources("tensors", res.tensors);
 }
 
 void CompilerReflection::emit_resources(const char *tag, const SmallVector<Resource> &resources)

Some files were not shown because too many files changed in this diff