
Updated spirv-cross.

Бранимир Караџић, 3 months ago
commit 686190d7ff

+ 1 - 0
3rdparty/spirv-cross/main.cpp

@@ -536,6 +536,7 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res
 	print_resources(compiler, "push", res.push_constant_buffers);
 	print_resources(compiler, "counters", res.atomic_counters);
 	print_resources(compiler, "acceleration structures", res.acceleration_structures);
+	print_resources(compiler, "tensors", res.tensors);
 	print_resources(compiler, "record buffers", res.shader_record_buffers);
 	print_resources(compiler, spv::StorageClassInput, res.builtin_inputs);
 	print_resources(compiler, spv::StorageClassOutput, res.builtin_outputs);
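
For reference, the new tensors list added here and in spirv_cross.hpp is consumed like the other reflection vectors. A minimal consumer-side sketch (not part of the commit), assuming a SPIR-V module already loaded into spirv_words; the function and variable names are made up, the API calls are the existing SPIRV-Cross reflection interface:

#include <spirv_cross.hpp>
#include <cstdint>
#include <cstdio>
#include <vector>

void list_tensor_resources(const std::vector<uint32_t> &spirv_words)
{
	spirv_cross::Compiler compiler(spirv_words);
	spirv_cross::ShaderResources resources = compiler.get_shader_resources();

	// resources.tensors is the vector populated for SPIRType::Tensor variables.
	for (const spirv_cross::Resource &res : resources.tensors)
	{
		uint32_t set = compiler.get_decoration(res.id, spv::DecorationDescriptorSet);
		uint32_t binding = compiler.get_decoration(res.id, spv::DecorationBinding);
		printf("tensor %s: set=%u binding=%u\n", res.name.c_str(), set, binding);
	}
}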

+ 104 - 22
3rdparty/spirv-cross/spirv.h

@@ -1,27 +1,11 @@
 /*
-** Copyright (c) 2014-2024 The Khronos Group Inc.
+** Copyright: 2014-2024 The Khronos Group Inc.
+** License: MIT
 ** 
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and/or associated documentation files (the "Materials"),
-** to deal in the Materials without restriction, including without limitation
-** the rights to use, copy, modify, merge, publish, distribute, sublicense,
-** and/or sell copies of the Materials, and to permit persons to whom the
-** Materials are furnished to do so, subject to the following conditions:
-** 
-** The above copyright notice and this permission notice shall be included in
-** all copies or substantial portions of the Materials.
-** 
-** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
-** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
-** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
-** 
-** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
-** IN THE MATERIALS.
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+** KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+** SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+** https://www.khronos.org/registry/
 */
 
 /*
@@ -176,6 +160,8 @@ typedef enum SpvExecutionMode_ {
     SpvExecutionModeSignedZeroInfNanPreserve = 4461,
     SpvExecutionModeRoundingModeRTE = 4462,
     SpvExecutionModeRoundingModeRTZ = 4463,
+    SpvExecutionModeNonCoherentTileAttachmentReadQCOM = 4489,
+    SpvExecutionModeTileShadingRateQCOM = 4490,
     SpvExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
     SpvExecutionModeStencilRefReplacingEXT = 5027,
     SpvExecutionModeCoalescingAMDX = 5069,
@@ -245,6 +231,7 @@ typedef enum SpvStorageClass_ {
     SpvStorageClassImage = 11,
     SpvStorageClassStorageBuffer = 12,
     SpvStorageClassTileImageEXT = 4172,
+    SpvStorageClassTileAttachmentQCOM = 4491,
     SpvStorageClassNodePayloadAMDX = 5068,
     SpvStorageClassCallableDataKHR = 5328,
     SpvStorageClassCallableDataNV = 5328,
@@ -554,6 +541,7 @@ typedef enum SpvDecoration_ {
     SpvDecorationMaxByteOffset = 45,
     SpvDecorationAlignmentId = 46,
     SpvDecorationMaxByteOffsetId = 47,
+    SpvDecorationSaturatedToLargestFloat8NormalConversionEXT = 4216,
     SpvDecorationNoSignedWrap = 4469,
     SpvDecorationNoUnsignedWrap = 4470,
     SpvDecorationWeightTextureQCOM = 4487,
@@ -723,6 +711,9 @@ typedef enum SpvBuiltIn_ {
     SpvBuiltInDeviceIndex = 4438,
     SpvBuiltInViewIndex = 4440,
     SpvBuiltInShadingRateKHR = 4444,
+    SpvBuiltInTileOffsetQCOM = 4492,
+    SpvBuiltInTileDimensionQCOM = 4493,
+    SpvBuiltInTileApronSizeQCOM = 4494,
     SpvBuiltInBaryCoordNoPerspAMD = 4992,
     SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993,
     SpvBuiltInBaryCoordNoPerspSampleAMD = 4994,
@@ -1073,7 +1064,13 @@ typedef enum SpvCapability_ {
     SpvCapabilityTileImageColorReadAccessEXT = 4166,
     SpvCapabilityTileImageDepthReadAccessEXT = 4167,
     SpvCapabilityTileImageStencilReadAccessEXT = 4168,
+    SpvCapabilityTensorsARM = 4174,
+    SpvCapabilityStorageTensorArrayDynamicIndexingARM = 4175,
+    SpvCapabilityStorageTensorArrayNonUniformIndexingARM = 4176,
+    SpvCapabilityGraphARM = 4191,
     SpvCapabilityCooperativeMatrixLayoutsARM = 4201,
+    SpvCapabilityFloat8EXT = 4212,
+    SpvCapabilityFloat8CooperativeMatrixEXT = 4213,
     SpvCapabilityFragmentShadingRateKHR = 4422,
     SpvCapabilitySubgroupBallotKHR = 4423,
     SpvCapabilityDrawParameters = 4427,
@@ -1109,6 +1106,7 @@ typedef enum SpvCapability_ {
     SpvCapabilityTextureSampleWeightedQCOM = 4484,
     SpvCapabilityTextureBoxFilterQCOM = 4485,
     SpvCapabilityTextureBlockMatchQCOM = 4486,
+    SpvCapabilityTileShadingQCOM = 4495,
     SpvCapabilityTextureBlockMatch2QCOM = 4498,
     SpvCapabilityFloat16ImageAMD = 5008,
     SpvCapabilityImageGatherBiasLodAMD = 5009,
@@ -1119,6 +1117,8 @@ typedef enum SpvCapability_ {
     SpvCapabilityShaderClockKHR = 5055,
     SpvCapabilityShaderEnqueueAMDX = 5067,
     SpvCapabilityQuadControlKHR = 5087,
+    SpvCapabilityInt4TypeINTEL = 5112,
+    SpvCapabilityInt4CooperativeMatrixINTEL = 5114,
     SpvCapabilityBFloat16TypeKHR = 5116,
     SpvCapabilityBFloat16DotProductKHR = 5117,
     SpvCapabilityBFloat16CooperativeMatrixKHR = 5118,
@@ -1287,6 +1287,7 @@ typedef enum SpvCapability_ {
     SpvCapabilityMaskedGatherScatterINTEL = 6427,
     SpvCapabilityCacheControlsINTEL = 6441,
     SpvCapabilityRegisterLimitsINTEL = 6460,
+    SpvCapabilityBindlessImagesINTEL = 6528,
     SpvCapabilityMax = 0x7fffffff,
 } SpvCapability;
 
@@ -1463,6 +1464,24 @@ typedef enum SpvTensorAddressingOperandsMask_ {
     SpvTensorAddressingOperandsDecodeFuncMask = 0x00000002,
 } SpvTensorAddressingOperandsMask;
 
+typedef enum SpvTensorOperandsShift_ {
+    SpvTensorOperandsNontemporalARMShift = 0,
+    SpvTensorOperandsOutOfBoundsValueARMShift = 1,
+    SpvTensorOperandsMakeElementAvailableARMShift = 2,
+    SpvTensorOperandsMakeElementVisibleARMShift = 3,
+    SpvTensorOperandsNonPrivateElementARMShift = 4,
+    SpvTensorOperandsMax = 0x7fffffff,
+} SpvTensorOperandsShift;
+
+typedef enum SpvTensorOperandsMask_ {
+    SpvTensorOperandsMaskNone = 0,
+    SpvTensorOperandsNontemporalARMMask = 0x00000001,
+    SpvTensorOperandsOutOfBoundsValueARMMask = 0x00000002,
+    SpvTensorOperandsMakeElementAvailableARMMask = 0x00000004,
+    SpvTensorOperandsMakeElementVisibleARMMask = 0x00000008,
+    SpvTensorOperandsNonPrivateElementARMMask = 0x00000010,
+} SpvTensorOperandsMask;
+
 typedef enum SpvInitializationModeQualifier_ {
     SpvInitializationModeQualifierInitOnDeviceReprogramINTEL = 0,
     SpvInitializationModeQualifierInitOnDeviceResetINTEL = 1,
@@ -1549,6 +1568,8 @@ typedef enum SpvRawAccessChainOperandsMask_ {
 
 typedef enum SpvFPEncoding_ {
     SpvFPEncodingBFloat16KHR = 0,
+    SpvFPEncodingFloat8E4M3EXT = 4214,
+    SpvFPEncodingFloat8E5M2EXT = 4215,
     SpvFPEncodingMax = 0x7fffffff,
 } SpvFPEncoding;
 
@@ -1927,6 +1948,17 @@ typedef enum SpvOp_ {
     SpvOpColorAttachmentReadEXT = 4160,
     SpvOpDepthAttachmentReadEXT = 4161,
     SpvOpStencilAttachmentReadEXT = 4162,
+    SpvOpTypeTensorARM = 4163,
+    SpvOpTensorReadARM = 4164,
+    SpvOpTensorWriteARM = 4165,
+    SpvOpTensorQuerySizeARM = 4166,
+    SpvOpGraphConstantARM = 4181,
+    SpvOpGraphEntryPointARM = 4182,
+    SpvOpGraphARM = 4183,
+    SpvOpGraphInputARM = 4184,
+    SpvOpGraphSetOutputARM = 4185,
+    SpvOpGraphEndARM = 4186,
+    SpvOpTypeGraphARM = 4190,
     SpvOpTerminateInvocation = 4416,
     SpvOpTypeUntypedPointerKHR = 4417,
     SpvOpUntypedVariableKHR = 4418,
@@ -2385,6 +2417,9 @@ typedef enum SpvOp_ {
     SpvOpRoundFToTF32INTEL = 6426,
     SpvOpMaskedGatherINTEL = 6428,
     SpvOpMaskedScatterINTEL = 6429,
+    SpvOpConvertHandleToImageINTEL = 6529,
+    SpvOpConvertHandleToSamplerINTEL = 6530,
+    SpvOpConvertHandleToSampledImageINTEL = 6531,
     SpvOpMax = 0x7fffffff,
 } SpvOp;
 
@@ -2743,6 +2778,17 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy
     case SpvOpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
     case SpvOpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
     case SpvOpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
+    case SpvOpTypeTensorARM: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTensorReadARM: *hasResult = true; *hasResultType = true; break;
+    case SpvOpTensorWriteARM: *hasResult = false; *hasResultType = false; break;
+    case SpvOpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGraphConstantARM: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break;
+    case SpvOpGraphARM: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGraphInputARM: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break;
+    case SpvOpGraphEndARM: *hasResult = false; *hasResultType = false; break;
+    case SpvOpTypeGraphARM: *hasResult = true; *hasResultType = false; break;
     case SpvOpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
     case SpvOpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break;
     case SpvOpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break;
@@ -3190,6 +3236,9 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy
     case SpvOpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break;
     case SpvOpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break;
     case SpvOpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break;
+    case SpvOpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break;
     }
 }
 inline const char* SpvSourceLanguageToString(SpvSourceLanguage value) {
@@ -3305,6 +3354,8 @@ inline const char* SpvExecutionModeToString(SpvExecutionMode value) {
     case SpvExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve";
     case SpvExecutionModeRoundingModeRTE: return "RoundingModeRTE";
     case SpvExecutionModeRoundingModeRTZ: return "RoundingModeRTZ";
+    case SpvExecutionModeNonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM";
+    case SpvExecutionModeTileShadingRateQCOM: return "TileShadingRateQCOM";
     case SpvExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD";
     case SpvExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT";
     case SpvExecutionModeCoalescingAMDX: return "CoalescingAMDX";
@@ -3371,6 +3422,7 @@ inline const char* SpvStorageClassToString(SpvStorageClass value) {
     case SpvStorageClassImage: return "Image";
     case SpvStorageClassStorageBuffer: return "StorageBuffer";
     case SpvStorageClassTileImageEXT: return "TileImageEXT";
+    case SpvStorageClassTileAttachmentQCOM: return "TileAttachmentQCOM";
     case SpvStorageClassNodePayloadAMDX: return "NodePayloadAMDX";
     case SpvStorageClassCallableDataKHR: return "CallableDataKHR";
     case SpvStorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR";
@@ -3619,6 +3671,7 @@ inline const char* SpvDecorationToString(SpvDecoration value) {
     case SpvDecorationMaxByteOffset: return "MaxByteOffset";
     case SpvDecorationAlignmentId: return "AlignmentId";
     case SpvDecorationMaxByteOffsetId: return "MaxByteOffsetId";
+    case SpvDecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT";
     case SpvDecorationNoSignedWrap: return "NoSignedWrap";
     case SpvDecorationNoUnsignedWrap: return "NoUnsignedWrap";
     case SpvDecorationWeightTextureQCOM: return "WeightTextureQCOM";
@@ -3778,6 +3831,9 @@ inline const char* SpvBuiltInToString(SpvBuiltIn value) {
     case SpvBuiltInDeviceIndex: return "DeviceIndex";
     case SpvBuiltInViewIndex: return "ViewIndex";
     case SpvBuiltInShadingRateKHR: return "ShadingRateKHR";
+    case SpvBuiltInTileOffsetQCOM: return "TileOffsetQCOM";
+    case SpvBuiltInTileDimensionQCOM: return "TileDimensionQCOM";
+    case SpvBuiltInTileApronSizeQCOM: return "TileApronSizeQCOM";
     case SpvBuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD";
     case SpvBuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD";
     case SpvBuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD";
@@ -3958,7 +4014,13 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
     case SpvCapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT";
     case SpvCapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
     case SpvCapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
+    case SpvCapabilityTensorsARM: return "TensorsARM";
+    case SpvCapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM";
+    case SpvCapabilityStorageTensorArrayNonUniformIndexingARM: return "StorageTensorArrayNonUniformIndexingARM";
+    case SpvCapabilityGraphARM: return "GraphARM";
     case SpvCapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
+    case SpvCapabilityFloat8EXT: return "Float8EXT";
+    case SpvCapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT";
     case SpvCapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR";
     case SpvCapabilitySubgroupBallotKHR: return "SubgroupBallotKHR";
     case SpvCapabilityDrawParameters: return "DrawParameters";
@@ -3992,6 +4054,7 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
     case SpvCapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM";
     case SpvCapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM";
     case SpvCapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM";
+    case SpvCapabilityTileShadingQCOM: return "TileShadingQCOM";
     case SpvCapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM";
     case SpvCapabilityFloat16ImageAMD: return "Float16ImageAMD";
     case SpvCapabilityImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD";
@@ -4002,6 +4065,8 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
     case SpvCapabilityShaderClockKHR: return "ShaderClockKHR";
     case SpvCapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX";
     case SpvCapabilityQuadControlKHR: return "QuadControlKHR";
+    case SpvCapabilityInt4TypeINTEL: return "Int4TypeINTEL";
+    case SpvCapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL";
     case SpvCapabilityBFloat16TypeKHR: return "BFloat16TypeKHR";
     case SpvCapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR";
     case SpvCapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR";
@@ -4144,6 +4209,7 @@ inline const char* SpvCapabilityToString(SpvCapability value) {
     case SpvCapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL";
     case SpvCapabilityCacheControlsINTEL: return "CacheControlsINTEL";
     case SpvCapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL";
+    case SpvCapabilityBindlessImagesINTEL: return "BindlessImagesINTEL";
     default: return "Unknown";
     }
 }
@@ -4299,6 +4365,8 @@ inline const char* SpvNamedMaximumNumberOfRegistersToString(SpvNamedMaximumNumbe
 inline const char* SpvFPEncodingToString(SpvFPEncoding value) {
     switch (value) {
     case SpvFPEncodingBFloat16KHR: return "BFloat16KHR";
+    case SpvFPEncodingFloat8E4M3EXT: return "Float8E4M3EXT";
+    case SpvFPEncodingFloat8E5M2EXT: return "Float8E5M2EXT";
     default: return "Unknown";
     }
 }
@@ -4683,6 +4751,17 @@ inline const char* SpvOpToString(SpvOp value) {
     case SpvOpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT";
     case SpvOpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT";
     case SpvOpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT";
+    case SpvOpTypeTensorARM: return "OpTypeTensorARM";
+    case SpvOpTensorReadARM: return "OpTensorReadARM";
+    case SpvOpTensorWriteARM: return "OpTensorWriteARM";
+    case SpvOpTensorQuerySizeARM: return "OpTensorQuerySizeARM";
+    case SpvOpGraphConstantARM: return "OpGraphConstantARM";
+    case SpvOpGraphEntryPointARM: return "OpGraphEntryPointARM";
+    case SpvOpGraphARM: return "OpGraphARM";
+    case SpvOpGraphInputARM: return "OpGraphInputARM";
+    case SpvOpGraphSetOutputARM: return "OpGraphSetOutputARM";
+    case SpvOpGraphEndARM: return "OpGraphEndARM";
+    case SpvOpTypeGraphARM: return "OpTypeGraphARM";
     case SpvOpTerminateInvocation: return "OpTerminateInvocation";
     case SpvOpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR";
     case SpvOpUntypedVariableKHR: return "OpUntypedVariableKHR";
@@ -5130,6 +5209,9 @@ inline const char* SpvOpToString(SpvOp value) {
     case SpvOpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL";
     case SpvOpMaskedGatherINTEL: return "OpMaskedGatherINTEL";
     case SpvOpMaskedScatterINTEL: return "OpMaskedScatterINTEL";
+    case SpvOpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL";
+    case SpvOpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL";
+    case SpvOpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL";
     default: return "Unknown";
     }
 }

+ 108 - 22
3rdparty/spirv-cross/spirv.hpp

@@ -1,26 +1,10 @@
-// Copyright (c) 2014-2024 The Khronos Group Inc.
+// Copyright: 2014-2024 The Khronos Group Inc.
+// License: MIT
 // 
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and/or associated documentation files (the "Materials"),
-// to deal in the Materials without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Materials, and to permit persons to whom the
-// Materials are furnished to do so, subject to the following conditions:
-// 
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Materials.
-// 
-// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
-// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
-// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
-// 
-// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
-// IN THE MATERIALS.
+// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+// https://www.khronos.org/registry/
 
 // This header is automatically generated by the same tool that creates
 // the Binary Section of the SPIR-V specification.
@@ -172,6 +156,8 @@ enum ExecutionMode {
     ExecutionModeSignedZeroInfNanPreserve = 4461,
     ExecutionModeRoundingModeRTE = 4462,
     ExecutionModeRoundingModeRTZ = 4463,
+    ExecutionModeNonCoherentTileAttachmentReadQCOM = 4489,
+    ExecutionModeTileShadingRateQCOM = 4490,
     ExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
     ExecutionModeStencilRefReplacingEXT = 5027,
     ExecutionModeCoalescingAMDX = 5069,
@@ -241,6 +227,7 @@ enum StorageClass {
     StorageClassImage = 11,
     StorageClassStorageBuffer = 12,
     StorageClassTileImageEXT = 4172,
+    StorageClassTileAttachmentQCOM = 4491,
     StorageClassNodePayloadAMDX = 5068,
     StorageClassCallableDataKHR = 5328,
     StorageClassCallableDataNV = 5328,
@@ -550,6 +537,7 @@ enum Decoration {
     DecorationMaxByteOffset = 45,
     DecorationAlignmentId = 46,
     DecorationMaxByteOffsetId = 47,
+    DecorationSaturatedToLargestFloat8NormalConversionEXT = 4216,
     DecorationNoSignedWrap = 4469,
     DecorationNoUnsignedWrap = 4470,
     DecorationWeightTextureQCOM = 4487,
@@ -719,6 +707,9 @@ enum BuiltIn {
     BuiltInDeviceIndex = 4438,
     BuiltInViewIndex = 4440,
     BuiltInShadingRateKHR = 4444,
+    BuiltInTileOffsetQCOM = 4492,
+    BuiltInTileDimensionQCOM = 4493,
+    BuiltInTileApronSizeQCOM = 4494,
     BuiltInBaryCoordNoPerspAMD = 4992,
     BuiltInBaryCoordNoPerspCentroidAMD = 4993,
     BuiltInBaryCoordNoPerspSampleAMD = 4994,
@@ -1069,7 +1060,13 @@ enum Capability {
     CapabilityTileImageColorReadAccessEXT = 4166,
     CapabilityTileImageDepthReadAccessEXT = 4167,
     CapabilityTileImageStencilReadAccessEXT = 4168,
+    CapabilityTensorsARM = 4174,
+    CapabilityStorageTensorArrayDynamicIndexingARM = 4175,
+    CapabilityStorageTensorArrayNonUniformIndexingARM = 4176,
+    CapabilityGraphARM = 4191,
     CapabilityCooperativeMatrixLayoutsARM = 4201,
+    CapabilityFloat8EXT = 4212,
+    CapabilityFloat8CooperativeMatrixEXT = 4213,
     CapabilityFragmentShadingRateKHR = 4422,
     CapabilitySubgroupBallotKHR = 4423,
     CapabilityDrawParameters = 4427,
@@ -1105,6 +1102,7 @@ enum Capability {
     CapabilityTextureSampleWeightedQCOM = 4484,
     CapabilityTextureBoxFilterQCOM = 4485,
     CapabilityTextureBlockMatchQCOM = 4486,
+    CapabilityTileShadingQCOM = 4495,
     CapabilityTextureBlockMatch2QCOM = 4498,
     CapabilityFloat16ImageAMD = 5008,
     CapabilityImageGatherBiasLodAMD = 5009,
@@ -1115,6 +1113,8 @@ enum Capability {
     CapabilityShaderClockKHR = 5055,
     CapabilityShaderEnqueueAMDX = 5067,
     CapabilityQuadControlKHR = 5087,
+    CapabilityInt4TypeINTEL = 5112,
+    CapabilityInt4CooperativeMatrixINTEL = 5114,
     CapabilityBFloat16TypeKHR = 5116,
     CapabilityBFloat16DotProductKHR = 5117,
     CapabilityBFloat16CooperativeMatrixKHR = 5118,
@@ -1283,6 +1283,7 @@ enum Capability {
     CapabilityMaskedGatherScatterINTEL = 6427,
     CapabilityCacheControlsINTEL = 6441,
     CapabilityRegisterLimitsINTEL = 6460,
+    CapabilityBindlessImagesINTEL = 6528,
     CapabilityMax = 0x7fffffff,
 };
 
@@ -1459,6 +1460,24 @@ enum TensorAddressingOperandsMask {
     TensorAddressingOperandsDecodeFuncMask = 0x00000002,
 };
 
+enum TensorOperandsShift {
+    TensorOperandsNontemporalARMShift = 0,
+    TensorOperandsOutOfBoundsValueARMShift = 1,
+    TensorOperandsMakeElementAvailableARMShift = 2,
+    TensorOperandsMakeElementVisibleARMShift = 3,
+    TensorOperandsNonPrivateElementARMShift = 4,
+    TensorOperandsMax = 0x7fffffff,
+};
+
+enum TensorOperandsMask {
+    TensorOperandsMaskNone = 0,
+    TensorOperandsNontemporalARMMask = 0x00000001,
+    TensorOperandsOutOfBoundsValueARMMask = 0x00000002,
+    TensorOperandsMakeElementAvailableARMMask = 0x00000004,
+    TensorOperandsMakeElementVisibleARMMask = 0x00000008,
+    TensorOperandsNonPrivateElementARMMask = 0x00000010,
+};
+
 enum InitializationModeQualifier {
     InitializationModeQualifierInitOnDeviceReprogramINTEL = 0,
     InitializationModeQualifierInitOnDeviceResetINTEL = 1,
@@ -1545,6 +1564,8 @@ enum RawAccessChainOperandsMask {
 
 enum FPEncoding {
     FPEncodingBFloat16KHR = 0,
+    FPEncodingFloat8E4M3EXT = 4214,
+    FPEncodingFloat8E5M2EXT = 4215,
     FPEncodingMax = 0x7fffffff,
 };
 
@@ -1923,6 +1944,17 @@ enum Op {
     OpColorAttachmentReadEXT = 4160,
     OpDepthAttachmentReadEXT = 4161,
     OpStencilAttachmentReadEXT = 4162,
+    OpTypeTensorARM = 4163,
+    OpTensorReadARM = 4164,
+    OpTensorWriteARM = 4165,
+    OpTensorQuerySizeARM = 4166,
+    OpGraphConstantARM = 4181,
+    OpGraphEntryPointARM = 4182,
+    OpGraphARM = 4183,
+    OpGraphInputARM = 4184,
+    OpGraphSetOutputARM = 4185,
+    OpGraphEndARM = 4186,
+    OpTypeGraphARM = 4190,
     OpTerminateInvocation = 4416,
     OpTypeUntypedPointerKHR = 4417,
     OpUntypedVariableKHR = 4418,
@@ -2381,6 +2413,9 @@ enum Op {
     OpRoundFToTF32INTEL = 6426,
     OpMaskedGatherINTEL = 6428,
     OpMaskedScatterINTEL = 6429,
+    OpConvertHandleToImageINTEL = 6529,
+    OpConvertHandleToSamplerINTEL = 6530,
+    OpConvertHandleToSampledImageINTEL = 6531,
     OpMax = 0x7fffffff,
 };
 
@@ -2739,6 +2774,17 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
     case OpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
     case OpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
     case OpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
+    case OpTypeTensorARM: *hasResult = true; *hasResultType = false; break;
+    case OpTensorReadARM: *hasResult = true; *hasResultType = true; break;
+    case OpTensorWriteARM: *hasResult = false; *hasResultType = false; break;
+    case OpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break;
+    case OpGraphConstantARM: *hasResult = true; *hasResultType = true; break;
+    case OpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break;
+    case OpGraphARM: *hasResult = true; *hasResultType = true; break;
+    case OpGraphInputARM: *hasResult = true; *hasResultType = true; break;
+    case OpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break;
+    case OpGraphEndARM: *hasResult = false; *hasResultType = false; break;
+    case OpTypeGraphARM: *hasResult = true; *hasResultType = false; break;
     case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
     case OpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break;
     case OpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break;
@@ -3186,6 +3232,9 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
     case OpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break;
     case OpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break;
     case OpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break;
+    case OpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break;
     }
 }
 inline const char* SourceLanguageToString(SourceLanguage value) {
@@ -3301,6 +3350,8 @@ inline const char* ExecutionModeToString(ExecutionMode value) {
     case ExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve";
     case ExecutionModeRoundingModeRTE: return "RoundingModeRTE";
     case ExecutionModeRoundingModeRTZ: return "RoundingModeRTZ";
+    case ExecutionModeNonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM";
+    case ExecutionModeTileShadingRateQCOM: return "TileShadingRateQCOM";
     case ExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD";
     case ExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT";
     case ExecutionModeCoalescingAMDX: return "CoalescingAMDX";
@@ -3367,6 +3418,7 @@ inline const char* StorageClassToString(StorageClass value) {
     case StorageClassImage: return "Image";
     case StorageClassStorageBuffer: return "StorageBuffer";
     case StorageClassTileImageEXT: return "TileImageEXT";
+    case StorageClassTileAttachmentQCOM: return "TileAttachmentQCOM";
     case StorageClassNodePayloadAMDX: return "NodePayloadAMDX";
     case StorageClassCallableDataKHR: return "CallableDataKHR";
     case StorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR";
@@ -3615,6 +3667,7 @@ inline const char* DecorationToString(Decoration value) {
     case DecorationMaxByteOffset: return "MaxByteOffset";
     case DecorationAlignmentId: return "AlignmentId";
     case DecorationMaxByteOffsetId: return "MaxByteOffsetId";
+    case DecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT";
     case DecorationNoSignedWrap: return "NoSignedWrap";
     case DecorationNoUnsignedWrap: return "NoUnsignedWrap";
     case DecorationWeightTextureQCOM: return "WeightTextureQCOM";
@@ -3774,6 +3827,9 @@ inline const char* BuiltInToString(BuiltIn value) {
     case BuiltInDeviceIndex: return "DeviceIndex";
     case BuiltInViewIndex: return "ViewIndex";
     case BuiltInShadingRateKHR: return "ShadingRateKHR";
+    case BuiltInTileOffsetQCOM: return "TileOffsetQCOM";
+    case BuiltInTileDimensionQCOM: return "TileDimensionQCOM";
+    case BuiltInTileApronSizeQCOM: return "TileApronSizeQCOM";
     case BuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD";
     case BuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD";
     case BuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD";
@@ -3954,7 +4010,13 @@ inline const char* CapabilityToString(Capability value) {
     case CapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT";
     case CapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
     case CapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
+    case CapabilityTensorsARM: return "TensorsARM";
+    case CapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM";
+    case CapabilityStorageTensorArrayNonUniformIndexingARM: return "StorageTensorArrayNonUniformIndexingARM";
+    case CapabilityGraphARM: return "GraphARM";
     case CapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
+    case CapabilityFloat8EXT: return "Float8EXT";
+    case CapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT";
     case CapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR";
     case CapabilitySubgroupBallotKHR: return "SubgroupBallotKHR";
     case CapabilityDrawParameters: return "DrawParameters";
@@ -3988,6 +4050,7 @@ inline const char* CapabilityToString(Capability value) {
     case CapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM";
     case CapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM";
     case CapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM";
+    case CapabilityTileShadingQCOM: return "TileShadingQCOM";
     case CapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM";
     case CapabilityFloat16ImageAMD: return "Float16ImageAMD";
     case CapabilityImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD";
@@ -3998,6 +4061,8 @@ inline const char* CapabilityToString(Capability value) {
     case CapabilityShaderClockKHR: return "ShaderClockKHR";
     case CapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX";
     case CapabilityQuadControlKHR: return "QuadControlKHR";
+    case CapabilityInt4TypeINTEL: return "Int4TypeINTEL";
+    case CapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL";
     case CapabilityBFloat16TypeKHR: return "BFloat16TypeKHR";
     case CapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR";
     case CapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR";
@@ -4140,6 +4205,7 @@ inline const char* CapabilityToString(Capability value) {
     case CapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL";
     case CapabilityCacheControlsINTEL: return "CacheControlsINTEL";
     case CapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL";
+    case CapabilityBindlessImagesINTEL: return "BindlessImagesINTEL";
     default: return "Unknown";
     }
 }
@@ -4295,6 +4361,8 @@ inline const char* NamedMaximumNumberOfRegistersToString(NamedMaximumNumberOfReg
 inline const char* FPEncodingToString(FPEncoding value) {
     switch (value) {
     case FPEncodingBFloat16KHR: return "BFloat16KHR";
+    case FPEncodingFloat8E4M3EXT: return "Float8E4M3EXT";
+    case FPEncodingFloat8E5M2EXT: return "Float8E5M2EXT";
     default: return "Unknown";
     }
 }
@@ -4679,6 +4747,17 @@ inline const char* OpToString(Op value) {
     case OpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT";
     case OpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT";
     case OpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT";
+    case OpTypeTensorARM: return "OpTypeTensorARM";
+    case OpTensorReadARM: return "OpTensorReadARM";
+    case OpTensorWriteARM: return "OpTensorWriteARM";
+    case OpTensorQuerySizeARM: return "OpTensorQuerySizeARM";
+    case OpGraphConstantARM: return "OpGraphConstantARM";
+    case OpGraphEntryPointARM: return "OpGraphEntryPointARM";
+    case OpGraphARM: return "OpGraphARM";
+    case OpGraphInputARM: return "OpGraphInputARM";
+    case OpGraphSetOutputARM: return "OpGraphSetOutputARM";
+    case OpGraphEndARM: return "OpGraphEndARM";
+    case OpTypeGraphARM: return "OpTypeGraphARM";
     case OpTerminateInvocation: return "OpTerminateInvocation";
     case OpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR";
     case OpUntypedVariableKHR: return "OpUntypedVariableKHR";
@@ -5126,6 +5205,9 @@ inline const char* OpToString(Op value) {
     case OpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL";
     case OpMaskedGatherINTEL: return "OpMaskedGatherINTEL";
     case OpMaskedScatterINTEL: return "OpMaskedScatterINTEL";
+    case OpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL";
+    case OpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL";
+    case OpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL";
     default: return "Unknown";
     }
 }
@@ -5186,6 +5268,10 @@ inline TensorAddressingOperandsMask operator|(TensorAddressingOperandsMask a, Te
 inline TensorAddressingOperandsMask operator&(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) & unsigned(b)); }
 inline TensorAddressingOperandsMask operator^(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) ^ unsigned(b)); }
 inline TensorAddressingOperandsMask operator~(TensorAddressingOperandsMask a) { return TensorAddressingOperandsMask(~unsigned(a)); }
+inline TensorOperandsMask operator|(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) | unsigned(b)); }
+inline TensorOperandsMask operator&(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) & unsigned(b)); }
+inline TensorOperandsMask operator^(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) ^ unsigned(b)); }
+inline TensorOperandsMask operator~(TensorOperandsMask a) { return TensorOperandsMask(~unsigned(a)); }
 inline MatrixMultiplyAccumulateOperandsMask operator|(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) | unsigned(b)); }
 inline MatrixMultiplyAccumulateOperandsMask operator&(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) & unsigned(b)); }
 inline MatrixMultiplyAccumulateOperandsMask operator^(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) ^ unsigned(b)); }
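
The new TensorOperandsMask enumerants compose with the operator overloads added just above, in the same way as the other *Mask enums. A small illustration (not from the commit; the helper and the flag choice are hypothetical, assuming spirv.hpp from this tree is on the include path):

#include <spirv.hpp>

// Hypothetical helper: check whether an extra OutOfBoundsValue operand is requested.
static bool needs_out_of_bounds_value(spv::TensorOperandsMask operands)
{
	return (operands & spv::TensorOperandsOutOfBoundsValueARMMask) !=
	       spv::TensorOperandsMaskNone;
}

// Flags combine type-safely through the overloaded operators.
static const spv::TensorOperandsMask example_operands =
    spv::TensorOperandsNontemporalARMMask | spv::TensorOperandsNonPrivateElementARMMask;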

+ 99 - 25
3rdparty/spirv-cross/spirv_cfg.cpp

@@ -81,31 +81,105 @@ bool CFG::is_back_edge(uint32_t to) const
 	// We have a back edge if the visit order is set with the temporary magic value 0.
 	// Crossing edges will have already been recorded with a visit order.
 	auto itr = visit_order.find(to);
-	return itr != end(visit_order) && itr->second.get() == 0;
+	return itr != end(visit_order) && itr->second.visited_branches && !itr->second.visited_resolve;
 }
 
-bool CFG::has_visited_forward_edge(uint32_t to) const
+bool CFG::has_visited_branch(uint32_t to) const
 {
-	// If > 0, we have visited the edge already, and this is not a back edge branch.
 	auto itr = visit_order.find(to);
-	return itr != end(visit_order) && itr->second.get() > 0;
+	return itr != end(visit_order) && itr->second.visited_branches;
 }
 
-bool CFG::post_order_visit(uint32_t block_id)
+void CFG::post_order_visit_entry(uint32_t block)
 {
-	// If we have already branched to this block (back edge), stop recursion.
-	// If our branches are back-edges, we do not record them.
-	// We have to record crossing edges however.
-	if (has_visited_forward_edge(block_id))
-		return true;
-	else if (is_back_edge(block_id))
-		return false;
+	visit_stack.push_back(block);
 
-	// Block back-edges from recursively revisiting ourselves.
-	visit_order[block_id].get() = 0;
+	while (!visit_stack.empty())
+	{
+		bool keep_iterating;
+		do
+		{
+			// Reverse the order to allow for stack-like behavior and preserves the visit order from recursive algorithm.
+			// Traverse depth first.
+			uint32_t to_visit = visit_stack.back();
+			last_visited_size = visit_stack.size();
+			post_order_visit_branches(to_visit);
+			keep_iterating = last_visited_size != visit_stack.size();
+			if (keep_iterating)
+				std::reverse(visit_stack.begin() + last_visited_size, visit_stack.end());
+		} while (keep_iterating);
+
+		// We've reached the end of some tree leaf. Resolve the stack.
+		// Any node which has been visited for real can be popped now.
+		while (!visit_stack.empty() && visit_order[visit_stack.back()].visited_branches)
+		{
+			post_order_visit_resolve(visit_stack.back());
+			visit_stack.pop_back();
+		}
+	}
+}
+
+void CFG::visit_branch(uint32_t block_id)
+{
+	// Prune obvious duplicates.
+	if (std::find(visit_stack.begin() + last_visited_size, visit_stack.end(), block_id) == visit_stack.end() &&
+	    !has_visited_branch(block_id))
+	{
+		visit_stack.push_back(block_id);
+	}
+}
+
+void CFG::post_order_visit_branches(uint32_t block_id)
+{
+	auto &block = compiler.get<SPIRBlock>(block_id);
+
+	auto &visit = visit_order[block_id];
+	if (visit.visited_branches)
+		return;
+	visit.visited_branches = true;
+
+	if (block.merge == SPIRBlock::MergeLoop)
+		visit_branch(block.merge_block);
+	else if (block.merge == SPIRBlock::MergeSelection)
+		visit_branch(block.next_block);
+
+	// First visit our branch targets.
+	switch (block.terminator)
+	{
+	case SPIRBlock::Direct:
+		visit_branch(block.next_block);
+		break;
+
+	case SPIRBlock::Select:
+		visit_branch(block.true_block);
+		visit_branch(block.false_block);
+		break;
 
+	case SPIRBlock::MultiSelect:
+	{
+		const auto &cases = compiler.get_case_list(block);
+		for (const auto &target : cases)
+			visit_branch(target.block);
+		if (block.default_block)
+			visit_branch(block.default_block);
+		break;
+	}
+
+	default:
+		break;
+	}
+}
+
+void CFG::post_order_visit_resolve(uint32_t block_id)
+{
 	auto &block = compiler.get<SPIRBlock>(block_id);
 
+	auto &visit_block = visit_order[block_id];
+	assert(visit_block.visited_branches);
+	auto &visited = visit_order[block_id].visited_resolve;
+	if (visited)
+		return;
+
 	// If this is a loop header, add an implied branch to the merge target.
 	// This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners.
 	// To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block.
@@ -116,21 +190,21 @@ bool CFG::post_order_visit(uint32_t block_id)
 	// is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis.
 	// For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine,
 	// but for loops, only the header might end up actually branching to merge block.
-	if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block))
+	if (block.merge == SPIRBlock::MergeLoop && !is_back_edge(block.merge_block))
 		add_branch(block_id, block.merge_block);
 
 	// First visit our branch targets.
 	switch (block.terminator)
 	{
 	case SPIRBlock::Direct:
-		if (post_order_visit(block.next_block))
+		if (!is_back_edge(block.next_block))
 			add_branch(block_id, block.next_block);
 		break;
 
 	case SPIRBlock::Select:
-		if (post_order_visit(block.true_block))
+		if (!is_back_edge(block.true_block))
 			add_branch(block_id, block.true_block);
-		if (post_order_visit(block.false_block))
+		if (!is_back_edge(block.false_block))
 			add_branch(block_id, block.false_block);
 		break;
 
@@ -139,10 +213,10 @@ bool CFG::post_order_visit(uint32_t block_id)
 		const auto &cases = compiler.get_case_list(block);
 		for (const auto &target : cases)
 		{
-			if (post_order_visit(target.block))
+			if (!is_back_edge(target.block))
 				add_branch(block_id, target.block);
 		}
-		if (block.default_block && post_order_visit(block.default_block))
+		if (block.default_block && !is_back_edge(block.default_block))
 			add_branch(block_id, block.default_block);
 		break;
 	}
@@ -157,7 +231,7 @@ bool CFG::post_order_visit(uint32_t block_id)
 	// We can use the variable without a Phi since there is only one possible parent here.
 	// However, in this case, we need to hoist out the inner variable to outside the branch.
 	// Use same strategy as loops.
-	if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block))
+	if (block.merge == SPIRBlock::MergeSelection && !is_back_edge(block.next_block))
 	{
 		// If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup.
 		// Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement
@@ -201,10 +275,9 @@ bool CFG::post_order_visit(uint32_t block_id)
 		}
 	}
 
-	// Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges.
-	visit_order[block_id].get() = ++visit_count;
+	visited = true;
+	visit_block.order = ++visit_count;
 	post_order.push_back(block_id);
-	return true;
 }
 
 void CFG::build_post_order_visit_order()
@@ -213,11 +286,12 @@ void CFG::build_post_order_visit_order()
 	visit_count = 0;
 	visit_order.clear();
 	post_order.clear();
-	post_order_visit(block);
+	post_order_visit_entry(block);
 }
 
 void CFG::add_branch(uint32_t from, uint32_t to)
 {
+	assert(from && to);
 	const auto add_unique = [](SmallVector<uint32_t> &l, uint32_t value) {
 		auto itr = find(begin(l), end(l), value);
 		if (itr == end(l))

+ 12 - 14
3rdparty/spirv-cross/spirv_cfg.hpp

@@ -68,7 +68,7 @@ public:
 	{
 		auto itr = visit_order.find(block);
 		assert(itr != std::end(visit_order));
-		int v = itr->second.get();
+		int v = itr->second.order;
 		assert(v > 0);
 		return uint32_t(v);
 	}
@@ -114,17 +114,9 @@ public:
 private:
 	struct VisitOrder
 	{
-		int &get()
-		{
-			return v;
-		}
-
-		const int &get() const
-		{
-			return v;
-		}
-
-		int v = -1;
+		int order = -1;
+		bool visited_resolve = false;
+		bool visited_branches = false;
 	};
 
 	Compiler &compiler;
@@ -139,11 +131,17 @@ private:
 	void add_branch(uint32_t from, uint32_t to);
 	void build_post_order_visit_order();
 	void build_immediate_dominators();
-	bool post_order_visit(uint32_t block);
+	void post_order_visit_branches(uint32_t block);
+	void post_order_visit_resolve(uint32_t block);
+	void post_order_visit_entry(uint32_t block);
 	uint32_t visit_count = 0;
 
 	bool is_back_edge(uint32_t to) const;
-	bool has_visited_forward_edge(uint32_t to) const;
+	bool has_visited_branch(uint32_t to) const;
+	void visit_branch(uint32_t block_id);
+
+	SmallVector<uint32_t> visit_stack;
+	size_t last_visited_size = 0;
 };
 
 class DominatorBuilder
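
The CFG change above replaces the recursive post_order_visit() with an explicit visit_stack and a two-phase VisitOrder (visited_branches, then visited_resolve), presumably so recursion depth no longer limits very deep control-flow graphs. For orientation, this is the same shape as a generic iterative post-order DFS; a minimal sketch, not SPIRV-Cross code, with a made-up successor map:

#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

// Generic iterative post-order: first queue a node's successors ("branch" phase),
// then emit the node itself once they have all been handled ("resolve" phase).
static void post_order(uint32_t entry,
                       const std::unordered_map<uint32_t, std::vector<uint32_t>> &successors,
                       std::vector<uint32_t> &out)
{
	std::unordered_map<uint32_t, bool> discovered;
	std::vector<std::pair<uint32_t, bool>> stack;
	stack.push_back({ entry, false });

	while (!stack.empty())
	{
		uint32_t node = stack.back().first;
		bool resolve = stack.back().second;
		stack.pop_back();

		if (resolve)
		{
			out.push_back(node); // All successors done; emit in post-order.
			continue;
		}

		if (discovered[node])
			continue; // Back or cross edge; already queued for resolution.
		discovered[node] = true;

		stack.push_back({ node, true }); // Resolve after the successors pushed below.
		auto itr = successors.find(node);
		if (itr != successors.end())
			for (uint32_t succ : itr->second)
				if (!discovered[succ])
					stack.push_back({ succ, false });
	}
}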

+ 78 - 9
3rdparty/spirv-cross/spirv_common.hpp

@@ -574,6 +574,7 @@ struct SPIRType : IVariant
 		Sampler,
 		AccelerationStructure,
 		RayQuery,
+		CoopVecNV,
 
 		// Keep internal types at the end.
 		ControlPointArray,
@@ -581,7 +582,11 @@ struct SPIRType : IVariant
 		Char,
 		// MSL specific type, that is used by 'object'(analog of 'task' from glsl) shader.
 		MeshGridProperties,
-		BFloat16
+		BFloat16,
+		FloatE4M3,
+		FloatE5M2,
+
+		Tensor
 	};
 
 	// Scalar/vector/matrix support.
@@ -606,13 +611,29 @@ struct SPIRType : IVariant
 	bool pointer = false;
 	bool forward_pointer = false;
 
-	struct
+	union
 	{
-		uint32_t use_id = 0;
-		uint32_t rows_id = 0;
-		uint32_t columns_id = 0;
-		uint32_t scope_id = 0;
-	} cooperative;
+		struct
+		{
+			uint32_t use_id;
+			uint32_t rows_id;
+			uint32_t columns_id;
+			uint32_t scope_id;
+		} cooperative;
+
+		struct
+		{
+			uint32_t component_type_id;
+			uint32_t component_count_id;
+		} coopVecNV;
+
+		struct
+		{
+			uint32_t type;
+			uint32_t rank;
+			uint32_t shape;
+		} tensor;
+	} ext;
 
 	spv::StorageClass storage = spv::StorageClassGeneric;
 
@@ -670,6 +691,12 @@ struct SPIRExtension : IVariant
 		NonSemanticGeneric
 	};
 
+	enum ShaderDebugInfoOps
+	{
+		DebugLine = 103,
+		DebugSource = 35
+	};
+
 	explicit SPIRExtension(Extension ext_)
 	    : ext(ext_)
 	{
@@ -695,6 +722,11 @@ struct SPIREntryPoint
 	FunctionID self = 0;
 	std::string name;
 	std::string orig_name;
+	std::unordered_map<uint32_t, uint32_t> fp_fast_math_defaults;
+	bool signed_zero_inf_nan_preserve_8 = false;
+	bool signed_zero_inf_nan_preserve_16 = false;
+	bool signed_zero_inf_nan_preserve_32 = false;
+	bool signed_zero_inf_nan_preserve_64 = false;
 	SmallVector<VariableID> interface_variables;
 
 	Bitset flags;
@@ -927,6 +959,7 @@ struct SPIRBlock : IVariant
 	// All access to these variables are dominated by this block,
 	// so before branching anywhere we need to make sure that we declare these variables.
 	SmallVector<VariableID> dominated_variables;
+	SmallVector<bool> rearm_dominated_variables;
 
 	// These are variables which should be declared in a for loop header, if we
 	// fail to use a classic for-loop,
@@ -1238,6 +1271,26 @@ struct SPIRConstant : IVariant
 		return u.f32;
 	}
 
+	static inline float fe4m3_to_f32(uint8_t v)
+	{
+		if ((v & 0x7f) == 0x7f)
+		{
+			union
+			{
+				float f32;
+				uint32_t u32;
+			} u;
+
+			u.u32 = (v & 0x80) ? 0xffffffffu : 0x7fffffffu;
+			return u.f32;
+		}
+		else
+		{
+			// Reuse the FP16 to FP32 code. Cute bit-hackery.
+			return f16_to_f32((int16_t(int8_t(v)) << 7) & (0xffff ^ 0x4000)) * 256.0f;
+		}
+	}
+
 	inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const
 	{
 		return m.c[col].id[row];
@@ -1286,6 +1339,16 @@ struct SPIRConstant : IVariant
 		return fp32;
 	}
 
+	inline float scalar_floate4m3(uint32_t col = 0, uint32_t row = 0) const
+	{
+		return fe4m3_to_f32(scalar_u8(col, row));
+	}
+
+	inline float scalar_bf8(uint32_t col = 0, uint32_t row = 0) const
+	{
+		return f16_to_f32(scalar_u8(col, row) << 8);
+	}
+
 	inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const
 	{
 		return m.c[col].r[row].f32;
@@ -1356,9 +1419,10 @@ struct SPIRConstant : IVariant
 
 	SPIRConstant() = default;
 
-	SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
+	SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized, bool replicated_ = false)
 	    : constant_type(constant_type_)
 	    , specialization(specialized)
+	    , replicated(replicated_)
 	{
 		subconstants.reserve(num_elements);
 		for (uint32_t i = 0; i < num_elements; i++)
@@ -1437,6 +1501,9 @@ struct SPIRConstant : IVariant
 	// For composites which are constant arrays, etc.
 	SmallVector<ConstantID> subconstants;
 
+	// Whether the subconstants are intended to be replicated (e.g. OpConstantCompositeReplicateEXT)
+	bool replicated = false;
+
 	// Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant,
 	// and uses them to initialize the constant. This allows the user
 	// to still be able to specialize the value by supplying corresponding
@@ -1732,6 +1799,7 @@ struct Meta
 		uint32_t spec_id = 0;
 		uint32_t index = 0;
 		spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax;
+		spv::FPFastMathModeMask fp_fast_math_mode = spv::FPFastMathModeMaskNone;
 		bool builtin = false;
 		bool qualified_alias_explicit_override = false;
 
@@ -1787,7 +1855,8 @@ private:
 
 static inline bool type_is_floating_point(const SPIRType &type)
 {
-	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double;
+	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double ||
+	       type.basetype == SPIRType::BFloat16 || type.basetype == SPIRType::FloatE5M2 || type.basetype == SPIRType::FloatE4M3;
 }
 
 static inline bool type_is_integral(const SPIRType &type)
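
The fe4m3_to_f32() helper above widens FP8 values by reusing the FP16 path: the 8-bit pattern is shifted into an FP16 layout and the result is rescaled by 256 to correct the exponent bias (7 for E4M3 versus 15 for FP16). A standalone reference decoder, assuming the usual OCP FP8 E4M3 encoding (bias 7, no infinities, exponent and mantissa all ones meaning NaN), is useful for sanity-checking; it is illustrative only and not part of the commit:

#include <cmath>
#include <cstdint>
#include <limits>

static float e4m3_reference(uint8_t v)
{
	const float sign = (v & 0x80) ? -1.0f : 1.0f;
	const uint32_t exponent = (v >> 3) & 0xfu;
	const uint32_t mantissa = v & 0x7u;

	if (exponent == 0xfu && mantissa == 0x7u)
		return std::numeric_limits<float>::quiet_NaN(); // E4M3 has NaN but no Inf.

	if (exponent == 0)
		return sign * std::ldexp(mantissa / 8.0f, -6); // Denormal range.

	return sign * std::ldexp(1.0f + mantissa / 8.0f, int(exponent) - 7);
}

// e4m3_reference(0x38) == 1.0f and e4m3_reference(0x40) == 2.0f,
// matching fe4m3_to_f32(0x38) and fe4m3_to_f32(0x40) above.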

+ 26 - 5
3rdparty/spirv-cross/spirv_cross.cpp

@@ -280,6 +280,9 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
 			// This is a global side effect of the function.
 			return false;
 
+		case OpTensorReadARM:
+			return false;
+
 		case OpExtInst:
 		{
 			uint32_t extension_set = ops[2];
@@ -373,6 +376,7 @@ void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_
 
 		case OpLoad:
 		case OpCooperativeMatrixLoadKHR:
+		case OpCooperativeVectorLoadNV:
 		case OpImageRead:
 		{
 			// If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
@@ -1152,6 +1156,11 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *
 			{
 				res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
 			}
+			// Tensors
+			else if (type.basetype == SPIRType::Tensor)
+			{
+				res.tensors.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
+			}
 			else
 			{
 				res.gl_plain_uniforms.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
@@ -1169,11 +1178,8 @@ bool Compiler::type_is_top_level_block(const SPIRType &type) const
 	return has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
 }
 
-bool Compiler::type_is_block_like(const SPIRType &type) const
+bool Compiler::type_is_explicit_layout(const SPIRType &type) const
 {
-	if (type_is_top_level_block(type))
-		return true;
-
 	if (type.basetype == SPIRType::Struct)
 	{
 		// Block-like types may have Offset decorations.
@@ -1185,6 +1191,14 @@ bool Compiler::type_is_block_like(const SPIRType &type) const
 	return false;
 }
 
+bool Compiler::type_is_block_like(const SPIRType &type) const
+{
+	if (type_is_top_level_block(type))
+		return true;
+	else
+		return type_is_explicit_layout(type);
+}
+
 void Compiler::parse_fixup()
 {
 	// Figure out specialization constants for work group sizes.
@@ -2370,6 +2384,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
 		execution.output_primitives = arg0;
 		break;
 
+	case ExecutionModeFPFastMathDefault:
+		execution.fp_fast_math_defaults[arg0] = arg1;
+		break;
+
 	default:
 		break;
 	}
@@ -4334,6 +4352,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
 
 		case OpCopyObject:
 		case OpLoad:
+		case OpCooperativeVectorLoadNV:
 		case OpCooperativeMatrixLoadKHR:
 			if (ops[2] == var)
 				return true;
@@ -5151,7 +5170,7 @@ bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const
 bool Compiler::type_is_opaque_value(const SPIRType &type) const
 {
 	return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image ||
-	                         type.basetype == SPIRType::Sampler);
+	                         type.basetype == SPIRType::Sampler || type.basetype == SPIRType::Tensor);
 }
 
 // Make these member functions so we can easily break on any force_recompile events.
@@ -5469,6 +5488,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
 	{
 	case OpLoad:
 	case OpCooperativeMatrixLoadKHR:
+	case OpCooperativeVectorLoadNV:
 	{
 		if (length < 3)
 			return false;
@@ -5547,6 +5567,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
 	case OpImageWrite:
 	case OpAtomicStore:
 	case OpCooperativeMatrixStoreKHR:
+	case OpCooperativeVectorStoreNV:
 	{
 		if (length < 1)
 			return false;

+ 2 - 0
3rdparty/spirv-cross/spirv_cross.hpp

@@ -95,6 +95,7 @@ struct ShaderResources
 	SmallVector<Resource> atomic_counters;
 	SmallVector<Resource> acceleration_structures;
 	SmallVector<Resource> gl_plain_uniforms;
+	SmallVector<Resource> tensors;
 
 	// There can only be one push constant block,
 	// but keep the vector in case this restriction is lifted in the future.
@@ -1171,6 +1172,7 @@ protected:
 	bool type_contains_recursion(const SPIRType &type);
 	bool type_is_array_of_pointers(const SPIRType &type) const;
 	bool type_is_block_like(const SPIRType &type) const;
+	bool type_is_explicit_layout(const SPIRType &type) const;
 	bool type_is_top_level_block(const SPIRType &type) const;
 	bool type_is_opaque_value(const SPIRType &type) const;
 

+ 8 - 0
3rdparty/spirv-cross/spirv_cross_c.cpp

@@ -200,6 +200,7 @@ struct spvc_resources_s : ScratchMemoryAllocation
 	SmallVector<spvc_reflected_resource> separate_samplers;
 	SmallVector<spvc_reflected_resource> acceleration_structures;
 	SmallVector<spvc_reflected_resource> gl_plain_uniforms;
+	SmallVector<spvc_reflected_resource> tensors;
 
 	SmallVector<spvc_reflected_builtin_resource> builtin_inputs;
 	SmallVector<spvc_reflected_builtin_resource> builtin_outputs;
@@ -1872,6 +1873,8 @@ bool spvc_resources_s::copy_resources(const ShaderResources &resources)
 		return false;
 	if (!copy_resources(gl_plain_uniforms, resources.gl_plain_uniforms))
 		return false;
+	if (!copy_resources(tensors, resources.tensors))
+		return false;
 	if (!copy_resources(builtin_inputs, resources.builtin_inputs))
 		return false;
 	if (!copy_resources(builtin_outputs, resources.builtin_outputs))
@@ -2025,6 +2028,11 @@ spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources,
 
 	case SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM:
 		list = &resources->gl_plain_uniforms;
+		break;
+
+	case SPVC_RESOURCE_TYPE_TENSOR:
+		list = &resources->tensors;
+		break;
 
 	default:
 		break;

+ 2 - 1
3rdparty/spirv-cross/spirv_cross_c.h

@@ -40,7 +40,7 @@ extern "C" {
 /* Bumped if ABI or API breaks backwards compatibility. */
 #define SPVC_C_API_VERSION_MAJOR 0
 /* Bumped if APIs or enumerations are added in a backwards compatible way. */
-#define SPVC_C_API_VERSION_MINOR 66
+#define SPVC_C_API_VERSION_MINOR 67
 /* Bumped if internal implementation details change. */
 #define SPVC_C_API_VERSION_PATCH 0
 
@@ -227,6 +227,7 @@ typedef enum spvc_resource_type
 	SPVC_RESOURCE_TYPE_RAY_QUERY = 13,
 	SPVC_RESOURCE_TYPE_SHADER_RECORD_BUFFER = 14,
 	SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM = 15,
+	SPVC_RESOURCE_TYPE_TENSOR = 16,
 	SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff
 } spvc_resource_type;
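
On the C API side, the new SPVC_RESOURCE_TYPE_TENSOR value plugs into the existing spvc_resources_get_resource_list_for_type() query. A hedged sketch (not from the commit), assuming a spvc_resources handle already obtained through the usual spvc_context/spvc_compiler calls, with error handling reduced to an early return:

#include <spirv_cross_c.h>
#include <cstdio>

void print_tensors(spvc_resources resources)
{
	const spvc_reflected_resource *list = nullptr;
	size_t count = 0;

	if (spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_TENSOR,
	                                              &list, &count) != SPVC_SUCCESS)
		return;

	for (size_t i = 0; i < count; i++)
		printf("tensor: %s (id %u)\n", list[i].name, list[i].id);
}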
 

+ 30 - 1
3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp

@@ -452,6 +452,10 @@ void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument)
 		dec.fp_rounding_mode = static_cast<FPRoundingMode>(argument);
 		break;
 
+	case DecorationFPFastMathMode:
+		dec.fp_fast_math_mode = static_cast<FPFastMathModeMask>(argument);
+		break;
+
 	default:
 		break;
 	}
@@ -523,8 +527,27 @@ void ParsedIR::mark_used_as_array_length(ID id)
 	switch (ids[id].get_type())
 	{
 	case TypeConstant:
-		get<SPIRConstant>(id).is_used_as_array_length = true;
+	{
+		auto &c = get<SPIRConstant>(id);
+		c.is_used_as_array_length = true;
+
+		// Mark composite dependencies as well.
+		for (auto &sub_id: c.m.id)
+			if (sub_id)
+				mark_used_as_array_length(sub_id);
+
+		for (uint32_t col = 0; col < c.m.columns; col++)
+		{
+			for (auto &sub_id : c.m.c[col].id)
+				if (sub_id)
+					mark_used_as_array_length(sub_id);
+		}
+
+		for (auto &sub_id : c.subconstants)
+			if (sub_id)
+				mark_used_as_array_length(sub_id);
 		break;
+	}
 
 	case TypeConstantOp:
 	{
@@ -643,6 +666,8 @@ uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const
 		return dec.index;
 	case DecorationFPRoundingMode:
 		return dec.fp_rounding_mode;
+	case DecorationFPFastMathMode:
+		return dec.fp_fast_math_mode;
 	default:
 		return 1;
 	}
@@ -730,6 +755,10 @@ void ParsedIR::unset_decoration(ID id, Decoration decoration)
 		dec.fp_rounding_mode = FPRoundingModeMax;
 		break;
 
+	case DecorationFPFastMathMode:
+		dec.fp_fast_math_mode = FPFastMathModeMaskNone;
+		break;
+
 	case DecorationHlslCounterBufferGOOGLE:
 	{
 		auto &counter = meta[id].hlsl_magic_counter_buffer;

+ 1 - 0
3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp

@@ -111,6 +111,7 @@ public:
 
 	struct Source
 	{
+		spv::SourceLanguage lang = spv::SourceLanguageUnknown;
 		uint32_t version = 0;
 		bool es = false;
 		bool known = false;

The diff for this file was not shown because it is too large
+ 565 - 78
3rdparty/spirv-cross/spirv_glsl.cpp


+ 17 - 1
3rdparty/spirv-cross/spirv_glsl.hpp

@@ -32,6 +32,8 @@
 
 namespace SPIRV_CROSS_NAMESPACE
 {
+struct GlslConstantNameMapping;
+
 enum PlsFormat
 {
 	PlsNone = 0,
@@ -426,6 +428,8 @@ protected:
 	                                                   const uint32_t *args, uint32_t count);
 	virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
 	                                        uint32_t count);
+	void emit_non_semantic_shader_debug_info(uint32_t result_type, uint32_t result_id, uint32_t op,
+	                                         const uint32_t *args, uint32_t count);
 	virtual void emit_header();
 	void emit_line_directive(uint32_t file_id, uint32_t line_literal);
 	void build_workgroup_size(SmallVector<std::string> &arguments, const SpecializationConstant &x,
@@ -662,6 +666,7 @@ protected:
 		bool workgroup_size_is_hidden = false;
 		bool requires_relaxed_precision_analysis = false;
 		bool implicit_c_integer_promotion_rules = false;
+		bool supports_spec_constant_array_size = true;
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -685,6 +690,8 @@ protected:
 	void emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
 	                                    const SmallVector<uint32_t> &indices);
 	void emit_block_chain(SPIRBlock &block);
+	BlockID emit_block_chain_inner(SPIRBlock &block);
+	void emit_block_chain_cleanup(SPIRBlock &block);
 	void emit_hoisted_temporaries(SmallVector<std::pair<TypeID, ID>> &temporaries);
 	int get_constant_mapping_to_workgroup_component(const SPIRConstant &constant) const;
 	void emit_constant(const SPIRConstant &constant);
@@ -802,6 +809,10 @@ protected:
 	virtual void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
 	std::string to_non_uniform_aware_expression(uint32_t id);
 	std::string to_atomic_ptr_expression(uint32_t id);
+	std::string to_pretty_expression_if_int_constant(
+			uint32_t id,
+			const GlslConstantNameMapping *mapping_start, const GlslConstantNameMapping *mapping_end,
+			bool register_expression_read = true);
 	std::string to_expression(uint32_t id, bool register_expression_read = true);
 	std::string to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type);
 	std::string to_rerolled_array_expression(const SPIRType &parent_type, const std::string &expr, const SPIRType &type);
@@ -830,7 +841,7 @@ protected:
 	void emit_output_variable_initializer(const SPIRVariable &var);
 	std::string to_precision_qualifiers_glsl(uint32_t id);
 	virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);
-	std::string flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags);
+	std::string flags_to_qualifiers_glsl(const SPIRType &type, uint32_t id, const Bitset &flags);
 	const char *format_to_glsl(spv::ImageFormat format);
 	virtual std::string layout_for_member(const SPIRType &type, uint32_t index);
 	virtual std::string to_interpolation_qualifiers(const Bitset &flags);
@@ -1017,6 +1028,8 @@ protected:
 	const Instruction *get_next_instruction_in_block(const Instruction &instr);
 	static uint32_t mask_relevant_memory_semantics(uint32_t semantics);
 
+	std::string convert_floate4m3_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
+	std::string convert_floate5m2_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
@@ -1068,6 +1081,9 @@ protected:
 	std::string format_float(float value) const;
 	std::string format_double(double value) const;
 
+	uint32_t get_fp_fast_math_flags_for_op(uint32_t result_type, uint32_t id) const;
+	bool has_legacy_nocontract(uint32_t result_type, uint32_t id) const;
+
 private:
 	void init();
 

+ 2 - 2
3rdparty/spirv-cross/spirv_hlsl.cpp

@@ -3029,7 +3029,7 @@ string CompilerHLSL::get_inner_entry_point_name() const
 		SPIRV_CROSS_THROW("Unsupported execution model.");
 }
 
-uint32_t CompilerHLSL::input_vertices_from_execution_mode(spirv_cross::SPIREntryPoint &execution) const
+uint32_t CompilerHLSL::input_vertices_from_execution_mode(SPIREntryPoint &execution) const
 {
 	uint32_t input_vertices = 1;
 
@@ -3061,7 +3061,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret
 	auto &type = get<SPIRType>(func.return_type);
 	if (type.array.empty())
 	{
-		decl += flags_to_qualifiers_glsl(type, return_flags);
+		decl += flags_to_qualifiers_glsl(type, 0, return_flags);
 		decl += type_to_glsl(type);
 		decl += " ";
 	}

+ 291 - 97
3rdparty/spirv-cross/spirv_msl.cpp

@@ -510,7 +510,7 @@ void CompilerMSL::build_implicit_builtins()
 				has_local_invocation_index = true;
 			}
 
-			if (need_workgroup_size && builtin == BuiltInLocalInvocationId)
+			if (need_workgroup_size && builtin == BuiltInWorkgroupSize)
 			{
 				builtin_workgroup_size_id = var.self;
 				mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self);
@@ -684,28 +684,6 @@ void CompilerMSL::build_implicit_builtins()
 				mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id);
 			}
 
-			if (need_multiview)
-			{
-				// Multiview shaders are not allowed to write to gl_Layer, ostensibly because
-				// it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
-				// Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
-				// gl_Layer is an output in vertex-pipeline shaders.
-				uint32_t type_ptr_out_id = ir.increase_bound_by(2);
-				SPIRType uint_type_ptr_out = get_uint_type();
-				uint_type_ptr.op = OpTypePointer;
-				uint_type_ptr_out.pointer = true;
-				uint_type_ptr_out.pointer_depth++;
-				uint_type_ptr_out.parent_type = get_uint_type_id();
-				uint_type_ptr_out.storage = StorageClassOutput;
-				auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
-				ptr_out_type.self = get_uint_type_id();
-				uint32_t var_id = type_ptr_out_id + 1;
-				set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
-				set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
-				builtin_layer_id = var_id;
-				mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
-			}
-
 			if (need_multiview && !has_view_idx)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);
@@ -718,6 +696,28 @@ void CompilerMSL::build_implicit_builtins()
 			}
 		}
 
+		if (need_multiview)
+		{
+			// Multiview shaders are not allowed to write to gl_Layer, ostensibly because
+			// it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
+			// Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
+			// gl_Layer is an output in vertex-pipeline shaders.
+			uint32_t type_ptr_out_id = ir.increase_bound_by(2);
+			SPIRType uint_type_ptr_out = get_uint_type();
+			uint_type_ptr_out.op = OpTypePointer;
+			uint_type_ptr_out.pointer = true;
+			uint_type_ptr_out.pointer_depth++;
+			uint_type_ptr_out.parent_type = get_uint_type_id();
+			uint_type_ptr_out.storage = StorageClassOutput;
+			auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
+			ptr_out_type.self = get_uint_type_id();
+			uint32_t var_id = type_ptr_out_id + 1;
+			set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
+			set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
+			builtin_layer_id = var_id;
+			mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
+		}
+
 		if ((need_tesc_params && (msl_options.multi_patch_workgroup || !has_invocation_id || !has_primitive_id)) ||
 		    (need_tese_params && !has_primitive_id) || need_grid_params)
 		{
@@ -932,25 +932,55 @@ void CompilerMSL::build_implicit_builtins()
 
 		if (need_workgroup_size && !has_workgroup_size)
 		{
-			uint32_t offset = ir.increase_bound_by(2);
-			uint32_t type_ptr_id = offset;
-			uint32_t var_id = offset + 1;
-
-			// Create gl_WorkgroupSize.
-			uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
-			SPIRType uint_type_ptr = get<SPIRType>(type_id);
-			uint_type_ptr.op = OpTypePointer;
-			uint_type_ptr.pointer = true;
-			uint_type_ptr.pointer_depth++;
-			uint_type_ptr.parent_type = type_id;
-			uint_type_ptr.storage = StorageClassInput;
+			auto &execution = get_entry_point();
+			// First, check if the workgroup size _constant_ was already defined.
+			// If it was, we don't need to do anything; in fact, we shouldn't.
+			builtin_workgroup_size_id = execution.workgroup_size.constant;
+			if (builtin_workgroup_size_id == 0)
+			{
+				uint32_t var_id = ir.increase_bound_by(1);
 
-			auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
-			ptr_type.self = type_id;
-			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
-			set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
-			builtin_workgroup_size_id = var_id;
-			mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
+				// Create gl_WorkgroupSize.
+				uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
+				// If we have LocalSize or LocalSizeId, use those to define the workgroup size.
+				if (execution.flags.get(ExecutionModeLocalSizeId))
+				{
+					const SPIRConstant *init[] = { &get<SPIRConstant>(execution.workgroup_size.id_x),
+						                           &get<SPIRConstant>(execution.workgroup_size.id_y),
+						                           &get<SPIRConstant>(execution.workgroup_size.id_z) };
+					bool specialized = init[0]->specialization || init[1]->specialization || init[2]->specialization;
+					set<SPIRConstant>(var_id, type_id, init, 3, specialized);
+					execution.workgroup_size.constant = var_id;
+				}
+				else if (execution.flags.get(ExecutionModeLocalSize))
+				{
+					uint32_t offset = ir.increase_bound_by(3);
+					const SPIRConstant *init[] = {
+						&set<SPIRConstant>(offset, get_uint_type_id(), execution.workgroup_size.x, false),
+						&set<SPIRConstant>(offset + 1, get_uint_type_id(), execution.workgroup_size.y, false),
+						&set<SPIRConstant>(offset + 2, get_uint_type_id(), execution.workgroup_size.z, false)
+					};
+					set<SPIRConstant>(var_id, type_id, init, 3, false);
+					execution.workgroup_size.constant = var_id;
+				}
+				else
+				{
+					uint32_t type_ptr_id = ir.increase_bound_by(1);
+					SPIRType uint_type_ptr = get<SPIRType>(type_id);
+					uint_type_ptr.op = OpTypePointer;
+					uint_type_ptr.pointer = true;
+					uint_type_ptr.pointer_depth++;
+					uint_type_ptr.parent_type = type_id;
+					uint_type_ptr.storage = StorageClassInput;
+
+					auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
+					ptr_type.self = type_id;
+					set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+					mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
+				}
+				set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
+				builtin_workgroup_size_id = var_id;
+			}
 		}
 
 		if (!has_frag_depth && force_frag_depth_passthrough)
@@ -1681,6 +1711,7 @@ string CompilerMSL::compile()
 	backend.array_is_value_type_in_buffer_blocks = false;
 	backend.support_pointer_to_pointer = true;
 	backend.implicit_c_integer_promotion_rules = true;
+	backend.supports_spec_constant_array_size = false;
 
 	capture_output_to_buffer = msl_options.capture_output_to_buffer;
 	is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
@@ -1841,7 +1872,7 @@ void CompilerMSL::preprocess_op_codes()
 	if (preproc.uses_atomics)
 	{
 		add_header_line("#include <metal_atomic>");
-		add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"");
+		add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"", false);
 	}
 
 	// Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to
@@ -1850,6 +1881,10 @@ void CompilerMSL::preprocess_op_codes()
 	    (preproc.uses_image_write && !msl_options.supports_msl_version(2, 2)))
 		is_rasterization_disabled = true;
 
+	// FIXME: This currently does not consider BDA side effects, so we cannot deduce const device for BDA.
+	if (preproc.uses_buffer_write || preproc.uses_image_write)
+		has_descriptor_side_effects = true;
+
 	// Tessellation control shaders are run as compute functions in Metal, and so
 	// must capture their output to a buffer.
 	if (is_tesc_shader() || (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation))
@@ -5720,22 +5755,44 @@ void CompilerMSL::emit_header()
 {
 	// This particular line can be overridden during compilation, so make it a flag and not a pragma line.
 	if (suppress_missing_prototypes)
-		statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
+		add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"", false);
 	if (suppress_incompatible_pointer_types_discard_qualifiers)
-		statement("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"");
+		add_pragma_line("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"", false);
 
 	// Disable warning about "sometimes unitialized" when zero-initializing simple threadgroup variables
 	if (suppress_sometimes_unitialized)
-		statement("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\"");
+		add_pragma_line("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\"", false);
 
 	// Disable warning about missing braces for array<T> template to make arrays a value type
 	if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0)
-		statement("#pragma clang diagnostic ignored \"-Wmissing-braces\"");
+		add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-braces\"", false);
+
+	// Floating point fast math compile declarations
+	if (msl_options.use_fast_math_pragmas && msl_options.supports_msl_version(3, 2))
+	{
+		uint32_t contract_mask = FPFastMathModeAllowContractMask;
+		uint32_t relax_mask = (FPFastMathModeNSZMask | FPFastMathModeAllowRecipMask | FPFastMathModeAllowReassocMask);
+		uint32_t fast_mask = (relax_mask | FPFastMathModeNotNaNMask | FPFastMathModeNotInfMask);
+
+		// FP math mode
+		uint32_t fp_flags = get_fp_fast_math_flags(true);
+		const char *math_mode = "safe";
+		if ((fp_flags & fast_mask) == fast_mask)		// Must have all flags
+			math_mode = "fast";
+		else if ((fp_flags & relax_mask) == relax_mask)	// Must have all flags
+			math_mode = "relaxed";
+
+		add_pragma_line(join("#pragma metal fp math_mode(", math_mode, ")"), false);
+
+		// FP contraction
+		const char *contract_mode = ((fp_flags & contract_mask) == contract_mask) ?  "fast" : "off";
+		add_pragma_line(join("#pragma metal fp contract(", contract_mode, ")"), false);
+	}
 
 	for (auto &pragma : pragma_lines)
 		statement(pragma);
 
-	if (!pragma_lines.empty() || suppress_missing_prototypes)
+	if (!pragma_lines.empty())
 		statement("");
 
 	statement("#include <metal_stdlib>");
@@ -5755,18 +5812,23 @@ void CompilerMSL::emit_header()
 		statement("");
 }
 
-void CompilerMSL::add_pragma_line(const string &line)
+void CompilerMSL::add_pragma_line(const string &line, bool recompile_on_unique)
 {
-	auto rslt = pragma_lines.insert(line);
-	if (rslt.second)
-		force_recompile();
+	if (std::find(pragma_lines.begin(), pragma_lines.end(), line) == pragma_lines.end())
+	{
+		pragma_lines.push_back(line);
+		if (recompile_on_unique)
+			force_recompile();
+	}
 }
 
 void CompilerMSL::add_typedef_line(const string &line)
 {
-	auto rslt = typedef_lines.insert(line);
-	if (rslt.second)
+	if (std::find(typedef_lines.begin(), typedef_lines.end(), line) == typedef_lines.end())
+	{
+		typedef_lines.push_back(line);
 		force_recompile();
+	}
 }
 
 // Template struct like spvUnsafeArray<> need to be declared *before* any resources are declared
@@ -8353,9 +8415,22 @@ void CompilerMSL::emit_specialization_constants_and_structs()
 					if (unique_func_constants[constant_id] == c.self)
 						statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id,
 						          ")]];");
-					statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name,
-					          ") ? ", bitcast_expression(type, sc_tmp_type, sc_tmp_name), " : ", constant_expression(c),
-					          ";");
+					// RenderDoc and other instrumentation may reuse the same SpecId with different base types.
+					// We deduplicate to one [[function_constant(id)]] temp and then initialize all variants from it.
+					// Metal forbids as_type to/from 'bool', so if either side is Boolean, avoid bitcasting here and
+					// prefer a value cast via a constructor instead (e.g. uint(tmp) / float(tmp) / bool(tmp)).
+					// This preserves expected toggle semantics and prevents illegal MSL like as_type<uint>(bool_tmp).
+					{
+						string sc_true_expr;
+						if (sc_tmp_type == type.basetype)
+							sc_true_expr = sc_tmp_name;
+						else if (sc_tmp_type == SPIRType::Boolean || type.basetype == SPIRType::Boolean)
+							sc_true_expr = join(sc_type_name, "(", sc_tmp_name, ")");
+						else
+							sc_true_expr = bitcast_expression(type, sc_tmp_type, sc_tmp_name);
+						statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name,
+						          ") ? ", sc_true_expr, " : ", constant_expression(c), ";");
+					}
 				}
 				else if (has_decoration(c.self, DecorationSpecId))
 				{
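
A hedged sketch of the MSL this path is aiming for when instrumentation reuses SpecId 0 for both a uint and a bool constant (identifier names here are hypothetical, not the generator's actual temporaries):

constant uint spvSpecId0Tmp [[function_constant(0)]];
// Same base type: use the temp directly (or bitcast for non-bool type mismatches).
constant uint opt_level = is_function_constant_defined(spvSpecId0Tmp) ? spvSpecId0Tmp : 2u;
// Boolean involved: value-cast via a constructor instead of as_type<>, which Metal rejects for bool.
constant bool use_fast_path = is_function_constant_defined(spvSpecId0Tmp) ? bool(spvSpecId0Tmp) : false;
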
@@ -9161,7 +9236,12 @@ bool CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, cons
 	// and there is a risk of concurrent write access to other components,
 	// we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect.
 	// The MSL compiler refuses to allow component-level access for any non-packed vector types.
-	if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup))
+	// MSL refuses to take the address of or a reference to a vector component, even for packed types, so just
+	// force through the pointer cast. Not much we can do, sadly.
+	// For packed types, we could technically omit this if we know the reference does not have to turn into a pointer
+	// of some kind, but that requires external analysis passes to figure out, and
+	// this case is likely rare enough that we don't need to bother.
+	if (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup)
 	{
 		const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device";
 		expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")");
@@ -9485,21 +9565,21 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		break;
 
 	case OpFMul:
-		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
+		if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
 			MSL_BFOP(spvFMul);
 		else
 			MSL_BOP(*);
 		break;
 
 	case OpFAdd:
-		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
+		if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
 			MSL_BFOP(spvFAdd);
 		else
 			MSL_BOP(+);
 		break;
 
 	case OpFSub:
-		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
+		if (msl_options.invariant_float_math || has_legacy_nocontract(ops[0], ops[1]))
 			MSL_BFOP(spvFSub);
 		else
 			MSL_BOP(-);
@@ -9997,7 +10077,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 	case OpVectorTimesMatrix:
 	case OpMatrixTimesVector:
 	{
-		if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
+		if (!msl_options.invariant_float_math && !has_legacy_nocontract(ops[0], ops[1]))
 		{
 			CompilerGLSL::emit_instruction(instruction);
 			break;
@@ -10039,7 +10119,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 
 	case OpMatrixTimesMatrix:
 	{
-		if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
+		if (!msl_options.invariant_float_math && !has_legacy_nocontract(ops[0], ops[1]))
 		{
 			CompilerGLSL::emit_instruction(instruction);
 			break;
@@ -10725,13 +10805,13 @@ bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rh
 	auto *lhs_var = maybe_get_backing_variable(lhs_id);
 	if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
 		lhs_is_array_template = true;
-	else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(lhs_var->basetype)))
+	else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_explicit_layout(get<SPIRType>(lhs_var->basetype)))
 		lhs_is_array_template = false;
 
 	auto *rhs_var = maybe_get_backing_variable(rhs_id);
 	if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
 		rhs_is_array_template = true;
-	else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(rhs_var->basetype)))
+	else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_explicit_layout(get<SPIRType>(rhs_var->basetype)))
 		rhs_is_array_template = false;
 
 	// If threadgroup storage qualifiers are *not* used:
@@ -11182,35 +11262,76 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 
 	auto &restype = get<SPIRType>(result_type);
 
+	// Only precise:: preserves NaN in transcendentals (supposedly; cannot find documentation for this).
+	const auto drop_nan_inf = FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask;
+	bool preserve_nan = (get_fp_fast_math_flags_for_op(result_type, id) & drop_nan_inf) != drop_nan_inf;
+	const char *preserve_str = preserve_nan ? "precise" : "fast";
+
+	// TODO: Emit the default behavior to match existing code. Might need to be revisited.
+	// Only fp32 has the precise:: override.
+#define EMIT_PRECISE_OVERRIDE(glsl_op, op) \
+	case GLSLstd450##glsl_op: \
+		if (restype.basetype == SPIRType::Float && preserve_nan) \
+			emit_unary_func_op(result_type, id, args[0], "precise::" op); \
+		else \
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); \
+		break
+
 	switch (op)
 	{
+	EMIT_PRECISE_OVERRIDE(Cos, "cos");
+	EMIT_PRECISE_OVERRIDE(Sin, "sin");
+	EMIT_PRECISE_OVERRIDE(Tan, "tan");
+	EMIT_PRECISE_OVERRIDE(Acos, "acos");
+	EMIT_PRECISE_OVERRIDE(Asin, "asin");
+	EMIT_PRECISE_OVERRIDE(Atan, "atan");
+	EMIT_PRECISE_OVERRIDE(Exp, "exp");
+	EMIT_PRECISE_OVERRIDE(Exp2, "exp2");
+	EMIT_PRECISE_OVERRIDE(Log, "log");
+	EMIT_PRECISE_OVERRIDE(Log2, "log2");
+	EMIT_PRECISE_OVERRIDE(Sqrt, "sqrt");
+#undef EMIT_PRECISE_OVERRIDE
+
 	case GLSLstd450Sinh:
 		if (restype.basetype == SPIRType::Half)
 		{
+			auto ftype = restype;
+			ftype.basetype = SPIRType::Float;
+
 			// MSL does not have overload for half. Force-cast back to half.
-			auto expr = join("half(fast::sinh(", to_unpacked_expression(args[0]), "))");
+			auto expr = join(type_to_glsl(restype), "(", preserve_str, "::sinh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
 			emit_op(result_type, id, expr, should_forward(args[0]));
 			inherit_expression_dependencies(id, args[0]);
 		}
+		else if (preserve_nan)
+			emit_unary_func_op(result_type, id, args[0], "precise::sinh");
 		else
 			emit_unary_func_op(result_type, id, args[0], "fast::sinh");
 		break;
 	case GLSLstd450Cosh:
 		if (restype.basetype == SPIRType::Half)
 		{
+			auto ftype = restype;
+			ftype.basetype = SPIRType::Float;
+
 			// MSL does not have overload for half. Force-cast back to half.
-			auto expr = join("half(fast::cosh(", to_unpacked_expression(args[0]), "))");
+			auto expr = join(type_to_glsl(restype), "(", preserve_str, "::cosh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
 			emit_op(result_type, id, expr, should_forward(args[0]));
 			inherit_expression_dependencies(id, args[0]);
 		}
+		else if (preserve_nan)
+			emit_unary_func_op(result_type, id, args[0], "precise::cosh");
 		else
 			emit_unary_func_op(result_type, id, args[0], "fast::cosh");
 		break;
 	case GLSLstd450Tanh:
 		if (restype.basetype == SPIRType::Half)
 		{
+			auto ftype = restype;
+			ftype.basetype = SPIRType::Float;
+
 			// MSL does not have overload for half. Force-cast back to half.
-			auto expr = join("half(fast::tanh(", to_unpacked_expression(args[0]), "))");
+			auto expr = join(type_to_glsl(restype), "(", preserve_str, "::tanh(", type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), ")))");
 			emit_op(result_type, id, expr, should_forward(args[0]));
 			inherit_expression_dependencies(id, args[0]);
 		}
@@ -11221,7 +11342,13 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		if (restype.basetype == SPIRType::Half)
 		{
 			// MSL does not have overload for half. Force-cast back to half.
-			auto expr = join("half(fast::atan2(", to_unpacked_expression(args[0]), ", ", to_unpacked_expression(args[1]), "))");
+			auto ftype = restype;
+			ftype.basetype = SPIRType::Float;
+
+			auto expr = join(type_to_glsl(restype),
+			                 "(", preserve_str, "::atan2(",
+			                 type_to_glsl(ftype), "(", to_unpacked_expression(args[0]), "), ",
+			                 type_to_glsl(ftype), "(", to_unpacked_expression(args[1]), ")))");
 			emit_op(result_type, id, expr, should_forward(args[0]) && should_forward(args[1]));
 			inherit_expression_dependencies(id, args[0]);
 			inherit_expression_dependencies(id, args[1]);
@@ -11230,7 +11357,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 			emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2");
 		break;
 	case GLSLstd450InverseSqrt:
-		emit_unary_func_op(result_type, id, args[0], "rsqrt");
+		if (restype.basetype == SPIRType::Float && preserve_nan)
+			emit_unary_func_op(result_type, id, args[0], "precise::rsqrt");
+		else
+			emit_unary_func_op(result_type, id, args[0], "rsqrt");
 		break;
 	case GLSLstd450RoundEven:
 		emit_unary_func_op(result_type, id, args[0], "rint");
@@ -11462,11 +11592,14 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	{
 		auto &exp_type = expression_type(args[0]);
 		// MSL does not support scalar versions here.
-		// MSL has no implementation for normalize in the fast:: namespace for half2 and half3
+		// MSL has no implementation for normalize in the fast:: namespace for half
 		// Returns -1 or 1 for valid input, sign() does the job.
+
+		// precise::normalize asm looks ridiculous.
+		// Don't think this actually matters unless proven otherwise.
 		if (exp_type.vecsize == 1)
 			emit_unary_func_op(result_type, id, args[0], "sign");
-		else if (exp_type.vecsize <= 3 && exp_type.basetype == SPIRType::Half)
+		else if (exp_type.basetype == SPIRType::Half)
 			emit_unary_func_op(result_type, id, args[0], "normalize");
 		else
 			emit_unary_func_op(result_type, id, args[0], "fast::normalize");
@@ -11529,7 +11662,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 
 	case GLSLstd450Pow:
 		// powr makes x < 0.0 undefined, just like SPIR-V.
-		emit_binary_func_op(result_type, id, args[0], args[1], "powr");
+		if (restype.basetype == SPIRType::Float && preserve_nan)
+			emit_binary_func_op(result_type, id, args[0], args[1], "precise::powr");
+		else
+			emit_binary_func_op(result_type, id, args[0], args[1], "powr");
 		break;
 
 	default:
@@ -13753,6 +13889,24 @@ uint32_t CompilerMSL::get_or_allocate_builtin_output_member_location(spv::BuiltI
 	return loc;
 }
 
+bool CompilerMSL::entry_point_returns_stage_output() const
+{
+	if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)
+		return false;
+	bool ep_should_return_output = !get_is_rasterization_disabled();
+	return stage_out_var_id && ep_should_return_output;
+}
+
+bool CompilerMSL::entry_point_requires_const_device_buffers() const
+{
+	// For fragment, we don't really need it, but it might help avoid pessimization
+	// if the compiler deduces it needs to use late-Z for whatever reason.
+	return (get_execution_model() == ExecutionModelFragment && !has_descriptor_side_effects) ||
+	       (entry_point_returns_stage_output() &&
+	        (get_execution_model() == ExecutionModelVertex ||
+	         get_execution_model() == ExecutionModelTessellationEvaluation));
+}
+
 // Returns the type declaration for a function, including the
 // entry type if the current function is the entry point function
 string CompilerMSL::func_type_decl(SPIRType &type)
@@ -13763,8 +13917,7 @@ string CompilerMSL::func_type_decl(SPIRType &type)
 		return return_type;
 
 	// If an outgoing interface block has been defined, and it should be returned, override the entry point return type
-	bool ep_should_return_output = !get_is_rasterization_disabled();
-	if (stage_out_var_id && ep_should_return_output)
+	if (entry_point_returns_stage_output())
 		return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type, 0);
 
 	// Prepend a entry type, based on the execution model
@@ -13888,16 +14041,14 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo
 
 	case StorageClassStorageBuffer:
 	{
-		// For arguments from variable pointers, we use the write count deduction, so
-		// we should not assume any constness here. Only for global SSBOs.
-		bool readonly = false;
-		if (!var || has_decoration(type.self, DecorationBlock))
-			readonly = flags.get(DecorationNonWritable);
-
-		if (decoration_flags_signal_coherent(flags))
-			readonly = false;
-
-		addr_space = readonly ? "const device" : "device";
+		// When dealing with descriptor aliasing, it becomes very problematic to make use of
+		// readonly qualifiers.
+		// If rasterization is not disabled in vertex/tese, Metal does not allow side effects and refuses to compile "device",
+		// even if there are no writes. Just force const device.
+		if (entry_point_requires_const_device_buffers())
+			addr_space = "const device";
+		else
+			addr_space = "device";
 		break;
 	}
 
@@ -13914,7 +14065,12 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo
 		{
 			bool ssbo = has_decoration(type.self, DecorationBufferBlock);
 			if (ssbo)
-				addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
+			{
+				if (entry_point_requires_const_device_buffers())
+					addr_space = "const device";
+				else
+					addr_space = "device";
+			}
 			else
 				addr_space = "constant";
 		}
@@ -16553,7 +16709,7 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type, uint32_t variable_i
 	default:
 		if (type_is_array_of_pointers(type) || using_builtin_array())
 		{
-			const SPIRVariable *var = variable_id ? &get<SPIRVariable>(variable_id) : nullptr;
+			const SPIRVariable *var = variable_id ? maybe_get<SPIRVariable>(variable_id) : nullptr;
 			if (var && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer) &&
 			    is_array(get_variable_data_type(*var)))
 			{
@@ -16854,6 +17010,8 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb
 		if (p_var && p_var->basevariable)
 			p_var = maybe_get<SPIRVariable>(p_var->basevariable);
 
+		bool has_access_qualifier = true;
+
 		switch (img_type.access)
 		{
 		case AccessQualifierReadOnly:
@@ -16879,12 +17037,21 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb
 
 				img_type_name += "write";
 			}
+			else
+			{
+				has_access_qualifier = false;
+			}
 			break;
 		}
 		}
 
 		if (p_var && has_decoration(p_var->self, DecorationCoherent) && msl_options.supports_msl_version(3, 2))
+		{
+			// Cannot declare memory_coherence_device without access qualifier.
+			if (!has_access_qualifier)
+				img_type_name += ", access::read";
 			img_type_name += ", memory_coherence_device";
+		}
 	}
 
 	img_type_name += ">";
@@ -17317,13 +17484,18 @@ void CompilerMSL::emit_subgroup_cluster_op_cast(uint32_t result_type, uint32_t r
 	inherit_expression_dependencies(result_id, op0);
 }
 
+// Note: Metal forbids bitcasting to/from 'bool' using as_type. This function is used widely
+// for generating casts in the backend. To avoid generating illegal MSL when the canonical
+// function constant type (from deduplicated SpecId) is Boolean, fall back to value-cast in
+// that case by returning type_to_glsl(out_type) instead of as_type<...>.
 string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
 {
 	if (out_type.basetype == in_type.basetype)
 		return "";
 
-	assert(out_type.basetype != SPIRType::Boolean);
-	assert(in_type.basetype != SPIRType::Boolean);
+	// Avoid bitcasting to/from booleans in MSL; use value cast instead.
+	if (out_type.basetype == SPIRType::Boolean || in_type.basetype == SPIRType::Boolean)
+		return type_to_glsl(out_type);
 
 	bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type) && (out_type.vecsize == in_type.vecsize);
 	bool same_size_cast = (out_type.width * out_type.vecsize) == (in_type.width * in_type.vecsize);
@@ -17679,6 +17851,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 	case BuiltInGlobalInvocationId:
 		return "thread_position_in_grid";
 
+	case BuiltInWorkgroupSize:
+		return "threads_per_threadgroup";
+
 	case BuiltInWorkgroupId:
 		return "threadgroup_position_in_grid";
 
@@ -17864,6 +18039,7 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id)
 	case BuiltInLocalInvocationId:
 	case BuiltInNumWorkgroups:
 	case BuiltInWorkgroupId:
+	case BuiltInWorkgroupSize:
 		return "uint3";
 	case BuiltInLocalInvocationIndex:
 	case BuiltInNumSubgroups:
@@ -18531,11 +18707,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 
 	case OpFAdd:
 	case OpFSub:
-		if (compiler.msl_options.invariant_float_math ||
-		    compiler.has_decoration(args[1], DecorationNoContraction))
-		{
+		if (compiler.msl_options.invariant_float_math || compiler.has_legacy_nocontract(args[0], args[1]))
 			return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub;
-		}
 		break;
 
 	case OpFMul:
@@ -18543,11 +18716,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 	case OpMatrixTimesVector:
 	case OpVectorTimesMatrix:
 	case OpMatrixTimesMatrix:
-		if (compiler.msl_options.invariant_float_math ||
-		    compiler.has_decoration(args[1], DecorationNoContraction))
-		{
+		if (compiler.msl_options.invariant_float_math || compiler.has_legacy_nocontract(args[0], args[1]))
 			return SPVFuncImplFMul;
-		}
 		break;
 
 	case OpQuantizeToF16:
@@ -19572,7 +19742,7 @@ void CompilerMSL::analyze_argument_buffers()
 				SetBindingPair pair = { desc_set, binding };
 
 				if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler ||
-				    resource.basetype == SPIRType::SampledImage)
+				    resource.basetype == SPIRType::SampledImage || resource.basetype == SPIRType::AccelerationStructure)
 				{
 					// Drop pointer information when we emit the resources into a struct.
 					buffer_type.member_types.push_back(get_variable_data_type_id(var));
@@ -19827,6 +19997,30 @@ bool CompilerMSL::specialization_constant_is_macro(uint32_t const_id) const
 	return constant_macro_ids.find(const_id) != constant_macro_ids.end();
 }
 
+// Start with all fast math flags enabled, and selectively disable them based on execution modes and float controls.
+uint32_t CompilerMSL::get_fp_fast_math_flags(bool incl_ops) const
+{
+	uint32_t fp_flags = ~0;
+	auto &ep = get_entry_point();
+
+	if (ep.flags.get(ExecutionModeSignedZeroInfNanPreserve))
+		fp_flags &= ~(FPFastMathModeNSZMask | FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask);
+
+	if (ep.flags.get(ExecutionModeContractionOff))
+		fp_flags &= ~(FPFastMathModeAllowContractMask);
+
+	for (auto &fp_pair : ep.fp_fast_math_defaults)
+		if (fp_pair.second)
+			fp_flags &= get<SPIRConstant>(fp_pair.second).scalar();
+
+	if (incl_ops)
+		for (auto &p_m : ir.meta)
+			if (p_m.second.decoration.decoration_flags.get(DecorationFPFastMathMode))
+				fp_flags &= p_m.second.decoration.fp_fast_math_mode;
+
+	return fp_flags;
+}
+
 void CompilerMSL::emit_block_hints(const SPIRBlock &)
 {
 }

+ 18 - 3
3rdparty/spirv-cross/spirv_msl.hpp

@@ -542,6 +542,10 @@ public:
 		// The result can be queried with get_is_rasterization_disabled.
 		bool auto_disable_rasterization = false;
 
+		// Use Fast Math pragmas in MSL code, based on SPIR-V float controls and FP ExecutionModes.
+		// Requires MSL 3.2 or above, and has no effect with earlier MSL versions.
+		bool use_fast_math_pragmas = false;
+
 		bool is_ios() const
 		{
 			return platform == iOS;
@@ -767,6 +771,14 @@ public:
 	// These must only be called after a successful call to CompilerMSL::compile().
 	bool specialization_constant_is_macro(uint32_t constant_id) const;
 
+	// Returns a mask of SPIR-V FP Fast Math Mode flags that represents the set of flags that can be applied
+	// across all floating-point types. Each FPFastMathDefault execution mode operation identifies the flags
+	// for one floating-point type, and the value returned here is a bitwise-AND combination across all types.
+	// If incl_ops is enabled, the FPFastMathMode of any SPIR-V operation is also included in the bitwise-AND
+	// to determine the minimal fast-math that applies to all default execution modes and all operations.
+	// The returned value is also affected by execution modes SignedZeroInfNanPreserve and ContractionOff.
+	uint32_t get_fp_fast_math_flags(bool incl_ops) const;
+
 protected:
 	// An enum of SPIR-V functions that are implemented in additional
 	// source code that is added to the shader if necessary.
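
A minimal C++ sketch of how a client might opt into the new pragma emission and query the merged mask; the function and variable names are assumptions and error handling is omitted.

#include "spirv_msl.hpp"

std::string compile_with_fast_math_pragmas(std::vector<uint32_t> spirv_binary)
{
	spirv_cross::CompilerMSL msl(std::move(spirv_binary));

	auto opts = msl.get_msl_options();
	opts.set_msl_version(3, 2);          // Pragmas require MSL 3.2 or later.
	opts.use_fast_math_pragmas = true;   // Option added in this change.
	msl.set_msl_options(opts);

	std::string source = msl.compile();

	// Bitwise-AND of all FPFastMathDefault modes, plus per-op FPFastMathMode decorations when true is passed.
	uint32_t fast_math_flags = msl.get_fp_fast_math_flags(true);
	(void)fast_math_flags;
	return source;
}
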
@@ -1047,6 +1059,8 @@ protected:
 
 	void fix_up_shader_inputs_outputs();
 
+	bool entry_point_returns_stage_output() const;
+	bool entry_point_requires_const_device_buffers() const;
 	std::string func_type_decl(SPIRType &type);
 	std::string entry_point_args_classic(bool append_comma);
 	std::string entry_point_args_argument_buffer(bool append_comma);
@@ -1126,7 +1140,7 @@ protected:
 	                         uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
 	                         bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0);
 	const char *get_memory_order(uint32_t spv_mem_sem);
-	void add_pragma_line(const std::string &line);
+	void add_pragma_line(const std::string &line, bool recompile_on_unique);
 	void add_typedef_line(const std::string &line);
 	void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
 	bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id,
@@ -1209,8 +1223,8 @@ protected:
 	std::unordered_map<uint32_t, uint32_t> fragment_output_components;
 	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location;
 	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_output_location;
-	std::set<std::string> pragma_lines;
-	std::set<std::string> typedef_lines;
+	std::vector<std::string> pragma_lines;
+	std::vector<std::string> typedef_lines;
 	SmallVector<uint32_t> vars_needing_early_declaration;
 	std::unordered_set<uint32_t> constant_macro_ids;
 
@@ -1252,6 +1266,7 @@ protected:
 	bool using_builtin_array() const;
 
 	bool is_rasterization_disabled = false;
+	bool has_descriptor_side_effects = false;
 	bool capture_output_to_buffer = false;
 	bool needs_swizzle_buffer_def = false;
 	bool used_swizzle_buffer = false;

+ 112 - 14
3rdparty/spirv-cross/spirv_parser.cpp

@@ -213,8 +213,8 @@ void Parser::parse(const Instruction &instruction)
 
 	case OpSource:
 	{
-		auto lang = static_cast<SourceLanguage>(ops[0]);
-		switch (lang)
+		ir.source.lang = static_cast<SourceLanguage>(ops[0]);
+		switch (ir.source.lang)
 		{
 		case SourceLanguageESSL:
 			ir.source.es = true;
@@ -318,6 +318,19 @@ void Parser::parse(const Instruction &instruction)
 					ir.load_type_width.insert({ ops[1], type->width });
 			}
 		}
+		else if (op == OpExtInst)
+		{
+			// Don't want to deal with ForwardRefs here.
+
+			auto &ext = get<SPIRExtension>(ops[2]);
+			if (ext.ext == SPIRExtension::NonSemanticShaderDebugInfo)
+			{
+				// Parse global ShaderDebugInfo we care about.
+				// Just forward the string information.
+				if (ops[3] == SPIRExtension::DebugSource)
+					set<SPIRString>(ops[1], get<SPIRString>(ops[4]).str);
+			}
+		}
 		break;
 	}
 
@@ -369,6 +382,30 @@ void Parser::parse(const Instruction &instruction)
 			execution.output_primitives = ops[2];
 			break;
 
+		case ExecutionModeSignedZeroInfNanPreserve:
+			switch (ops[2])
+			{
+			case 8:
+				execution.signed_zero_inf_nan_preserve_8 = true;
+				break;
+
+			case 16:
+				execution.signed_zero_inf_nan_preserve_16 = true;
+				break;
+
+			case 32:
+				execution.signed_zero_inf_nan_preserve_32 = true;
+				break;
+
+			case 64:
+				execution.signed_zero_inf_nan_preserve_64 = true;
+				break;
+
+			default:
+				SPIRV_CROSS_THROW("Invalid bit-width for SignedZeroInfNanPreserve.");
+			}
+			break;
+
 		default:
 			break;
 		}
@@ -381,13 +418,21 @@ void Parser::parse(const Instruction &instruction)
 		auto mode = static_cast<ExecutionMode>(ops[1]);
 		execution.flags.set(mode);
 
-		if (mode == ExecutionModeLocalSizeId)
+		switch (mode)
 		{
+		case ExecutionModeLocalSizeId:
 			execution.workgroup_size.id_x = ops[2];
 			execution.workgroup_size.id_y = ops[3];
 			execution.workgroup_size.id_z = ops[4];
-		}
+			break;
+
+		case ExecutionModeFPFastMathDefault:
+			execution.fp_fast_math_defaults[ops[2]] = ops[3];
+			break;
 
+		default:
+			break;
+		}
 		break;
 	}
 
@@ -538,7 +583,7 @@ void Parser::parse(const Instruction &instruction)
 		uint32_t width = ops[1];
 		auto &type = set<SPIRType>(id, op);
 
-		if (width != 16 && length > 2)
+		if (width != 16 && width != 8 && length > 2)
 			SPIRV_CROSS_THROW("Unrecognized FP encoding mode for OpTypeFloat.");
 
 		if (width == 64)
@@ -557,6 +602,17 @@ void Parser::parse(const Instruction &instruction)
 			else
 				type.basetype = SPIRType::Half;
 		}
+		else if (width == 8)
+		{
+			if (length < 2)
+				SPIRV_CROSS_THROW("Missing encoding for OpTypeFloat 8.");
+			else if (ops[2] == spv::FPEncodingFloat8E4M3EXT)
+				type.basetype = SPIRType::FloatE4M3;
+			else if (ops[2] == spv::FPEncodingFloat8E5M2EXT)
+				type.basetype = SPIRType::FloatE5M2;
+			else
+				SPIRV_CROSS_THROW("Invalid encoding for OpTypeFloat 8.");
+		}
 		else
 			SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type.");
 		type.width = width;
@@ -614,15 +670,33 @@ void Parser::parse(const Instruction &instruction)
 		auto &matrixbase = set<SPIRType>(id, base);
 
 		matrixbase.op = op;
-		matrixbase.cooperative.scope_id = ops[2];
-		matrixbase.cooperative.rows_id = ops[3];
-		matrixbase.cooperative.columns_id = ops[4];
-		matrixbase.cooperative.use_id = ops[5];
+		matrixbase.ext.cooperative.scope_id = ops[2];
+		matrixbase.ext.cooperative.rows_id = ops[3];
+		matrixbase.ext.cooperative.columns_id = ops[4];
+		matrixbase.ext.cooperative.use_id = ops[5];
 		matrixbase.self = id;
 		matrixbase.parent_type = ops[1];
 		break;
 	}
 
+	case OpTypeCooperativeVectorNV:
+	{
+		uint32_t id = ops[0];
+		auto &type = set<SPIRType>(id, op);
+
+		type.basetype = SPIRType::CoopVecNV;
+		type.op = op;
+		type.ext.coopVecNV.component_type_id = ops[1];
+		type.ext.coopVecNV.component_count_id = ops[2];
+		type.parent_type = ops[1];
+
+		// CoopVecNV can be used with integer operations like SMax, where
+		// spirv-opt does explicit checks on integer bit-width.
+		auto component_type = get<SPIRType>(type.ext.coopVecNV.component_type_id);
+		type.width = component_type.width;
+		break;
+	}
+
 	case OpTypeArray:
 	{
 		uint32_t id = ops[0];
@@ -820,6 +894,20 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}
 
+	case OpTypeTensorARM:
+	{
+		uint32_t id = ops[0];
+		auto &type = set<SPIRType>(id, op);
+		type.basetype = SPIRType::Tensor;
+		type.ext.tensor = {};
+		type.ext.tensor.type = ops[1];
+		if (length >= 3)
+			type.ext.tensor.rank = ops[2];
+		if (length >= 4)
+			type.ext.tensor.shape = ops[3];
+		break;
+	}
+
 	// Variable declaration
 	// All variables are essentially pointers with a storage qualifier.
 	case OpVariable:
@@ -866,17 +954,27 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}
 
-		// Constants
+	// Constants
 	case OpSpecConstant:
 	case OpConstant:
+	case OpConstantCompositeReplicateEXT:
+	case OpSpecConstantCompositeReplicateEXT:
 	{
 		uint32_t id = ops[1];
 		auto &type = get<SPIRType>(ops[0]);
-
-		if (type.width > 32)
-			set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant);
+		if (op == OpConstantCompositeReplicateEXT || op == OpSpecConstantCompositeReplicateEXT)
+		{
+			auto subconstant = uint32_t(ops[2]);
+			set<SPIRConstant>(id, ops[0], &subconstant, 1, op == OpSpecConstantCompositeReplicateEXT, true);
+		}
 		else
-			set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant);
+		{
+
+			if (type.width > 32)
+				set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant);
+			else
+				set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant);
+		}
 		break;
 	}
 

+ 10 - 1
3rdparty/spirv-cross/spirv_reflect.cpp

@@ -477,6 +477,12 @@ string CompilerReflection::execution_model_to_str(spv::ExecutionModel model)
 		return "rmiss";
 	case ExecutionModelCallableNV:
 		return "rcall";
+	case ExecutionModelMeshNV:
+	case ExecutionModelMeshEXT:
+		return "mesh";
+	case ExecutionModelTaskNV:
+	case ExecutionModelTaskEXT:
+		return "task";
 	default:
 		return "???";
 	}
@@ -504,7 +510,9 @@ void CompilerReflection::emit_entry_points()
 			json_stream->begin_json_object();
 			json_stream->emit_json_key_value("name", e.name);
 			json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model));
-			if (e.execution_model == ExecutionModelGLCompute)
+			if (e.execution_model == ExecutionModelGLCompute || e.execution_model == spv::ExecutionModelMeshEXT ||
+			    e.execution_model == spv::ExecutionModelMeshNV || e.execution_model == spv::ExecutionModelTaskEXT ||
+			    e.execution_model == spv::ExecutionModelTaskNV)
 			{
 				const auto &spv_entry = get_entry_point(e.name, e.execution_model);
 
@@ -547,6 +555,7 @@ void CompilerReflection::emit_resources()
 	emit_resources("push_constants", res.push_constant_buffers);
 	emit_resources("counters", res.atomic_counters);
 	emit_resources("acceleration_structures", res.acceleration_structures);
+	emit_resources("tensors", res.tensors);
 }
 
 void CompilerReflection::emit_resources(const char *tag, const SmallVector<Resource> &resources)

Some files were not shown because too many files changed in this diff