فهرست منبع

Updated glslang.

Бранимир Караџић 3 ماه پیش
والد
کامیت
f1e0ba4b0b
34 فایلهای تغییر یافته به همراه 2826 افزوده شده و 1131 حذف شده
  1. 5 3
      3rdparty/glslang/SPIRV/GLSL.ext.ARM.h
  2. 1 1
      3rdparty/glslang/SPIRV/GLSL.ext.EXT.h
  3. 2 0
      3rdparty/glslang/SPIRV/GLSL.ext.QCOM.h
  4. 170 2
      3rdparty/glslang/SPIRV/GlslangToSpv.cpp
  5. 150 0
      3rdparty/glslang/SPIRV/SpvBuilder.cpp
  6. 6 0
      3rdparty/glslang/SPIRV/SpvBuilder.h
  7. 16 3
      3rdparty/glslang/SPIRV/disassemble.cpp
  8. 66 13
      3rdparty/glslang/SPIRV/doc.cpp
  9. 113 4
      3rdparty/glslang/SPIRV/hex_float.h
  10. 54 0
      3rdparty/glslang/SPIRV/spirv.hpp11
  11. 1 1
      3rdparty/glslang/build_info.h
  12. 7 0
      3rdparty/glslang/glslang/Include/BaseTypes.h
  13. 11 0
      3rdparty/glslang/glslang/Include/ConstantUnion.h
  14. 56 13
      3rdparty/glslang/glslang/Include/Types.h
  15. 20 0
      3rdparty/glslang/glslang/Include/intermediate.h
  16. 16 0
      3rdparty/glslang/glslang/MachineIndependent/Constant.cpp
  17. 144 0
      3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp
  18. 62 9
      3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp
  19. 5 3
      3rdparty/glslang/glslang/MachineIndependent/ParseContextBase.cpp
  20. 651 80
      3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp
  21. 5 1
      3rdparty/glslang/glslang/MachineIndependent/ParseHelper.h
  22. 77 12
      3rdparty/glslang/glslang/MachineIndependent/Scan.cpp
  23. 6 1
      3rdparty/glslang/glslang/MachineIndependent/ScanContext.h
  24. 6 1
      3rdparty/glslang/glslang/MachineIndependent/SymbolTable.cpp
  25. 18 2
      3rdparty/glslang/glslang/MachineIndependent/SymbolTable.h
  26. 44 0
      3rdparty/glslang/glslang/MachineIndependent/Versions.cpp
  27. 4 0
      3rdparty/glslang/glslang/MachineIndependent/Versions.h
  28. 77 19
      3rdparty/glslang/glslang/MachineIndependent/glslang.y
  29. 552 543
      3rdparty/glslang/glslang/MachineIndependent/glslang_tab.cpp
  30. 421 412
      3rdparty/glslang/glslang/MachineIndependent/glslang_tab.cpp.h
  31. 19 0
      3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp
  32. 35 6
      3rdparty/glslang/glslang/MachineIndependent/linkValidate.cpp
  33. 4 0
      3rdparty/glslang/glslang/MachineIndependent/parseVersions.h
  34. 2 2
      3rdparty/glslang/glslang/ResourceLimits/ResourceLimits.cpp

+ 5 - 3
3rdparty/glslang/SPIRV/GLSL.ext.ARM.h

@@ -1,5 +1,5 @@
 /*
-** Copyright (c) 2022 ARM Limited
+** Copyright (c) 2022, 2025 ARM Limited
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a copy
 ** of this software and/or associated documentation files (the "Materials"),
@@ -28,8 +28,10 @@
 #define GLSLextARM_H
 
 static const int GLSLextARMVersion = 100;
-static const int GLSLextARMRevision = 1;
+static const int GLSLextARMRevision = 2;
 
-static const char * const E_SPV_ARM_core_builtins = "SPV_ARM_core_builtins";
+static const char* const E_SPV_ARM_core_builtins = "SPV_ARM_core_builtins";
+static const char* const E_SPV_ARM_cooperative_matrix_layouts = "SPV_ARM_cooperative_matrix_layouts";
+static const char* const E_SPV_ARM_tensors = "SPV_ARM_tensors";
 
 #endif  // #ifndef GLSLextARM_H

+ 1 - 1
3rdparty/glslang/SPIRV/GLSL.ext.EXT.h

@@ -41,6 +41,6 @@ static const char* const E_SPV_EXT_shader_atomic_float_min_max = "SPV_EXT_shader
 static const char* const E_SPV_EXT_shader_image_int64 = "SPV_EXT_shader_image_int64";
 static const char* const E_SPV_EXT_shader_tile_image = "SPV_EXT_shader_tile_image";
 static const char* const E_SPV_EXT_mesh_shader = "SPV_EXT_mesh_shader";
-static const char* const E_SPV_ARM_cooperative_matrix_layouts = "SPV_ARM_cooperative_matrix_layouts";
+static const char* const E_SPV_EXT_float8 = "SPV_EXT_float8";
 
 #endif  // #ifndef GLSLextEXT_H

+ 2 - 0
3rdparty/glslang/SPIRV/GLSL.ext.QCOM.h

@@ -39,6 +39,8 @@ static const int GLSLextQCOMRevision = 1;
 const char* const E_SPV_QCOM_image_processing = "SPV_QCOM_image_processing";
 //SPV_QCOM_image_processing2
 const char* const E_SPV_QCOM_image_processing2 = "SPV_QCOM_image_processing2";
+//SPV_QCOM_cooperative_matrix_conversion
+const char* const E_SPV_QCOM_cooperative_matrix_conversion = "SPV_QCOM_cooperative_matrix_conversion";
 
 //SPV_QCOM_tile_shading
 const char* const E_SPV_QCOM_tile_shading = "SPV_QCOM_tile_shading";

+ 170 - 2
3rdparty/glslang/SPIRV/GlslangToSpv.cpp

@@ -1505,6 +1505,10 @@ bool IsDescriptorResource(const glslang::TType& type)
         type.getBasicType() == glslang::EbtAccStruct)
         return type.getQualifier().isUniformOrBuffer();
 
+    // Tensors are tied to a descriptor.
+    if (type.isTensorARM())
+        return true;
+
     // None of the above.
     return false;
 }
@@ -2399,6 +2403,16 @@ bool TGlslangToSpvTraverser::visitBinary(glslang::TVisit /* visit */, glslang::T
             node->getRight()->traverse(this);
             spv::Id index = accessChainLoad(node->getRight()->getType());
 
+            // Zero-extend smaller unsigned integer types for array indexing.
+            // SPIR-V OpAccessChain treats indices as signed, so we need to zero-extend
+            // unsigned types to preserve their values (signed types are fine as-is).
+            spv::Id indexType = builder.getTypeId(index);
+            if (builder.isUintType(indexType) && builder.getScalarTypeWidth(indexType) < 32) {
+                // Zero-extend unsigned types to preserve their values
+                spv::Id uintType = builder.makeUintType(32);
+                index = builder.createUnaryOp(spv::Op::OpUConvert, uintType, index);
+            }
+
             addIndirectionIndexCapabilities(node->getLeft()->getType(), node->getRight()->getType());
 
             // restore the saved access chain
@@ -2855,6 +2869,10 @@ bool TGlslangToSpvTraverser::visitUnary(glslang::TVisit /* visit */, glslang::TI
                 one = builder.makeFloat16Constant(1.0F);
             else if (node->getBasicType() == glslang::EbtBFloat16)
                 one = builder.makeBFloat16Constant(1.0F);
+            else if (node->getBasicType() == glslang::EbtFloatE5M2)
+                one = builder.makeFloatE5M2Constant(1.0F);
+            else if (node->getBasicType() == glslang::EbtFloatE4M3)
+                one = builder.makeFloatE4M3Constant(1.0F);
             else if (node->getBasicType() == glslang::EbtInt8  || node->getBasicType() == glslang::EbtUint8)
                 one = builder.makeInt8Constant(1);
             else if (node->getBasicType() == glslang::EbtInt16 || node->getBasicType() == glslang::EbtUint16)
@@ -3198,6 +3216,14 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
     case glslang::EOpConstructBF16Vec2:
     case glslang::EOpConstructBF16Vec3:
     case glslang::EOpConstructBF16Vec4:
+    case glslang::EOpConstructFloatE5M2:
+    case glslang::EOpConstructFloatE5M2Vec2:
+    case glslang::EOpConstructFloatE5M2Vec3:
+    case glslang::EOpConstructFloatE5M2Vec4:
+    case glslang::EOpConstructFloatE4M3:
+    case glslang::EOpConstructFloatE4M3Vec2:
+    case glslang::EOpConstructFloatE4M3Vec3:
+    case glslang::EOpConstructFloatE4M3Vec4:
     case glslang::EOpConstructBool:
     case glslang::EOpConstructBVec2:
     case glslang::EOpConstructBVec3:
@@ -3240,6 +3266,7 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
     case glslang::EOpConstructCooperativeMatrixNV:
     case glslang::EOpConstructCooperativeMatrixKHR:
     case glslang::EOpConstructCooperativeVectorNV:
+    case glslang::EOpConstructSaturated:
     {
         builder.setDebugSourceLocation(node->getLoc().line, node->getLoc().getFilename());
         std::vector<spv::Id> arguments;
@@ -3277,6 +3304,16 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
             constructed = createCompositeConstruct(resultType(), constituents);
         } else if (isMatrix)
             constructed = builder.createMatrixConstructor(precision, arguments, resultType());
+        else if (node->getOp() == glslang::EOpConstructSaturated) {
+            OpDecorations decorations = { TranslatePrecisionDecoration(node->getOperationPrecision()),
+                                          TranslateNoContractionDecoration(node->getType().getQualifier()),
+                                          TranslateNonUniformDecoration(lvalueCoherentFlags) };
+
+            constructed = createConversion(node->getOp(), decorations, resultType(), arguments[1],
+                                           node->getType().getBasicType(), node->getSequence()[1]->getAsTyped()->getBasicType());
+            builder.addDecoration(constructed, spv::Decoration::SaturatedToLargestFloat8NormalConversionEXT);
+            builder.createStore(constructed, arguments[0]);
+        }
         else
             constructed = builder.createConstructor(precision, arguments, resultType());
 
@@ -3557,6 +3594,9 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
         builder.addCapability(spv::Capability::TextureBlockMatchQCOM);
         builder.addExtension(spv::E_SPV_QCOM_image_processing);
         break;
+    case glslang::EOpTensorWriteARM:
+        noReturnValue = true;
+        break;
 
     case glslang::EOpImageBlockMatchWindowSSDQCOM:
     case glslang::EOpImageBlockMatchWindowSADQCOM:
@@ -3807,6 +3847,10 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
             if (arg == 0 || arg == 2)
                 lvalue = true;
             break;
+        case glslang::EOpTensorReadARM:
+            if (arg == 2)
+                lvalue = true;
+            break;
         default:
             break;
         }
@@ -4218,6 +4262,24 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
         // store the result to the pointer
         builder.createStore(result, operands[0]);
         result = 0;
+    } else if (node->getOp() == glslang::EOpBitCastArrayQCOM) {
+        builder.addCapability(spv::Capability::CooperativeMatrixConversionQCOM);
+        builder.addExtension(spv::E_SPV_QCOM_cooperative_matrix_conversion);
+        result = builder.createUnaryOp(spv::Op::OpBitCastArrayQCOM, resultType(), operands[0]);
+    } else if (node->getOp() == glslang::EOpCompositeConstructCoopMatQCOM) {
+        builder.addCapability(spv::Capability::CooperativeMatrixConversionQCOM);
+        builder.addExtension(spv::E_SPV_QCOM_cooperative_matrix_conversion);
+        result = builder.createUnaryOp(spv::Op::OpCompositeConstructCoopMatQCOM, resultType(), operands[0]);
+    } else if (node->getOp() == glslang::EOpCompositeExtractCoopMatQCOM) {
+        builder.addCapability(spv::Capability::CooperativeMatrixConversionQCOM);
+        builder.addExtension(spv::E_SPV_QCOM_cooperative_matrix_conversion);
+        result = builder.createUnaryOp(spv::Op::OpCompositeExtractCoopMatQCOM, resultType(), operands[0]);
+    } else if (node->getOp() == glslang::EOpExtractSubArrayQCOM) {
+        builder.addCapability(spv::Capability::CooperativeMatrixConversionQCOM);
+        builder.addExtension(spv::E_SPV_QCOM_cooperative_matrix_conversion);
+
+        std::vector<spv::Id> arguments { operands[0], operands[1] };;
+        result = builder.createOp(spv::Op::OpExtractSubArrayQCOM, resultType(), arguments);
     } else if (node->getOp() == glslang::EOpCooperativeVectorMatMulNV ||
                node->getOp() == glslang::EOpCooperativeVectorMatMulAddNV) {
         auto matrixOperands = spv::CooperativeMatrixOperandsMask::MaskNone;
@@ -4308,6 +4370,66 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
         idImmOps.push_back(spv::IdImmediate(true, operands[0])); // A
         builder.createNoResultOp(spv::Op::OpCooperativeVectorReduceSumAccumulateNV, idImmOps);
         result = 0;
+    } else if (node->getOp() == glslang::EOpTensorReadARM ||
+               node->getOp() == glslang::EOpTensorWriteARM) {
+        const bool isWrite = node->getOp() == glslang::EOpTensorWriteARM;
+        const unsigned int tensorMinOperandCount = 3;
+        assert(operands.size() >= tensorMinOperandCount);
+        std::vector<spv::IdImmediate> idImmOps;
+
+        idImmOps.push_back(spv::IdImmediate(true, operands[0])); // tensor
+        idImmOps.push_back(spv::IdImmediate(true, operands[1])); // coords
+        if (isWrite) {
+            idImmOps.push_back(spv::IdImmediate(true, operands[2])); // value
+        }
+
+        // Analyze the tensor operands
+        spv::IdImmediate tensorOperands = { false, uint32_t(spv::TensorOperandsMask::MaskNone) };
+        bool pushExtraArg = false;
+        if (operands.size() > tensorMinOperandCount) {
+            auto enumVal = builder.getConstantScalar(operands[tensorMinOperandCount]);
+
+            if (enumVal & uint32_t(spv::TensorOperandsMask::NontemporalARM)) {
+                tensorOperands.word |= uint32_t(spv::TensorOperandsMask::NontemporalARM);
+            }
+            if (enumVal & uint32_t(spv::TensorOperandsMask::OutOfBoundsValueARM)) {
+                tensorOperands.word |= uint32_t(spv::TensorOperandsMask::OutOfBoundsValueARM);
+                assert(operands.size() >= tensorMinOperandCount + 2 &&
+                    "TensorOperandsOutOfBoundsValueMask requires an additional value");
+                pushExtraArg = true;
+            }
+        }
+
+        // Append optional tensor operands if the mask was non-zero.
+        if (tensorOperands.word) {
+            idImmOps.push_back(tensorOperands);
+            if (pushExtraArg)
+                idImmOps.push_back(spv::IdImmediate(true, operands[tensorMinOperandCount + 1]));
+        }
+
+        if (isWrite) {
+            builder.createNoResultOp(spv::Op::OpTensorWriteARM, idImmOps);
+            result = 0;
+        } else {
+            // Use the result argument type as the OpTensorReadARM result type.
+            const glslang::TType &resArgType = glslangOperands[2]->getAsTyped()->getType();
+            spv::Id retType = convertGlslangToSpvType(resArgType);
+            result = builder.createOp(spv::Op::OpTensorReadARM, retType, idImmOps);
+            // Store the result to the result argument.
+            builder.createStore(result, operands[2]);
+        }
+    } else if (node->getOp() == glslang::EOpTensorSizeARM) {
+        // Expected operands are (tensor, dimension)
+        assert(operands.size() == 2);
+
+        spv::Id tensorOp = operands[0];
+        spv::Id dimOp = operands[1];
+        assert(builder.isTensorTypeARM(builder.getTypeId(tensorOp)) && "operand #0 must be a tensor");
+
+        std::vector<spv::IdImmediate> idImmOps;
+        idImmOps.push_back(spv::IdImmediate(true, tensorOp));
+        idImmOps.push_back(spv::IdImmediate(true, dimOp));
+        result = builder.createOp(spv::Op::OpTensorQuerySizeARM, resultType(), idImmOps);
     } else if (atomic) {
         // Handle all atomics
         glslang::TBasicType typeProxy = (node->getOp() == glslang::EOpAtomicStore)
@@ -5007,6 +5129,12 @@ spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& ty
     case glslang::EbtBFloat16:
         spvType = builder.makeBFloat16Type();
         break;
+    case glslang::EbtFloatE5M2:
+        spvType = builder.makeFloatE5M2Type();
+        break;
+    case glslang::EbtFloatE4M3:
+        spvType = builder.makeFloatE4M3Type();
+        break;
     case glslang::EbtInt8:
         spvType = builder.makeIntType(8);
         break;
@@ -5257,6 +5385,11 @@ spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& ty
             builder.addCapability(spv::Capability::BFloat16CooperativeMatrixKHR);
         }
 
+        if (type.getBasicType() == glslang::EbtFloatE5M2 || type.getBasicType() == glslang::EbtFloatE4M3) {
+            builder.addExtension(spv::E_SPV_EXT_float8);
+            builder.addCapability(spv::Capability::Float8CooperativeMatrixEXT);
+        }
+
         if (type.getBasicType() == glslang::EbtFloat16)
             builder.addCapability(spv::Capability::Float16);
         if (type.getBasicType() == glslang::EbtUint8 || type.getBasicType() == glslang::EbtInt8) {
@@ -5266,10 +5399,29 @@ spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& ty
         spv::Id scope = makeArraySizeId(*type.getTypeParameters()->arraySizes, 0);
         spv::Id rows = makeArraySizeId(*type.getTypeParameters()->arraySizes, 1);
         spv::Id cols = makeArraySizeId(*type.getTypeParameters()->arraySizes, 2);
-        spv::Id use = builder.makeUintConstant(type.getCoopMatKHRuse());
+        spv::Id use = makeArraySizeId(*type.getTypeParameters()->arraySizes, 3, true);
 
         spvType = builder.makeCooperativeMatrixTypeKHR(spvType, scope, rows, cols, use);
     }
+    else if (type.isTensorARM()) {
+        builder.addCapability(spv::Capability::TensorsARM);
+        builder.addExtension(spv::E_SPV_ARM_tensors);
+        if (type.getBasicType() == glslang::EbtInt8 || type.getBasicType() == glslang::EbtUint8) {
+            builder.addCapability(spv::Capability::Int8);
+        } else if (type.getBasicType() == glslang::EbtInt16 ||
+                   type.getBasicType() == glslang::EbtUint16) {
+            builder.addCapability(spv::Capability::Int16);
+        } else if (type.getBasicType() == glslang::EbtInt64 ||
+                   type.getBasicType() == glslang::EbtUint64) {
+            builder.addCapability(spv::Capability::Int64);
+        } else if (type.getBasicType() == glslang::EbtFloat16) {
+            builder.addCapability(spv::Capability::Float16);
+        }
+
+        spv::Id rank = makeArraySizeId(*type.getTypeParameters()->arraySizes, 0);
+
+        spvType = builder.makeTensorTypeARM(spvType, rank);
+    }
 
     if (type.isCoopVecNV()) {
         builder.addCapability(spv::Capability::CooperativeVectorNV);
@@ -6371,6 +6523,10 @@ void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate&
             if (i == 2)
                 lvalue = true;
             break;
+        case glslang::EOpConstructSaturated:
+            if (i == 0)
+                lvalue = true;
+            break;
         default:
             break;
         }
@@ -8124,7 +8280,7 @@ spv::Id TGlslangToSpvTraverser::createConversion(glslang::TOperator op, OpDecora
 
     int vectorSize = builder.isVectorType(destType) ? builder.getNumTypeComponents(destType) : 0;
 
-    if (IsOpNumericConv(op)) {
+    if (IsOpNumericConv(op) || op == glslang::EOpConstructSaturated) {
         if (isTypeSignedInt(operandBasicType) && isTypeFloat(resultBasicType)) {
             convOp = spv::Op::OpConvertSToF;
         }
@@ -10583,6 +10739,12 @@ spv::Id TGlslangToSpvTraverser::createSpvConstantFromConstUnionArray(const glsla
             case glslang::EbtBFloat16:
                 spvConsts.push_back(builder.makeBFloat16Constant(zero ? 0.0F : (float)consts[nextConst].getDConst()));
                 break;
+            case glslang::EbtFloatE5M2:
+                spvConsts.push_back(builder.makeFloatE5M2Constant(zero ? 0.0F : (float)consts[nextConst].getDConst()));
+                break;
+            case glslang::EbtFloatE4M3:
+                spvConsts.push_back(builder.makeFloatE4M3Constant(zero ? 0.0F : (float)consts[nextConst].getDConst()));
+                break;
             default:
                 assert(0);
                 break;
@@ -10638,6 +10800,12 @@ spv::Id TGlslangToSpvTraverser::createSpvConstantFromConstUnionArray(const glsla
         case glslang::EbtBFloat16:
             scalar = builder.makeBFloat16Constant(zero ? 0.0F : (float)consts[nextConst].getDConst(), specConstant);
             break;
+        case glslang::EbtFloatE5M2:
+            scalar = builder.makeFloatE5M2Constant(zero ? 0.0F : (float)consts[nextConst].getDConst(), specConstant);
+            break;
+        case glslang::EbtFloatE4M3:
+            scalar = builder.makeFloatE4M3Constant(zero ? 0.0F : (float)consts[nextConst].getDConst(), specConstant);
+            break;
         case glslang::EbtReference:
             scalar = builder.makeUint64Constant(zero ? 0 : consts[nextConst].getU64Const(), specConstant);
             scalar = builder.createUnaryOp(spv::Op::OpBitcast, typeId, scalar);

+ 150 - 0
3rdparty/glslang/SPIRV/SpvBuilder.cpp

@@ -341,6 +341,80 @@ Id Builder::makeBFloat16Type()
     return type->getResultId();
 }
 
+Id Builder::makeFloatE5M2Type()
+{
+    // try to find it
+    Instruction* type;
+    for (int t = 0; t < (int)groupedTypes[enumCast(Op::OpTypeFloat)].size(); ++t) {
+        type = groupedTypes[enumCast(Op::OpTypeFloat)][t];
+        if (type->getNumOperands() != 2) {
+            continue;
+        }
+        if (type->getImmediateOperand(0) == (unsigned)8 &&
+            type->getImmediateOperand(1) == FPEncoding::Float8E5M2EXT)
+            return type->getResultId();
+    }
+
+    // not found, make it
+    type = new Instruction(getUniqueId(), NoType, Op::OpTypeFloat);
+    type->addImmediateOperand(8);
+    type->addImmediateOperand(FPEncoding::Float8E5M2EXT);
+    groupedTypes[enumCast(Op::OpTypeFloat)].push_back(type);
+    constantsTypesGlobals.push_back(std::unique_ptr<Instruction>(type));
+    module.mapInstruction(type);
+
+    addExtension(spv::E_SPV_EXT_float8);
+    addCapability(Capability::Float8EXT);
+
+#if 0
+    // XXX not supported
+    if (emitNonSemanticShaderDebugInfo)
+    {
+        auto const debugResultId = makeFloatDebugType(width);
+        debugId[type->getResultId()] = debugResultId;
+    }
+#endif
+
+    return type->getResultId();
+}
+
+Id Builder::makeFloatE4M3Type()
+{
+    // try to find it
+    Instruction* type;
+    for (int t = 0; t < (int)groupedTypes[enumCast(Op::OpTypeFloat)].size(); ++t) {
+        type = groupedTypes[enumCast(Op::OpTypeFloat)][t];
+        if (type->getNumOperands() != 2) {
+            continue;
+        }
+        if (type->getImmediateOperand(0) == (unsigned)8 &&
+            type->getImmediateOperand(1) == FPEncoding::Float8E4M3EXT)
+            return type->getResultId();
+    }
+
+    // not found, make it
+    type = new Instruction(getUniqueId(), NoType, Op::OpTypeFloat);
+    type->addImmediateOperand(8);
+    type->addImmediateOperand(FPEncoding::Float8E4M3EXT);
+    groupedTypes[enumCast(Op::OpTypeFloat)].push_back(type);
+    constantsTypesGlobals.push_back(std::unique_ptr<Instruction>(type));
+    module.mapInstruction(type);
+
+    addExtension(spv::E_SPV_EXT_float8);
+    addCapability(Capability::Float8EXT);
+
+#if 0
+    // XXX not supported
+    if (emitNonSemanticShaderDebugInfo)
+    {
+        auto const debugResultId = makeFloatDebugType(width);
+        debugId[type->getResultId()] = debugResultId;
+    }
+#endif
+
+    return type->getResultId();
+}
+
 // Make a struct without checking for duplication.
 // See makeStructResultType() for non-decorated structs
 // needed as the result of some instructions, which does
@@ -578,6 +652,26 @@ Id Builder::makeCooperativeVectorTypeNV(Id componentType, Id components)
     return type->getResultId();
 }
 
+Id Builder::makeTensorTypeARM(Id elementType, Id rank)
+{
+    // See if an OpTypeTensorARM with same element type and rank already exists.
+    for (int t = 0; t < (int)groupedTypes[enumCast(Op::OpTypeTensorARM)].size(); ++t) {
+        const Instruction *type = groupedTypes[enumCast(Op::OpTypeTensorARM)][t];
+        if (type->getIdOperand(0) == elementType && type->getIdOperand(1) == rank)
+            return type->getResultId();
+    }
+
+    // Not found, make it.
+    std::unique_ptr<Instruction> type(new Instruction(getUniqueId(), NoType, Op::OpTypeTensorARM));
+    type->addIdOperand(elementType);
+    type->addIdOperand(rank);
+    groupedTypes[enumCast(Op::OpTypeTensorARM)].push_back(type.get());
+    module.mapInstruction(type.get());
+    Id resultID = type->getResultId();
+    constantsTypesGlobals.push_back(std::move(type));
+    return resultID;
+}
+
 Id Builder::makeGenericType(spv::Op opcode, std::vector<spv::IdImmediate>& operands)
 {
     // try to find it
@@ -1897,6 +1991,62 @@ Id Builder::makeBFloat16Constant(float bf16, bool specConstant)
     return c->getResultId();
 }
 
+Id Builder::makeFloatE5M2Constant(float fe5m2, bool specConstant)
+{
+    Op opcode = specConstant ? Op::OpSpecConstant : Op::OpConstant;
+    Id typeId = makeFloatE5M2Type();
+
+    spvutils::HexFloat<spvutils::FloatProxy<float>> fVal(fe5m2);
+    spvutils::HexFloat<spvutils::FloatProxy<spvutils::FloatE5M2>> fe5m2Val(0);
+    fVal.castTo(fe5m2Val, spvutils::kRoundToZero);
+
+    unsigned value = fe5m2Val.value().getAsFloat().get_value();
+
+    // See if we already made it. Applies only to regular constants, because specialization constants
+    // must remain distinct for the purpose of applying a SpecId decoration.
+    if (!specConstant) {
+        Id existing = findScalarConstant(Op::OpTypeFloat, opcode, typeId, value);
+        if (existing)
+            return existing;
+    }
+
+    Instruction* c = new Instruction(getUniqueId(), typeId, opcode);
+    c->addImmediateOperand(value);
+    constantsTypesGlobals.push_back(std::unique_ptr<Instruction>(c));
+    groupedConstants[enumCast(Op::OpTypeFloat)].push_back(c);
+    module.mapInstruction(c);
+
+    return c->getResultId();
+}
+
+Id Builder::makeFloatE4M3Constant(float fe4m3, bool specConstant)
+{
+    Op opcode = specConstant ? Op::OpSpecConstant : Op::OpConstant;
+    Id typeId = makeFloatE4M3Type();
+
+    spvutils::HexFloat<spvutils::FloatProxy<float>> fVal(fe4m3);
+    spvutils::HexFloat<spvutils::FloatProxy<spvutils::FloatE4M3>> fe4m3Val(0);
+    fVal.castTo(fe4m3Val, spvutils::kRoundToZero);
+
+    unsigned value = fe4m3Val.value().getAsFloat().get_value();
+
+    // See if we already made it. Applies only to regular constants, because specialization constants
+    // must remain distinct for the purpose of applying a SpecId decoration.
+    if (!specConstant) {
+        Id existing = findScalarConstant(Op::OpTypeFloat, opcode, typeId, value);
+        if (existing)
+            return existing;
+    }
+
+    Instruction* c = new Instruction(getUniqueId(), typeId, opcode);
+    c->addImmediateOperand(value);
+    constantsTypesGlobals.push_back(std::unique_ptr<Instruction>(c));
+    groupedConstants[enumCast(Op::OpTypeFloat)].push_back(c);
+    module.mapInstruction(c);
+
+    return c->getResultId();
+}
+
 Id Builder::makeFpConstant(Id type, double d, bool specConstant)
 {
     const int width = getScalarTypeWidth(type);

+ 6 - 0
3rdparty/glslang/SPIRV/SpvBuilder.h

@@ -209,6 +209,8 @@ public:
     Id makeUintType(int width) { return makeIntegerType(width, false); }
     Id makeFloatType(int width);
     Id makeBFloat16Type();
+    Id makeFloatE5M2Type();
+    Id makeFloatE4M3Type();
     Id makeStructType(const std::vector<Id>& members, const char* name, bool const compilerGenerated = true);
     Id makeStructResultType(Id type0, Id type1);
     Id makeVectorType(Id component, int size);
@@ -223,6 +225,7 @@ public:
     Id makeCooperativeMatrixTypeNV(Id component, Id scope, Id rows, Id cols);
     Id makeCooperativeMatrixTypeWithSameShape(Id component, Id otherType);
     Id makeCooperativeVectorTypeNV(Id componentType, Id components);
+    Id makeTensorTypeARM(Id elementType, Id rank);
     Id makeGenericType(spv::Op opcode, std::vector<spv::IdImmediate>& operands);
 
     // SPIR-V NonSemantic Shader DebugInfo Instructions
@@ -320,6 +323,7 @@ public:
     }
     bool isTensorViewType(Id typeId) const { return getTypeClass(typeId) == Op::OpTypeTensorViewNV; }
     bool isCooperativeVectorType(Id typeId) const { return getTypeClass(typeId) == Op::OpTypeCooperativeVectorNV; }
+    bool isTensorTypeARM(Id typeId)    const { return getTypeClass(typeId) == Op::OpTypeTensorARM; }
     bool isAggregateType(Id typeId)    const
         { return isArrayType(typeId) || isStructType(typeId) || isCooperativeMatrixType(typeId); }
     bool isImageType(Id typeId)        const { return getTypeClass(typeId) == Op::OpTypeImage; }
@@ -414,6 +418,8 @@ public:
     Id makeDoubleConstant(double d, bool specConstant = false);
     Id makeFloat16Constant(float f16, bool specConstant = false);
     Id makeBFloat16Constant(float bf16, bool specConstant = false);
+    Id makeFloatE5M2Constant(float fe5m2, bool specConstant = false);
+    Id makeFloatE4M3Constant(float fe4m3, bool specConstant = false);
     Id makeFpConstant(Id type, double d, bool specConstant = false);
 
     Id importNonSemanticShaderDebugInfoInstructions();

+ 16 - 3
3rdparty/glslang/SPIRV/disassemble.cpp

@@ -61,7 +61,7 @@ namespace spv {
         #include "GLSL.ext.QCOM.h"
     }
 }
-const char* GlslStd450DebugNames[spv::GLSLstd450Count];
+static const char* GlslStd450DebugNames[spv::GLSLstd450Count];
 
 namespace spv {
 
@@ -382,9 +382,22 @@ void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode,
                 break;
             case Op::OpTypeFloat:
                 switch (stream[word]) {
+                case 8:
                 case 16:
-                    if (numOperands > 1 && stream[word+1] == spv::FPEncoding::BFloat16KHR) {
-                        idDescriptor[resultId] = "bfloat16_t";
+                    if (numOperands > 1) {
+                        switch (stream[word+1]) {
+                        default:
+                            assert(0); [[fallthrough]];
+                        case (int)spv::FPEncoding::BFloat16KHR:
+                            idDescriptor[resultId] = "bfloat16_t";
+                            break;
+                        case (int)spv::FPEncoding::Float8E4M3EXT:
+                            idDescriptor[resultId] = "floate4m3_t";
+                            break;
+                        case (int)spv::FPEncoding::Float8E5M2EXT:
+                            idDescriptor[resultId] = "floate5m2_t";
+                            break;
+                        }
                     } else {
                         idDescriptor[resultId] = "float16_t";
                     }

+ 66 - 13
3rdparty/glslang/SPIRV/doc.cpp

@@ -344,6 +344,8 @@ const char* DecorationString(int decoration)
     case (int)Decoration::AliasedPointerEXT:       return "DecorationAliasedPointerEXT";
 
     case (int)Decoration::HitObjectShaderRecordBufferNV:  return "DecorationHitObjectShaderRecordBufferNV";
+
+    case (int)Decoration::SaturatedToLargestFloat8NormalConversionEXT: return "DecorationSaturatedToLargestFloat8NormalConversionEXT";
     }
 }
 
@@ -975,7 +977,7 @@ const char* CapabilityString(int info)
     case (int)Capability::SubgroupBallotKHR: return "SubgroupBallotKHR";
     case (int)Capability::DrawParameters:    return "DrawParameters";
     case (int)Capability::SubgroupVoteKHR:   return "SubgroupVoteKHR";
-    case (int)Capability::GroupNonUniformRotateKHR: return "CapabilityGroupNonUniformRotateKHR";
+    case (int)Capability::GroupNonUniformRotateKHR: return "GroupNonUniformRotateKHR";
 
     case (int)Capability::StorageUniformBufferBlock16: return "StorageUniformBufferBlock16";
     case (int)Capability::StorageUniform16:            return "StorageUniform16";
@@ -1021,7 +1023,7 @@ const char* CapabilityString(int info)
     case (int)Capability::RayTracingPositionFetchKHR:      return "RayTracingPositionFetchKHR";
     case (int)Capability::DisplacementMicromapNV:           return "DisplacementMicromapNV";
     case (int)Capability::RayTracingOpacityMicromapEXT:    return "RayTracingOpacityMicromapEXT";
-    case (int)Capability::RayTracingDisplacementMicromapNV: return "CapabilityRayTracingDisplacementMicromapNV";
+    case (int)Capability::RayTracingDisplacementMicromapNV: return "RayTracingDisplacementMicromapNV";
     case (int)Capability::RayQueryPositionFetchKHR:        return "RayQueryPositionFetchKHR";
     case (int)Capability::ComputeDerivativeGroupQuadsNV:   return "ComputeDerivativeGroupQuadsNV";
     case (int)Capability::ComputeDerivativeGroupLinearNV:  return "ComputeDerivativeGroupLinearNV";
@@ -1069,15 +1071,16 @@ const char* CapabilityString(int info)
     case (int)Capability::CooperativeVectorNV:                     return "CooperativeVectorNV";
     case (int)Capability::CooperativeVectorTrainingNV:             return "CooperativeVectorTrainingNV";
 
-    case (int)Capability::FragmentShaderSampleInterlockEXT:        return "CapabilityFragmentShaderSampleInterlockEXT";
-    case (int)Capability::FragmentShaderPixelInterlockEXT:         return "CapabilityFragmentShaderPixelInterlockEXT";
-    case (int)Capability::FragmentShaderShadingRateInterlockEXT:   return "CapabilityFragmentShaderShadingRateInterlockEXT";
+    case (int)Capability::FragmentShaderSampleInterlockEXT:        return "FragmentShaderSampleInterlockEXT";
+    case (int)Capability::FragmentShaderPixelInterlockEXT:         return "FragmentShaderPixelInterlockEXT";
+    case (int)Capability::FragmentShaderShadingRateInterlockEXT:   return "FragmentShaderShadingRateInterlockEXT";
 
     case (int)Capability::TileImageColorReadAccessEXT:           return "TileImageColorReadAccessEXT";
     case (int)Capability::TileImageDepthReadAccessEXT:           return "TileImageDepthReadAccessEXT";
     case (int)Capability::TileImageStencilReadAccessEXT:         return "TileImageStencilReadAccessEXT";
 
     case (int)Capability::CooperativeMatrixLayoutsARM:             return "CooperativeMatrixLayoutsARM";
+    case (int)Capability::TensorsARM:                              return "TensorsARM";
 
     case (int)Capability::FragmentShadingRateKHR:                  return "FragmentShadingRateKHR";
 
@@ -1087,7 +1090,7 @@ const char* CapabilityString(int info)
     case (int)Capability::QuadControlKHR:                          return "QuadControlKHR";
     case (int)Capability::Int64ImageEXT:                           return "Int64ImageEXT";
 
-    case (int)Capability::IntegerFunctions2INTEL:              return "CapabilityIntegerFunctions2INTEL";
+    case (int)Capability::IntegerFunctions2INTEL:              return "IntegerFunctions2INTEL";
 
     case (int)Capability::ExpectAssumeKHR:                         return "ExpectAssumeKHR";
 
@@ -1098,9 +1101,9 @@ const char* CapabilityString(int info)
     case (int)Capability::AtomicFloat32MinMaxEXT:                  return "AtomicFloat32MinMaxEXT";
     case (int)Capability::AtomicFloat64MinMaxEXT:                  return "AtomicFloat64MinMaxEXT";
 
-    case (int)Capability::WorkgroupMemoryExplicitLayoutKHR:            return "CapabilityWorkgroupMemoryExplicitLayoutKHR";
-    case (int)Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR:  return "CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR";
-    case (int)Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR: return "CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR";
+    case (int)Capability::WorkgroupMemoryExplicitLayoutKHR:            return "WorkgroupMemoryExplicitLayoutKHR";
+    case (int)Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR:  return "WorkgroupMemoryExplicitLayout8BitAccessKHR";
+    case (int)Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR: return "WorkgroupMemoryExplicitLayout16BitAccessKHR";
     case (int)Capability::CoreBuiltinsARM:                             return "CoreBuiltinsARM";
 
     case (int)Capability::ShaderInvocationReorderNV:                return "ShaderInvocationReorderNV";
@@ -1111,7 +1114,9 @@ const char* CapabilityString(int info)
     case (int)Capability::TileShadingQCOM:                     return "TileShadingQCOM";
     case (int)Capability::TextureBlockMatch2QCOM:              return "TextureBlockMatch2QCOM";
 
-    case (int)Capability::ReplicatedCompositesEXT:             return "CapabilityReplicatedCompositesEXT";
+    case (int)Capability::CooperativeMatrixConversionQCOM:     return "CooperativeMatrixConversionQCOM";
+
+    case (int)Capability::ReplicatedCompositesEXT:             return "ReplicatedCompositesEXT";
 
     case (int)Capability::DotProductKHR:                       return "DotProductKHR";
     case (int)Capability::DotProductInputAllKHR:               return "DotProductInputAllKHR";
@@ -1123,9 +1128,12 @@ const char* CapabilityString(int info)
     case (int)Capability::RayTracingSpheresGeometryNV:             return "RayTracingSpheresGeometryNV";
     case (int)Capability::RayTracingLinearSweptSpheresGeometryNV:  return "RayTracingLinearSweptSpheresGeometryNV";
 
-    case (int)Capability::BFloat16TypeKHR:                     return "CapabilityBFloat16TypeKHR";
-    case (int)Capability::BFloat16DotProductKHR:               return "CapabilityBFloat16DotProductKHR";
-    case (int)Capability::BFloat16CooperativeMatrixKHR:        return "CapabilityBFloat16CooperativeMatrixKHR";
+    case (int)Capability::BFloat16TypeKHR:                     return "BFloat16TypeKHR";
+    case (int)Capability::BFloat16DotProductKHR:               return "BFloat16DotProductKHR";
+    case (int)Capability::BFloat16CooperativeMatrixKHR:        return "BFloat16CooperativeMatrixKHR";
+
+    case (int)Capability::Float8EXT:                           return "Float8EXT";
+    case (int)Capability::Float8CooperativeMatrixEXT:          return "Float8CooperativeMatrixEXT";
 
     default: return "Bad";
     }
@@ -1621,6 +1629,11 @@ const char* OpcodeString(int op)
     case (int)Op::OpTensorViewSetStrideNV:           return "OpTensorViewSetStrideNV";
     case (int)Op::OpTensorViewSetClipNV:             return "OpTensorViewSetClipNV";
 
+    case (int)Op::OpTypeTensorARM:                   return "OpTypeTensorARM";
+    case (int)Op::OpTensorReadARM:                   return "OpTensorReadARM";
+    case (int)Op::OpTensorWriteARM:                  return "OpTensorWriteARM";
+    case (int)Op::OpTensorQuerySizeARM:              return "OpTensorQuerySizeARM";
+
     case (int)Op::OpTypeCooperativeVectorNV:         return "OpTypeCooperativeVectorNV";
     case (int)Op::OpCooperativeVectorMatrixMulNV:    return "OpCooperativeVectorMatrixMulNV";
     case (int)Op::OpCooperativeVectorMatrixMulAddNV: return "OpCooperativeVectorMatrixMulAddNV";
@@ -1689,6 +1702,11 @@ const char* OpcodeString(int op)
     case (int)Op::OpImageBlockMatchGatherSSDQCOM:    return "OpImageBlockMatchGatherSSDQCOM";
     case (int)Op::OpImageBlockMatchGatherSADQCOM:    return "OpImageBlockMatchGatherSADQCOM";
 
+    case (int)Op::OpBitCastArrayQCOM:                return "OpBitCastArrayQCOM";
+    case (int)Op::OpCompositeConstructCoopMatQCOM:   return "OpCompositeConstructCoopMatQCOM";
+    case (int)Op::OpCompositeExtractCoopMatQCOM:     return "OpCompositeExtractCoopMatQCOM";
+    case (int)Op::OpExtractSubArrayQCOM:             return "OpExtractSubArrayQCOM";
+
     case (int)Op::OpConstantCompositeReplicateEXT: return "OpConstantCompositeReplicateEXT";
     case (int)Op::OpSpecConstantCompositeReplicateEXT: return "OpSpecConstantCompositeReplicateEXT";
     case (int)Op::OpCompositeConstructReplicateEXT: return "OpCompositeConstructReplicateEXT";
@@ -1828,6 +1846,10 @@ void Parameterize()
         InstructionDesc[enumCast(Op::OpCooperativeVectorOuterProductAccumulateNV)].setResultAndType(false, false);
         InstructionDesc[enumCast(Op::OpCooperativeVectorReduceSumAccumulateNV)].setResultAndType(false, false);
 
+        InstructionDesc[enumCast(Op::OpTypeTensorARM)].setResultAndType(true, false);
+        InstructionDesc[enumCast(Op::OpTensorReadARM)].setResultAndType(true, true);
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].setResultAndType(false, false);
+
         // Specific additional context-dependent operands
 
         ExecutionModeOperands[enumCast(ExecutionMode::Invocations)].push(OperandLiteralNumber, "'Number of <<Invocation,invocations>>'");
@@ -3709,6 +3731,19 @@ void Parameterize()
         InstructionDesc[enumCast(Op::OpImageBlockMatchGatherSADQCOM)].operands.push(OperandImageOperands, "", true);
         InstructionDesc[enumCast(Op::OpImageBlockMatchGatherSADQCOM)].setResultAndType(true, true);
 
+        InstructionDesc[enumCast(Op::OpBitCastArrayQCOM)].operands.push(OperandId, "'source array'");
+        InstructionDesc[enumCast(Op::OpBitCastArrayQCOM)].setResultAndType(true, true);
+
+        InstructionDesc[enumCast(Op::OpCompositeConstructCoopMatQCOM)].operands.push(OperandId, "'source array'");
+        InstructionDesc[enumCast(Op::OpCompositeConstructCoopMatQCOM)].setResultAndType(true, true);
+
+        InstructionDesc[enumCast(Op::OpCompositeExtractCoopMatQCOM)].operands.push(OperandId, "'source cooperative matrix'");
+        InstructionDesc[enumCast(Op::OpCompositeExtractCoopMatQCOM)].setResultAndType(true, true);
+
+        InstructionDesc[enumCast(Op::OpExtractSubArrayQCOM)].operands.push(OperandId, "'source array'");
+        InstructionDesc[enumCast(Op::OpExtractSubArrayQCOM)].operands.push(OperandId, "'start index'");
+        InstructionDesc[enumCast(Op::OpExtractSubArrayQCOM)].setResultAndType(true, true);
+
         InstructionDesc[enumCast(Op::OpConstantCompositeReplicateEXT)].operands.push(OperandId, "'Value'");
         InstructionDesc[enumCast(Op::OpSpecConstantCompositeReplicateEXT)].operands.push(OperandId, "'Value'");
         InstructionDesc[enumCast(Op::OpCompositeConstructReplicateEXT)].operands.push(OperandId, "'Value'");
@@ -3800,6 +3835,24 @@ void Parameterize()
         InstructionDesc[enumCast(Op::OpSUDotAccSatKHR)].operands.push(OperandId, "'Vector2'");
         InstructionDesc[enumCast(Op::OpSUDotAccSatKHR)].operands.push(OperandId, "'Accumulator'");
         InstructionDesc[enumCast(Op::OpSUDotAccSatKHR)].operands.push(OperandLiteralNumber, "'PackedVectorFormat'");
+
+        InstructionDesc[enumCast(Op::OpTypeTensorARM)].operands.push(OperandId, "'Element Type'");
+        InstructionDesc[enumCast(Op::OpTypeTensorARM)].operands.push(OperandId, "'Rank'");
+
+        InstructionDesc[enumCast(Op::OpTensorReadARM)].operands.push(OperandId, "'Tensor'");
+        InstructionDesc[enumCast(Op::OpTensorReadARM)].operands.push(OperandId, "'Coordinate'");
+        InstructionDesc[enumCast(Op::OpTensorReadARM)].operands.push(OperandLiteralNumber, "'Tensor Operand'", true);
+        InstructionDesc[enumCast(Op::OpTensorReadARM)].operands.push(OperandVariableIds, "'Tensor Operands'");
+
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].operands.push(OperandId, "'Tensor'");
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].operands.push(OperandId, "'Coordinate'");
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].operands.push(OperandId, "'Object'");
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].operands.push(OperandLiteralNumber, "'Tensor Operand'", true);
+        InstructionDesc[enumCast(Op::OpTensorWriteARM)].operands.push(OperandVariableIds, "'Tensor Operands'");
+
+        InstructionDesc[enumCast(Op::OpTensorQuerySizeARM)].operands.push(OperandId, "'Tensor'");
+        InstructionDesc[enumCast(Op::OpTensorQuerySizeARM)].operands.push(OperandId, "'Dimension'", true);
+
     });
 }
 

+ 113 - 4
3rdparty/glslang/SPIRV/hex_float.h

@@ -50,6 +50,52 @@ class Float16 {
   uint16_t val;
 };
 
+class FloatE5M2 {  // Raw 8-bit storage for a float with 5 exponent bits (mask 0x7C) and 2 fraction bits (mask 0x3).
+ public:
+  FloatE5M2(uint8_t v) : val(v) {}  // Wraps a raw encoding; not explicit, so uint8_t converts implicitly.
+  FloatE5M2() {}  // Note: leaves val uninitialized.
+  static bool isNan(const FloatE5M2& val) {  // NaN: all exponent bits set and a nonzero fraction; bit 0x80 is not examined.
+    return ((val.val & 0x7C) == 0x7C) && ((val.val & 0x3) != 0);
+  }
+  // Returns true if the given value is an infinity: all exponent bits set and a zero fraction (bit 0x80 is not examined).
+  static bool isInfinity(const FloatE5M2& val) {
+    return ((val.val & 0x7C) == 0x7C) && ((val.val & 0x3) == 0);
+  }
+  FloatE5M2(const FloatE5M2& other) { val = other.val; }  // Copies the raw encoding unchanged.
+  uint8_t get_value() const { return val; }  // Returns the raw 8-bit encoding.
+
+  // Returns the maximum normal value (encoding 0x7B).
+  static FloatE5M2 max() { return FloatE5M2(0x7B); }
+  // Returns the lowest normal value (encoding 0xFB, i.e. max() with bit 0x80 set).
+  static FloatE5M2 lowest() { return FloatE5M2(0xFB); }
+
+ private:
+  uint8_t val;  // Raw bit pattern of the value.
+};
+
+class FloatE4M3 {  // Raw 8-bit storage for a float with 4 exponent bits and 3 fraction bits.
+ public:
+  FloatE4M3(uint8_t v) : val(v) {}  // Wraps a raw encoding; not explicit, so uint8_t converts implicitly.
+  FloatE4M3() {}  // Note: leaves val uninitialized.
+  static bool isNan(const FloatE4M3& val) {  // NaN only when all seven low bits are set; bit 0x80 is not examined.
+    return (val.val & 0x7F) == 0x7F;
+  }
+  // Always returns false: no bit pattern of this type is treated as an infinity.
+  static bool isInfinity(const FloatE4M3&) {
+    return false;
+  }
+  FloatE4M3(const FloatE4M3& other) { val = other.val; }  // Copies the raw encoding unchanged.
+  uint8_t get_value() const { return val; }  // Returns the raw 8-bit encoding.
+
+  // Returns the maximum normal value (encoding 0x7E, one below the NaN pattern 0x7F).
+  static FloatE4M3 max() { return FloatE4M3(0x7E); }
+  // Returns the lowest normal value (encoding 0xFE, i.e. max() with bit 0x80 set).
+  static FloatE4M3 lowest() { return FloatE4M3(0xFE); }
+
+ private:
+  uint8_t val;  // Raw bit pattern of the value.
+};
+
 // To specialize this type, you must override uint_type to define
 // an unsigned integer that can fit your floating point type.
 // You must also add a isNan function that returns true if
@@ -95,6 +141,30 @@ struct FloatProxyTraits<Float16> {
   static Float16 lowest() { return Float16::lowest(); }
 };
 
+template <>
+struct FloatProxyTraits<FloatE5M2> {  // Specialization that forwards every query to FloatE5M2's static members.
+  typedef uint8_t uint_type;  // Unsigned integer wide enough to hold the 8-bit encoding.
+  static bool isNan(FloatE5M2 f) { return FloatE5M2::isNan(f); }  // True if f encodes a NaN.
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(FloatE5M2 f) { return FloatE5M2::isInfinity(f); }
+  // Returns the maximum normal value.
+  static FloatE5M2 max() { return FloatE5M2::max(); }
+  // Returns the lowest normal value.
+  static FloatE5M2 lowest() { return FloatE5M2::lowest(); }
+};
+
+template <>
+struct FloatProxyTraits<FloatE4M3> {  // Specialization that forwards every query to FloatE4M3's static members.
+  typedef uint8_t uint_type;  // Unsigned integer wide enough to hold the 8-bit encoding.
+  static bool isNan(FloatE4M3 f) { return FloatE4M3::isNan(f); }  // True if f encodes a NaN.
+  // Forwards to FloatE4M3::isInfinity, which always returns false.
+  static bool isInfinity(FloatE4M3 f) { return FloatE4M3::isInfinity(f); }
+  // Returns the maximum normal value.
+  static FloatE4M3 max() { return FloatE4M3::max(); }
+  // Returns the lowest normal value.
+  static FloatE4M3 lowest() { return FloatE4M3::lowest(); }
+};
+
 // Since copying a floating point number (especially if it is NaN)
 // does not guarantee that bits are preserved, this class lets us
 // store the type and use it as a float when necessary.
@@ -182,6 +252,7 @@ struct HexFloatTraits {
   // The bias of the exponent. (How much we need to subtract from the stored
   // value to get the correct value.)
   static const uint32_t exponent_bias = 0;
+  static bool supportsInfinity() { return true; }
 };
 
 // Traits for IEEE float.
@@ -196,6 +267,7 @@ struct HexFloatTraits<FloatProxy<float>> {
   static const uint_type num_exponent_bits = 8;
   static const uint_type num_fraction_bits = 23;
   static const uint_type exponent_bias = 127;
+  static bool supportsInfinity() { return true; }
 };
 
 // Traits for IEEE double.
@@ -210,6 +282,7 @@ struct HexFloatTraits<FloatProxy<double>> {
   static const uint_type num_exponent_bits = 11;
   static const uint_type num_fraction_bits = 52;
   static const uint_type exponent_bias = 1023;
+  static bool supportsInfinity() { return true; }
 };
 
 // Traits for IEEE half.
@@ -224,6 +297,33 @@ struct HexFloatTraits<FloatProxy<Float16>> {
   static const uint_type num_exponent_bits = 5;
   static const uint_type num_fraction_bits = 10;
   static const uint_type exponent_bias = 15;
+  static bool supportsInfinity() { return true; }
+};
+
+template <>
+struct HexFloatTraits<FloatProxy<FloatE5M2>> {  // Bit-layout description of the 8-bit E5M2 format.
+  typedef uint8_t uint_type;  // Unsigned type holding the raw bits.
+  typedef int8_t int_type;  // Signed counterpart of uint_type.
+  typedef uint8_t underlying_type;
+  typedef uint8_t native_type;
+  static const uint_type num_used_bits = 8;  // 5 exponent + 2 fraction bits, leaving 1 remaining bit.
+  static const uint_type num_exponent_bits = 5;
+  static const uint_type num_fraction_bits = 2;
+  static const uint_type exponent_bias = 15;  // Amount subtracted from the stored exponent to get the real one.
+  static bool supportsInfinity() { return true; }  // Overflowing conversions into this type are encoded as +/-inf.
+};
+
+template <>
+struct HexFloatTraits<FloatProxy<FloatE4M3>> {  // Bit-layout description of the 8-bit E4M3 format.
+  typedef uint8_t uint_type;  // Unsigned type holding the raw bits.
+  typedef int8_t int_type;  // Signed counterpart of uint_type.
+  typedef uint8_t underlying_type;
+  typedef uint8_t native_type;
+  static const uint_type num_used_bits = 8;  // 4 exponent + 3 fraction bits, leaving 1 remaining bit.
+  static const uint_type num_exponent_bits = 4;
+  static const uint_type num_fraction_bits = 3;
+  static const uint_type exponent_bias = 7;  // Amount subtracted from the stored exponent to get the real one.
+  static bool supportsInfinity() { return false; }  // No infinity encoding; overflowing conversions are encoded as NaN instead.
 };
 
 enum round_direction {
@@ -243,6 +343,7 @@ class HexFloat {
   typedef typename Traits::int_type int_type;
   typedef typename Traits::underlying_type underlying_type;
   typedef typename Traits::native_type native_type;
+  using Traits_T = Traits;
 
   explicit HexFloat(T f) : value_(f) {}
 
@@ -584,14 +685,22 @@ class HexFloat {
         (getBits() & exponent_mask) == exponent_mask && significand != 0;
     bool is_inf =
         !is_nan &&
-        ((exponent + carried) > static_cast<int_type>(other_T::exponent_bias) ||
+        (((exponent + carried) > static_cast<int_type>(other_T::exponent_bias) && other_T::Traits_T::supportsInfinity()) ||
+         ((exponent + carried) > static_cast<int_type>(other_T::exponent_bias + 1) && !other_T::Traits_T::supportsInfinity()) ||
          (significand == 0 && (getBits() & exponent_mask) == exponent_mask));
 
     // If we are Nan or Inf we should pass that through.
     if (is_inf) {
-      other.set_value(BitwiseCast<typename other_T::underlying_type>(
-          static_cast<typename other_T::uint_type>(
-              (negate ? other_T::sign_mask : 0) | other_T::exponent_mask)));
+      if (other_T::Traits_T::supportsInfinity()) {
+        // encode as +/-inf
+        other.set_value(BitwiseCast<typename other_T::underlying_type>(
+            static_cast<typename other_T::uint_type>(
+                (negate ? other_T::sign_mask : 0) | other_T::exponent_mask)));
+      } else {
+        // encode as +/-nan
+        other.set_value(BitwiseCast<typename other_T::underlying_type>(
+            static_cast<typename other_T::uint_type>(negate ? ~0 : ~other_T::sign_mask)));
+      }
       return;
     }
     if (is_nan) {

+ 54 - 0
3rdparty/glslang/SPIRV/spirv.hpp11

@@ -547,6 +547,7 @@ enum class Decoration : unsigned {
     MaxByteOffset = 45,
     AlignmentId = 46,
     MaxByteOffsetId = 47,
+    SaturatedToLargestFloat8NormalConversionEXT = 4216,
     NoSignedWrap = 4469,
     NoUnsignedWrap = 4470,
     WeightTextureQCOM = 4487,
@@ -1069,7 +1070,10 @@ enum class Capability : unsigned {
     TileImageColorReadAccessEXT = 4166,
     TileImageDepthReadAccessEXT = 4167,
     TileImageStencilReadAccessEXT = 4168,
+    TensorsARM = 4174,
     CooperativeMatrixLayoutsARM = 4201,
+    Float8EXT = 4212,
+    Float8CooperativeMatrixEXT = 4213,
     FragmentShadingRateKHR = 4422,
     SubgroupBallotKHR = 4423,
     DrawParameters = 4427,
@@ -1106,6 +1110,7 @@ enum class Capability : unsigned {
     TextureBoxFilterQCOM = 4485,
     TextureBlockMatchQCOM = 4486,
     TileShadingQCOM = 4495,
+    CooperativeMatrixConversionQCOM = 4496,
     TextureBlockMatch2QCOM = 4498,
     Float16ImageAMD = 5008,
     ImageGatherBiasLodAMD = 5009,
@@ -1457,6 +1462,18 @@ enum class TensorAddressingOperandsMask : unsigned {
     DecodeFunc = 0x00000002,
 };
 
+enum class TensorOperandsShift : unsigned {  // Bit positions of the tensor operand flags (see TensorOperandsMask).
+    NontemporalARM = 0,  // Bit 0, i.e. mask 0x00000001.
+    OutOfBoundsValueARM = 1,  // Bit 1, i.e. mask 0x00000002.
+    Max = 0x7fffffff,  // Largest enumerator value; not a flag position.
+};
+
+enum class TensorOperandsMask : unsigned {  // Bit masks of the tensor operand flags; each equals 1 << the matching TensorOperandsShift value.
+    MaskNone = 0,  // No flag set.
+    NontemporalARM = 0x00000001,  // 1 << TensorOperandsShift::NontemporalARM.
+    OutOfBoundsValueARM = 0x00000002,  // 1 << TensorOperandsShift::OutOfBoundsValueARM.
+};
+
 enum class InitializationModeQualifier : unsigned {
     InitOnDeviceReprogramINTEL = 0,
     InitOnDeviceResetINTEL = 1,
@@ -1543,6 +1560,8 @@ enum class RawAccessChainOperandsMask : unsigned {
 
 enum class FPEncoding : unsigned {
     BFloat16KHR = 0,
+    Float8E4M3EXT = 4214,
+    Float8E5M2EXT = 4215,
     Max = 0x7fffffff,
 };
 
@@ -1921,6 +1940,10 @@ enum class Op : unsigned {
     OpColorAttachmentReadEXT = 4160,
     OpDepthAttachmentReadEXT = 4161,
     OpStencilAttachmentReadEXT = 4162,
+    OpTypeTensorARM = 4163,
+    OpTensorReadARM = 4164,
+    OpTensorWriteARM = 4165,
+    OpTensorQuerySizeARM = 4166,
     OpTerminateInvocation = 4416,
     OpTypeUntypedPointerKHR = 4417,
     OpUntypedVariableKHR = 4418,
@@ -1974,10 +1997,14 @@ enum class Op : unsigned {
     OpImageBoxFilterQCOM = 4481,
     OpImageBlockMatchSSDQCOM = 4482,
     OpImageBlockMatchSADQCOM = 4483,
+    OpBitCastArrayQCOM = 4497,
     OpImageBlockMatchWindowSSDQCOM = 4500,
     OpImageBlockMatchWindowSADQCOM = 4501,
     OpImageBlockMatchGatherSSDQCOM = 4502,
     OpImageBlockMatchGatherSADQCOM = 4503,
+    OpCompositeConstructCoopMatQCOM = 4540,
+    OpCompositeExtractCoopMatQCOM = 4541,
+    OpExtractSubArrayQCOM = 4542,
     OpGroupIAddNonUniformAMD = 5000,
     OpGroupFAddNonUniformAMD = 5001,
     OpGroupFMinNonUniformAMD = 5002,
@@ -2730,6 +2757,10 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
     case Op::OpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
     case Op::OpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
     case Op::OpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
+    case Op::OpTypeTensorARM: *hasResult = true; *hasResultType = false; break;
+    case Op::OpTensorReadARM: *hasResult = true; *hasResultType = true; break;
+    case Op::OpTensorWriteARM: *hasResult = false; *hasResultType = false; break;
+    case Op::OpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break;
     case Op::OpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
     case Op::OpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break;
     case Op::OpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break;
@@ -2777,10 +2808,14 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
     case Op::OpImageBoxFilterQCOM: *hasResult = true; *hasResultType = true; break;
     case Op::OpImageBlockMatchSSDQCOM: *hasResult = true; *hasResultType = true; break;
     case Op::OpImageBlockMatchSADQCOM: *hasResult = true; *hasResultType = true; break;
+    case Op::OpBitCastArrayQCOM: *hasResult = true; *hasResultType = true; break;
     case Op::OpImageBlockMatchWindowSSDQCOM: *hasResult = true; *hasResultType = true; break;
     case Op::OpImageBlockMatchWindowSADQCOM: *hasResult = true; *hasResultType = true; break;
     case Op::OpImageBlockMatchGatherSSDQCOM: *hasResult = true; *hasResultType = true; break;
     case Op::OpImageBlockMatchGatherSADQCOM: *hasResult = true; *hasResultType = true; break;
+    case Op::OpCompositeConstructCoopMatQCOM: *hasResult = true; *hasResultType = true; break;
+    case Op::OpCompositeExtractCoopMatQCOM: *hasResult = true; *hasResultType = true; break;
+    case Op::OpExtractSubArrayQCOM: *hasResult = true; *hasResultType = true; break;
     case Op::OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
     case Op::OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
     case Op::OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
@@ -3596,6 +3631,7 @@ inline const char* DecorationToString(Decoration value) {
     case Decoration::MaxByteOffset: return "MaxByteOffset";
     case Decoration::AlignmentId: return "AlignmentId";
     case Decoration::MaxByteOffsetId: return "MaxByteOffsetId";
+    case Decoration::SaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT";
     case Decoration::NoSignedWrap: return "NoSignedWrap";
     case Decoration::NoUnsignedWrap: return "NoUnsignedWrap";
     case Decoration::WeightTextureQCOM: return "WeightTextureQCOM";
@@ -3938,7 +3974,10 @@ inline const char* CapabilityToString(Capability value) {
     case Capability::TileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT";
     case Capability::TileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
     case Capability::TileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
+    case Capability::TensorsARM: return "TensorsARM";
     case Capability::CooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
+    case Capability::Float8EXT: return "Float8EXT";
+    case Capability::Float8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT";
     case Capability::FragmentShadingRateKHR: return "FragmentShadingRateKHR";
     case Capability::SubgroupBallotKHR: return "SubgroupBallotKHR";
     case Capability::DrawParameters: return "DrawParameters";
@@ -3973,6 +4012,7 @@ inline const char* CapabilityToString(Capability value) {
     case Capability::TextureBoxFilterQCOM: return "TextureBoxFilterQCOM";
     case Capability::TextureBlockMatchQCOM: return "TextureBlockMatchQCOM";
     case Capability::TileShadingQCOM: return "TileShadingQCOM";
+    case Capability::CooperativeMatrixConversionQCOM: return "CooperativeMatrixConversionQCOM";
     case Capability::TextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM";
     case Capability::Float16ImageAMD: return "Float16ImageAMD";
     case Capability::ImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD";
@@ -4277,6 +4317,8 @@ inline const char* NamedMaximumNumberOfRegistersToString(NamedMaximumNumberOfReg
 inline const char* FPEncodingToString(FPEncoding value) {  // Maps an FPEncoding enumerator to its name as a string literal.
     switch (value) {
     case FPEncoding::BFloat16KHR: return "BFloat16KHR";
+    case FPEncoding::Float8E4M3EXT: return "Float8E4M3EXT";
+    case FPEncoding::Float8E5M2EXT: return "Float8E5M2EXT";
     default: return "Unknown";  // Any value without a case above.
     }
 }
@@ -4661,6 +4703,10 @@ inline const char* OpToString(Op value) {
     case Op::OpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT";
     case Op::OpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT";
     case Op::OpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT";
+    case Op::OpTypeTensorARM: return "OpTypeTensorARM";
+    case Op::OpTensorReadARM: return "OpTensorReadARM";
+    case Op::OpTensorWriteARM: return "OpTensorWriteARM";
+    case Op::OpTensorQuerySizeARM: return "OpTensorQuerySizeARM";
     case Op::OpTerminateInvocation: return "OpTerminateInvocation";
     case Op::OpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR";
     case Op::OpUntypedVariableKHR: return "OpUntypedVariableKHR";
@@ -4708,10 +4754,14 @@ inline const char* OpToString(Op value) {
     case Op::OpImageBoxFilterQCOM: return "OpImageBoxFilterQCOM";
     case Op::OpImageBlockMatchSSDQCOM: return "OpImageBlockMatchSSDQCOM";
     case Op::OpImageBlockMatchSADQCOM: return "OpImageBlockMatchSADQCOM";
+    case Op::OpBitCastArrayQCOM: return "OpBitCastArrayQCOM";
     case Op::OpImageBlockMatchWindowSSDQCOM: return "OpImageBlockMatchWindowSSDQCOM";
     case Op::OpImageBlockMatchWindowSADQCOM: return "OpImageBlockMatchWindowSADQCOM";
     case Op::OpImageBlockMatchGatherSSDQCOM: return "OpImageBlockMatchGatherSSDQCOM";
     case Op::OpImageBlockMatchGatherSADQCOM: return "OpImageBlockMatchGatherSADQCOM";
+    case Op::OpCompositeConstructCoopMatQCOM: return "OpCompositeConstructCoopMatQCOM";
+    case Op::OpCompositeExtractCoopMatQCOM: return "OpCompositeExtractCoopMatQCOM";
+    case Op::OpExtractSubArrayQCOM: return "OpExtractSubArrayQCOM";
     case Op::OpGroupIAddNonUniformAMD: return "OpGroupIAddNonUniformAMD";
     case Op::OpGroupFAddNonUniformAMD: return "OpGroupFAddNonUniformAMD";
     case Op::OpGroupFMinNonUniformAMD: return "OpGroupFMinNonUniformAMD";
@@ -5161,6 +5211,10 @@ constexpr TensorAddressingOperandsMask operator|(TensorAddressingOperandsMask a,
 constexpr TensorAddressingOperandsMask operator&(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) & unsigned(b)); }
 constexpr TensorAddressingOperandsMask operator^(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) ^ unsigned(b)); }
 constexpr TensorAddressingOperandsMask operator~(TensorAddressingOperandsMask a) { return TensorAddressingOperandsMask(~unsigned(a)); }
+constexpr TensorOperandsMask operator|(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) | unsigned(b)); }  // Bitwise OR of the underlying unsigned values.
+constexpr TensorOperandsMask operator&(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) & unsigned(b)); }  // Bitwise AND of the underlying unsigned values.
+constexpr TensorOperandsMask operator^(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) ^ unsigned(b)); }  // Bitwise XOR of the underlying unsigned values.
+constexpr TensorOperandsMask operator~(TensorOperandsMask a) { return TensorOperandsMask(~unsigned(a)); }  // Complements every bit, including bits with no named flag.
 constexpr MatrixMultiplyAccumulateOperandsMask operator|(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) | unsigned(b)); }
 constexpr MatrixMultiplyAccumulateOperandsMask operator&(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) & unsigned(b)); }
 constexpr MatrixMultiplyAccumulateOperandsMask operator^(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) ^ unsigned(b)); }

+ 1 - 1
3rdparty/glslang/build_info.h

@@ -35,7 +35,7 @@
 #define GLSLANG_BUILD_INFO
 
 #define GLSLANG_VERSION_MAJOR 15
-#define GLSLANG_VERSION_MINOR 3
+#define GLSLANG_VERSION_MINOR 4
 #define GLSLANG_VERSION_PATCH 0
 #define GLSLANG_VERSION_FLAVOR ""
 

+ 7 - 0
3rdparty/glslang/glslang/Include/BaseTypes.h

@@ -50,6 +50,8 @@ enum TBasicType {
     EbtDouble,
     EbtFloat16,
     EbtBFloat16,
+    EbtFloatE5M2,
+    EbtFloatE4M3,
     EbtInt8,
     EbtUint8,
     EbtInt16,
@@ -72,6 +74,7 @@ enum TBasicType {
     EbtTensorLayoutNV,
     EbtTensorViewNV,
     EbtCoopvecNV,
+    EbtTensorARM,
     // SPIR-V type defined by spirv_type
     EbtSpirvType,
 
@@ -609,6 +612,8 @@ __inline bool isTypeFloat(TBasicType type)
     case EbtDouble:
     case EbtFloat16:
     case EbtBFloat16:
+    case EbtFloatE5M2:
+    case EbtFloatE4M3:
         return true;
     default:
         return false;
@@ -620,6 +625,8 @@ __inline uint32_t GetNumBits(TBasicType type)
     switch (type) {
     case EbtInt8:
     case EbtUint8:
+    case EbtFloatE5M2:
+    case EbtFloatE4M3:
         return 8;
     case EbtBFloat16:
     case EbtFloat16:

+ 11 - 0
3rdparty/glslang/glslang/Include/ConstantUnion.h

@@ -899,6 +899,17 @@ public:
         unionArray = new TConstUnionVector(size, val);
     }
 
+    TConstUnionArray* clone() const  // Returns a newly allocated array with this array's elements appended to it.
+    {
+        TConstUnionArray *copy = new TConstUnionArray(size());  // NOTE(review): if this constructor pre-sizes the vector to size() elements, the push_backs below append after them and the copy ends up with 2*size() entries - confirm against TConstUnionArray(int).
+        if (unionArray) {  // unionArray may be null (size() reports 0 in that case); then nothing is appended.
+            for (const auto i : *unionArray) {  // Each element is copied by value.
+                copy->unionArray->push_back(i);  // NOTE(review): assumes copy->unionArray is non-null whenever the source has elements - confirm.
+            }
+        }
+        return copy;  // Raw pointer obtained from new; presumably the caller is responsible for its lifetime - confirm.
+    }
+
     int size() const { return unionArray ? (int)unionArray->size() : 0; }
     TConstUnion& operator[](size_t index) { return (*unionArray)[index]; }
     const TConstUnion& operator[](size_t index) const { return (*unionArray)[index]; }

+ 56 - 13
3rdparty/glslang/glslang/Include/Types.h

@@ -252,6 +252,8 @@ struct TSampler {   // misnomer now; includes images, textures without sampler,
         case EbtUint:   s.append("u");   break;
         case EbtFloat16: s.append("f16"); break;
         case EbtBFloat16: s.append("bf16"); break;
+        case EbtFloatE5M2: s.append("fe5m2"); break;
+        case EbtFloatE4M3: s.append("fe4m3"); break;
         case EbtInt8:   s.append("i8");  break;
         case EbtUint16: s.append("u8");  break;
         case EbtInt16:  s.append("i16"); break;
@@ -1500,6 +1502,7 @@ public:
     bool coopmatKHR : 1;
     bool coopvecNV  : 1;
     bool tileAttachmentQCOM: 1;
+    uint32_t tensorRankARM : 4;
     TArraySizes* arraySizes;
     const TType* userDef;
     TSourceLoc loc;
@@ -1511,7 +1514,8 @@ public:
     bool isCoopmatNV() const { return coopmatNV; }
     bool isCoopmatKHR() const { return coopmatKHR; }
     bool isCoopvecNV() const { return coopvecNV; }
-    bool isCoopmatOrvec() const { return isCoopmat() || isCoopvecNV(); }
+    bool isTensorARM() const { return tensorRankARM; }  // True when the 4-bit tensorRankARM field is nonzero.
+    bool hasTypeParameter() const { return isCoopmat() || isCoopvecNV() || isTensorARM(); }  // True for cooperative matrix, cooperative vector, or ARM tensor types.
 
     bool isTensorLayoutNV() const { return basicType == EbtTensorLayoutNV; }
     bool isTensorViewNV() const { return basicType == EbtTensorViewNV; }
@@ -1530,6 +1534,7 @@ public:
         coopmatKHR = false;
         coopvecNV = false;
         tileAttachmentQCOM = false;
+        tensorRankARM = 0;
         spirvType = nullptr;
     }
 
@@ -1590,7 +1595,7 @@ public:
     explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0,
                    bool isVector = false) :
                             basicType(t), vectorSize(static_cast<uint32_t>(vs) & 0b1111), matrixCols(static_cast<uint32_t>(mc) & 0b1111), matrixRows(static_cast<uint32_t>(mr) & 0b1111), vector1(isVector && vs == 1), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false),
-                            tileAttachmentQCOM(false), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
+                            tileAttachmentQCOM(false), tensorRankARM(0), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
                             spirvType(nullptr)
                             {
                                 assert(vs >= 0);
@@ -1606,7 +1611,7 @@ public:
     TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0,
           bool isVector = false) :
                             basicType(t), vectorSize(static_cast<uint32_t>(vs) & 0b1111), matrixCols(static_cast<uint32_t>(mc) & 0b1111), matrixRows(static_cast<uint32_t>(mr) & 0b1111), vector1(isVector && vs == 1), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false),
-                            tileAttachmentQCOM(false), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
+                            tileAttachmentQCOM(false), tensorRankARM(0), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
                             spirvType(nullptr)
                             {
                                 assert(vs >= 0);
@@ -1624,7 +1629,7 @@ public:
     explicit TType(const TPublicType& p) :
                             basicType(p.basicType),
                             vectorSize(p.vectorSize), matrixCols(p.matrixCols), matrixRows(p.matrixRows), vector1(false), coopmatNV(p.coopmatNV), coopmatKHR(p.coopmatKHR), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(p.coopvecNV),
-                            tileAttachmentQCOM(p.tileAttachmentQCOM), arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(p.typeParameters),
+                            tileAttachmentQCOM(p.tileAttachmentQCOM), tensorRankARM(p.tensorRankARM), arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(p.typeParameters),
                             spirvType(p.spirvType)
                             {
                                 if (basicType == EbtSampler)
@@ -1677,11 +1682,17 @@ public:
                                 if (p.isCoopvecNV() && p.typeParameters) {
                                     basicType = p.typeParameters->basicType;
                                 }
+                                if (p.isTensorARM() && p.typeParameters) {
+                                    basicType = p.typeParameters->basicType;
+                                    if (p.typeParameters->arraySizes->getNumDims() > 0) {
+                                        tensorRankARM = static_cast<uint32_t>(p.typeParameters->arraySizes->getDimSize(0)) & 0b1111;
+                                    }
+                                }
                             }
     // for construction of sampler types
     TType(const TSampler& sampler, TStorageQualifier q = EvqUniform, TArraySizes* as = nullptr) :
         basicType(EbtSampler), vectorSize(1u), matrixCols(0u), matrixRows(0u), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false),
-        tileAttachmentQCOM(false), arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr),
+        tileAttachmentQCOM(false), tensorRankARM(0), arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr),
         sampler(sampler), typeParameters(nullptr), spirvType(nullptr)
     {
         qualifier.clear();
@@ -1739,7 +1750,7 @@ public:
     // for making structures, ...
     TType(TTypeList* userDef, const TString& n) :
                             basicType(EbtStruct), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false),
-                            tileAttachmentQCOM(false), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr),
+                            tileAttachmentQCOM(false), tensorRankARM(0), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr),
                             spirvType(nullptr)
                             {
                                 sampler.clear();
@@ -1749,7 +1760,7 @@ public:
     // For interface blocks
     TType(TTypeList* userDef, const TString& n, const TQualifier& q) :
                             basicType(EbtBlock), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false), coopvecNV(false),
-                            tileAttachmentQCOM(false), qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr),
+                            tileAttachmentQCOM(false), tensorRankARM(0), qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr),
                             spirvType(nullptr)
                             {
                                 sampler.clear();
@@ -1758,7 +1769,7 @@ public:
     // for block reference (first parameter must be EbtReference)
     explicit TType(TBasicType t, const TType &p, const TString& n) :
                             basicType(t), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmatNV(false), coopmatKHR(false), coopmatKHRuse(0), coopmatKHRUseValid(false),
-                            tileAttachmentQCOM(false), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
+                            tileAttachmentQCOM(false), tensorRankARM(0), arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr),
                             spirvType(nullptr)
                             {
                                 assert(t == EbtReference);
@@ -1798,6 +1809,7 @@ public:
         coopmatKHRUseValid = copyOf.coopmatKHRUseValid;
         coopvecNV = copyOf.isCoopVecNV();
         tileAttachmentQCOM = copyOf.tileAttachmentQCOM;
+        tensorRankARM = copyOf.tensorRankARM;
     }
 
     // Make complete copy of the whole type graph rooted at 'copyOf'.
@@ -1837,6 +1849,7 @@ public:
         return *typeName;
     }
 
+    virtual bool hasFieldName() const { return (fieldName != nullptr); }
     virtual const TString& getFieldName() const
     {
         assert(fieldName);
@@ -1895,7 +1908,8 @@ public:
     virtual void updateImplicitArraySize(int size) { assert(isArray()); arraySizes->updateImplicitSize(size); }
     virtual void setImplicitlySized(bool isImplicitSized) { arraySizes->setImplicitlySized(isImplicitSized); }
     virtual bool isStruct() const { return basicType == EbtStruct || basicType == EbtBlock; }
-    virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble || basicType == EbtFloat16 || basicType == EbtBFloat16; }
+    virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble || basicType == EbtFloat16 ||
+                                                   basicType == EbtBFloat16 || basicType == EbtFloatE5M2 || basicType == EbtFloatE4M3; }
     virtual bool isIntegerDomain() const
     {
         switch (basicType) {
@@ -1916,7 +1930,9 @@ public:
     }
     virtual bool isOpaque() const { return basicType == EbtSampler
             || basicType == EbtAtomicUint || basicType == EbtAccStruct || basicType == EbtRayQuery
-            || basicType == EbtHitObjectNV || isTileAttachmentQCOM(); }
+            || basicType == EbtHitObjectNV || isTileAttachmentQCOM()
+            || isTensorARM();
+    }
     virtual bool isBuiltIn() const { return getQualifier().builtIn != EbvNone; }
 
     virtual bool isAttachmentEXT() const { return basicType == EbtSampler && getSampler().isAttachmentEXT(); }
@@ -1933,8 +1949,10 @@ public:
     bool isCoopMatNV() const { return coopmatNV; }
     bool isCoopMatKHR() const { return coopmatKHR; }
     bool isCoopVecNV() const { return coopvecNV; }
-    bool isCoopMatOrVec() const { return isCoopMat() || isCoopVecNV(); }
     bool isTileAttachmentQCOM() const { return tileAttachmentQCOM; }
+    bool isTensorARM() const { return tensorRankARM; }
+    bool hasTypeParameter() const { return isCoopMat() || isCoopVecNV() || isTensorARM(); }
+    int getTensorRankARM() const { return static_cast<int>(tensorRankARM); }
     bool isReference() const { return getBasicType() == EbtReference; }
     bool isSpirvType() const { return getBasicType() == EbtSpirvType; }
     int getCoopMatKHRuse() const { return static_cast<int>(coopmatKHRuse); }
@@ -1996,6 +2014,11 @@ public:
 
     virtual bool containsNonOpaque() const
     {
+        if (isTensorARM()) {
+            // Tensors have a numerical basicType even though it is Opaque
+            return false;
+        }
+
         const auto nonOpaque = [](const TType* t) {
             switch (t->basicType) {
             case EbtVoid:
@@ -2003,6 +2026,8 @@ public:
             case EbtDouble:
             case EbtFloat16:
             case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
             case EbtInt8:
             case EbtUint8:
             case EbtInt16:
@@ -2039,6 +2064,10 @@ public:
     {
         return containsBasicType(EbtBFloat16);
     }
+    bool contains8BitFloat() const
+    {
+        return containsBasicType(EbtFloatE5M2) || containsBasicType(EbtFloatE4M3);
+    }
     bool contains64BitInt() const
     {
         return containsBasicType(EbtInt64) || containsBasicType(EbtUint64);
@@ -2161,6 +2190,8 @@ public:
         case EbtDouble:            return "double";
         case EbtFloat16:           return "float16_t";
         case EbtBFloat16:          return "bfloat16_t";
+        case EbtFloatE5M2:         return "floate5m2_t";
+        case EbtFloatE4M3:         return "floate4m3_t";
         case EbtInt8:              return "int8_t";
         case EbtUint8:             return "uint8_t";
         case EbtInt16:             return "int16_t";
@@ -2180,6 +2211,7 @@ public:
         case EbtTensorLayoutNV:    return "tensorLayoutNV";
         case EbtTensorViewNV:      return "tensorViewNV";
         case EbtCoopvecNV:         return "coopvecNV";
+        case EbtTensorARM:         return "tensorARM";
         default:                   return "unknown type";
         }
     }
@@ -2792,6 +2824,7 @@ public:
               isCoopMatNV() == right.isCoopMatNV() &&
               isCoopMatKHR() == right.isCoopMatKHR() &&
               isCoopVecNV() == right.isCoopVecNV() &&
+               isTensorARM() == right.isTensorARM() &&
                sameStructType(right, lpidx, rpidx) &&
                sameReferenceType(right);
     }
@@ -2839,8 +2872,8 @@ public:
             else
                 rv = false;
         } else if (isCoopMatKHR() && right.isCoopMatKHR()) {
-            if (getBasicType() == EbtFloat || getBasicType() == EbtFloat16 || getBasicType() == EbtBFloat16)
-                rv = right.getBasicType() == EbtFloat || right.getBasicType() == EbtFloat16 || right.getBasicType() == EbtBFloat16 || right.getBasicType() == EbtCoopmat;
+            if (isFloatingDomain())
+                rv = right.isFloatingDomain() || right.getBasicType() == EbtCoopmat;
             else if (getBasicType() == EbtUint || getBasicType() == EbtUint8 || getBasicType() == EbtUint16)
                 rv = right.getBasicType() == EbtUint || right.getBasicType() == EbtUint8 || right.getBasicType() == EbtUint16 || right.getBasicType() == EbtCoopmat;
             else if (getBasicType() == EbtInt || getBasicType() == EbtInt8 || getBasicType() == EbtInt16)
@@ -2859,9 +2892,18 @@ public:
         if (isTensorViewNV()) {
             return right.isTensorViewNV() && right.typeParameters == nullptr && typeParameters != nullptr;
         }
+        if (isTensorARM()) {
+            return right.isTensorARM() && right.typeParameters == nullptr && typeParameters != nullptr;
+        }
+
         return false;
     }
 
+    bool sameTensorBaseTypeARM(const TType &right) const {  // true outright when either side has no type parameters
+        const bool missingParams = typeParameters == nullptr || right.typeParameters == nullptr;
+        return missingParams || (tensorRankARM == right.tensorRankARM && getBasicType() == right.getBasicType());
+    }
+
     bool sameCoopVecBaseType(const TType &right) const {
         bool rv = false;
 
@@ -3009,6 +3051,7 @@ protected:
     bool coopmatKHRUseValid   : 1;  // True if coopmatKHRuse has been set
     bool coopvecNV       : 1;
     bool tileAttachmentQCOM : 1;
+    uint32_t tensorRankARM       : 4;  // 0 means not a tensor; non-zero indicates the tensor rank.
     TQualifier qualifier;
 
     TArraySizes* arraySizes;    // nullptr unless an array; can be shared across types

+ 20 - 0
3rdparty/glslang/glslang/Include/intermediate.h

@@ -479,6 +479,10 @@ enum TOperator {
     EOpCooperativeVectorOuterProductAccumulateNV,
     EOpCooperativeVectorReduceSumAccumulateNV,
 
+    EOpTensorReadARM,
+    EOpTensorWriteARM,
+    EOpTensorSizeARM,
+
     EOpBeginInvocationInterlock, // Fragment only
     EOpEndInvocationInterlock, // Fragment only
 
@@ -615,6 +619,14 @@ enum TOperator {
     EOpConstructBF16Vec2,
     EOpConstructBF16Vec3,
     EOpConstructBF16Vec4,
+    EOpConstructFloatE5M2,
+    EOpConstructFloatE5M2Vec2,
+    EOpConstructFloatE5M2Vec3,
+    EOpConstructFloatE5M2Vec4,
+    EOpConstructFloatE4M3,
+    EOpConstructFloatE4M3Vec2,
+    EOpConstructFloatE4M3Vec3,
+    EOpConstructFloatE4M3Vec4,
     EOpConstructStruct,
     EOpConstructTextureSampler,
     EOpConstructNonuniform,     // expected to be transformed away, not present in final AST
@@ -623,6 +635,7 @@ enum TOperator {
     EOpConstructCooperativeMatrixKHR,
     EOpConstructCooperativeVectorNV,
     EOpConstructAccStruct,
+    EOpConstructSaturated,
     EOpConstructGuardEnd,
 
     //
@@ -972,6 +985,12 @@ enum TOperator {
     EOpImageBlockMatchGatherSSDQCOM,
     EOpImageBlockMatchGatherSADQCOM,
 
+    // Cooperative Matrix Conversion
+    EOpBitCastArrayQCOM,
+    EOpExtractSubArrayQCOM,
+    EOpCompositeConstructCoopMatQCOM,
+    EOpCompositeExtractCoopMatQCOM,
+
     // GL_NV_cluster_acceleration_structure
     EOpRayQueryGetIntersectionClusterIdNV,
     EOpHitObjectGetClusterIdNV,
@@ -1095,6 +1114,7 @@ public:
     virtual int getVectorSize() const { return type.getVectorSize(); }
     virtual int getMatrixCols() const { return type.getMatrixCols(); }
     virtual int getMatrixRows() const { return type.getMatrixRows(); }
+    virtual int getTensorRankARM() const { return type.getTensorRankARM(); }
     virtual bool isMatrix() const { return type.isMatrix(); }
     virtual bool isArray()  const { return type.isArray(); }
     virtual bool isVector() const { return type.isVector(); }

+ 16 - 0
3rdparty/glslang/glslang/MachineIndependent/Constant.cpp

@@ -152,6 +152,8 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TIntermTyped* right
             case EbtFloat:
             case EbtFloat16:
             case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
                 if (rightUnionArray[i].getDConst() != 0.0)
                     newConstArray[i].setDConst(leftUnionArray[i].getDConst() / rightUnionArray[i].getDConst());
                 else if (leftUnionArray[i].getDConst() > 0.0)
@@ -505,6 +507,8 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType)
             case EbtDouble:
             case EbtFloat16:
             case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
             case EbtFloat:
                 valf = unionArray[i].getDConst();
                 srcType = CONV_FLOAT;
@@ -554,6 +558,8 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType)
             case EbtDouble:
             case EbtFloat16:
             case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
             case EbtFloat:
                 dstType = CONV_FLOAT;
                 break;
@@ -625,6 +631,8 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType)
             case EbtDouble:
             case EbtFloat16:
             case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
             case EbtFloat:
                 newConstArray[i].setDConst(valf); break;
             case EbtInt8:
@@ -657,6 +665,8 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType)
             case EbtDouble:
             case EbtFloat16:
             case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
             case EbtFloat: newConstArray[i].setDConst(-unionArray[i].getDConst()); break;
             // Note: avoid UBSAN error regarding negating 0x80000000
             case EbtInt:   newConstArray[i].setIConst(
@@ -950,6 +960,8 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode)
                 switch(children[0]->getAsTyped()->getBasicType()) {
                 case EbtFloat16:
                 case EbtBFloat16:
+                case EbtFloatE5M2:
+                case EbtFloatE4M3:
                 case EbtFloat:
                 case EbtDouble:
                     newConstArray[comp].setDConst(std::min(childConstUnions[0][arg0comp].getDConst(), childConstUnions[1][arg1comp].getDConst()));
@@ -985,6 +997,8 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode)
                 switch(children[0]->getAsTyped()->getBasicType()) {
                 case EbtFloat16:
                 case EbtBFloat16:
+                case EbtFloatE5M2:
+                case EbtFloatE4M3:
                 case EbtFloat:
                 case EbtDouble:
                     newConstArray[comp].setDConst(std::max(childConstUnions[0][arg0comp].getDConst(), childConstUnions[1][arg1comp].getDConst()));
@@ -1020,6 +1034,8 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode)
                 switch(children[0]->getAsTyped()->getBasicType()) {
                 case EbtFloat16:
                 case EbtBFloat16:
+                case EbtFloatE5M2:
+                case EbtFloatE4M3:
                 case EbtFloat:
                 case EbtDouble:
                     newConstArray[comp].setDConst(std::min(std::max(childConstUnions[0][arg0comp].getDConst(), childConstUnions[1][arg1comp].getDConst()),

+ 144 - 0
3rdparty/glslang/glslang/MachineIndependent/Initialize.cpp

@@ -4025,6 +4025,47 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "bf16vec3   uintBitsToBFloat16EXT(u16vec3 value);"
             "bf16vec4   uintBitsToBFloat16EXT(u16vec4 value);"
 
+            "int8_t  floate5m2BitsToIntEXT(floate5m2_t value);"
+            "i8vec2  floate5m2BitsToIntEXT(fe5m2vec2 value);"
+            "i8vec3  floate5m2BitsToIntEXT(fe5m2vec3 value);"
+            "i8vec4  floate5m2BitsToIntEXT(fe5m2vec4 value);"
+
+            "uint8_t floate5m2BitsToUintEXT(floate5m2_t value);"
+            "u8vec2  floate5m2BitsToUintEXT(fe5m2vec2 value);"
+            "u8vec3  floate5m2BitsToUintEXT(fe5m2vec3 value);"
+            "u8vec4  floate5m2BitsToUintEXT(fe5m2vec4 value);"
+
+            "floate5m2_t intBitsToFloate5m2EXT(int8_t value);"
+            "fe5m2vec2   intBitsToFloate5m2EXT(i8vec2 value);"
+            "fe5m2vec3   intBitsToFloate5m2EXT(i8vec3 value);"
+            "fe5m2vec4   intBitsToFloate5m2EXT(i8vec4 value);"
+
+            "floate5m2_t uintBitsToFloate5m2EXT(uint8_t value);"
+            "fe5m2vec2   uintBitsToFloate5m2EXT(u8vec2 value);"
+            "fe5m2vec3   uintBitsToFloate5m2EXT(u8vec3 value);"
+            "fe5m2vec4   uintBitsToFloate5m2EXT(u8vec4 value);"
+
+            "int8_t  floate4m3BitsToIntEXT(floate4m3_t value);"
+            "i8vec2  floate4m3BitsToIntEXT(fe4m3vec2 value);"
+            "i8vec3  floate4m3BitsToIntEXT(fe4m3vec3 value);"
+            "i8vec4  floate4m3BitsToIntEXT(fe4m3vec4 value);"
+
+            "uint8_t floate4m3BitsToUintEXT(floate4m3_t value);"
+            "u8vec2  floate4m3BitsToUintEXT(fe4m3vec2 value);"
+            "u8vec3  floate4m3BitsToUintEXT(fe4m3vec3 value);"
+            "u8vec4  floate4m3BitsToUintEXT(fe4m3vec4 value);"
+
+            "floate4m3_t intBitsToFloate4m3EXT(int8_t value);"
+            "fe4m3vec2   intBitsToFloate4m3EXT(i8vec2 value);"
+            "fe4m3vec3   intBitsToFloate4m3EXT(i8vec3 value);"
+            "fe4m3vec4   intBitsToFloate4m3EXT(i8vec4 value);"
+
+            "floate4m3_t uintBitsToFloate4m3EXT(uint8_t value);"
+            "fe4m3vec2   uintBitsToFloate4m3EXT(u8vec2 value);"
+            "fe4m3vec3   uintBitsToFloate4m3EXT(u8vec3 value);"
+            "fe4m3vec4   uintBitsToFloate4m3EXT(u8vec4 value);"
+
+            "void saturatedConvertEXT();"
             "\n");
     }
 
@@ -4777,6 +4818,8 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
                 "float", "vec2", "vec4",
                 "float16_t", "f16vec2", "f16vec4",
                 "bfloat16_t", "bf16vec2", "bf16vec4",
+                "floate5m2_t", "fe5m2vec2", "fe5m2vec4",
+                "floate4m3_t", "fe4m3vec2", "fe4m3vec4",
                 "double", "dvec2", "dvec4",
                 "int8_t", "i8vec2", "i8vec4",
                 "int16_t", "i16vec2", "i16vec4",
@@ -4841,6 +4884,31 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "\n"
             );
 
+        {
+          std::stringstream coopMatConvFuncs;
+
+          const std::string eltTypes[] = {"uint32_t", "uint", "int32_t", "int", "float32_t", "float", "float16_t"};
+
+          for (auto srcEltTy : eltTypes) {
+            for (auto dstEltTy : eltTypes) {
+              coopMatConvFuncs << "void bitcastQCOM(" << srcEltTy.c_str() << " SrcArr[], " << dstEltTy.c_str()
+                << " DstArr[]);\n";
+            }
+          }
+          coopMatConvFuncs << "\n";
+
+          for (auto eltTy : {"float32_t", "float16_t", "int8_t", "uint8_t", "uint32_t", "uint", "int32_t", "int"}) {
+            coopMatConvFuncs << "void vectorToCoopmatQCOM(" << eltTy << " SrcVec[], coopmat CM);\n";
+            coopMatConvFuncs << "void coopmatToVectorQCOM(coopmat CM, " << eltTy << " Dstvec[]);\n";
+          }
+
+          for (auto eltTy : {"uint32_t", "uint", "int32_t", "int", "float32_t", "float", "float16_t"}) {
+            coopMatConvFuncs << "void extractSubArrayQCOM(" << eltTy << " arr[], uint index, " << eltTy << " subarr[]);\n";
+          }
+
+          commonBuiltins.append(coopMatConvFuncs.str().c_str());
+        }
+
         commonBuiltins.append(
             "tensorLayoutNV createTensorLayoutNV(uint Dim);\n"
             "tensorLayoutNV createTensorLayoutNV(uint Dim, uint Mode);\n"
@@ -4894,6 +4962,29 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "tensorViewNV setTensorViewClipNV(tensorViewNV v, uint clipRowOffset, uint clipRowSpan, uint clipColOffset, uint clipColSpan);\n"
             "\n"
         );
+
+        // GL_ARM_tensors builtins.
+        static const char *tensorDataTypesARM[] = {
+            "bool",
+            "int8_t", "int16_t", "int32_t", "int64_t",
+            "uint8_t", "uint16_t", "uint32_t", "uint64_t",
+            "float16_t", "float32_t", "float64_t",
+        };
+        std::ostringstream ostream;
+        for (auto t : tensorDataTypesARM) {
+            // Scalar
+            ostream << "void tensorReadARM(readonly tensorARM t, uint coords[], out "
+                    << t << " data, uint tensorOperands = 0U, ...);\n";
+            ostream << "void tensorWriteARM(writeonly tensorARM t, uint coords[], "
+                    << t << " data, uint tensorOperands = 0U, ...);\n";
+            // Array
+            ostream << "void tensorReadARM(readonly tensorARM t, uint coords[], "
+                    << t << " data[], uint tensorOperands = 0U, ...);\n";
+            ostream << "void tensorWriteARM(writeonly tensorARM t, uint coords[], "
+                    << t << " data[], uint tensorOperands = 0U, ...);\n";
+        }
+        ostream << "uint tensorSizeARM(readonly writeonly tensorARM t, uint dim);\n";
+        commonBuiltins.append(ostream.str());
     }
 
     if (profile != EEsProfile && version >= 450) {
@@ -8285,6 +8376,12 @@ void TBuiltIns::initialize(const TBuiltInResource &resources, int version, EProf
         snprintf(builtInConstant, maxSize, "const int gl_MaxComputeTextureImageUnits = %d;", resources.maxComputeTextureImageUnits);
         s.append(builtInConstant);
 
+        // GL_ARM_tensors operands.
+        snprintf(builtInConstant, maxSize, "const uint gl_TensorOperandsNonTemporalARM = 0x1U;");
+        s.append(builtInConstant);
+        snprintf(builtInConstant, maxSize, "const uint gl_TensorOperandsOutOfBoundsValueARM = 0x2U;");
+        s.append(builtInConstant);
+
         s.append("\n");
     }
 
@@ -9706,6 +9803,12 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
             symbolTable.setFunctionExtensions("setTensorViewClipNV",            1, &E_GL_NV_cooperative_matrix2);
         }
 
+        {
+            symbolTable.setFunctionExtensions("tensorReadARM",   1, &E_GL_ARM_tensors);
+            symbolTable.setFunctionExtensions("tensorWriteARM",  1, &E_GL_ARM_tensors);
+            symbolTable.setFunctionExtensions("tensorSizeARM",   1, &E_GL_ARM_tensors);
+        }
+
         {
             symbolTable.setFunctionExtensions("coopVecMatMulNV",                    1, &E_GL_NV_cooperative_vector);
             symbolTable.setFunctionExtensions("coopVecMatMulAddNV",                 1, &E_GL_NV_cooperative_vector);
@@ -9713,6 +9816,13 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
             symbolTable.setFunctionExtensions("coopVecReduceSumAccumulateNV",       1, &E_GL_NV_cooperative_vector);
         }
 
+        {
+          symbolTable.setFunctionExtensions("bitcastQCOM", 1, &E_GL_QCOM_cooperative_matrix_conversion);
+          symbolTable.setFunctionExtensions("extractSubArrayQCOM", 1, &E_GL_QCOM_cooperative_matrix_conversion);
+          symbolTable.setFunctionExtensions("vectorToCoopmatQCOM", 1, &E_GL_QCOM_cooperative_matrix_conversion);
+          symbolTable.setFunctionExtensions("coopmatToVectorQCOM", 1, &E_GL_QCOM_cooperative_matrix_conversion);
+        }
+
         if ((profile != EEsProfile && version >= 450) || (profile == EEsProfile && version >= 320)) {
             symbolTable.setFunctionExtensions("dFdx",                   1, &E_GL_NV_compute_shader_derivatives);
             symbolTable.setFunctionExtensions("dFdy",                   1, &E_GL_NV_compute_shader_derivatives);
@@ -9752,6 +9862,19 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
             symbolTable.setFunctionExtensions("bfloat16BitsToUintEXT", 1, &E_GL_EXT_bfloat16);
             symbolTable.setFunctionExtensions("intBitsToBFloat16EXT", 1, &E_GL_EXT_bfloat16);
             symbolTable.setFunctionExtensions("uintBitsToBFloat16EXT", 1, &E_GL_EXT_bfloat16);
+
+            symbolTable.setFunctionExtensions("floate5m2BitsToIntEXT", 1, &E_GL_EXT_float_e5m2);
+            symbolTable.setFunctionExtensions("floate5m2BitsToUintEXT", 1, &E_GL_EXT_float_e5m2);
+            symbolTable.setFunctionExtensions("intBitsToFloate5m2EXT", 1, &E_GL_EXT_float_e5m2);
+            symbolTable.setFunctionExtensions("uintBitsToFloate5m2EXT", 1, &E_GL_EXT_float_e5m2);
+
+            symbolTable.setFunctionExtensions("floate4m3BitsToIntEXT", 1, &E_GL_EXT_float_e4m3);
+            symbolTable.setFunctionExtensions("floate4m3BitsToUintEXT", 1, &E_GL_EXT_float_e4m3);
+            symbolTable.setFunctionExtensions("intBitsToFloate4m3EXT", 1, &E_GL_EXT_float_e4m3);
+            symbolTable.setFunctionExtensions("uintBitsToFloate4m3EXT", 1, &E_GL_EXT_float_e4m3);
+
+            const char *float8exts[] = {E_GL_EXT_float_e5m2, E_GL_EXT_float_e4m3};
+            symbolTable.setFunctionExtensions("saturatedConvertEXT", 2, float8exts);
         }
 
         // E_SPV_QCOM_tile_shading
@@ -10750,6 +10873,18 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
             symbolTable.relateToOperator("bfloat16BitsToUintEXT", EOpFloatBitsToUint);
             symbolTable.relateToOperator("intBitsToBFloat16EXT",  EOpIntBitsToFloat);
             symbolTable.relateToOperator("uintBitsToBFloat16EXT", EOpUintBitsToFloat);
+
+            symbolTable.relateToOperator("floate5m2BitsToIntEXT",  EOpFloatBitsToInt);
+            symbolTable.relateToOperator("floate5m2BitsToUintEXT", EOpFloatBitsToUint);
+            symbolTable.relateToOperator("intBitsToFloate5m2EXT",  EOpIntBitsToFloat);
+            symbolTable.relateToOperator("uintBitsToFloate5m2EXT", EOpUintBitsToFloat);
+
+            symbolTable.relateToOperator("floate4m3BitsToIntEXT",  EOpFloatBitsToInt);
+            symbolTable.relateToOperator("floate4m3BitsToUintEXT", EOpFloatBitsToUint);
+            symbolTable.relateToOperator("intBitsToFloate4m3EXT",  EOpIntBitsToFloat);
+            symbolTable.relateToOperator("uintBitsToFloate4m3EXT", EOpUintBitsToFloat);
+
+            symbolTable.relateToOperator("saturatedConvertEXT", EOpConstructSaturated);
         }
 
         // GL_KHR_shader_subgroup
@@ -10997,6 +11132,15 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
         symbolTable.relateToOperator("setTensorViewStrideNV",        EOpTensorViewSetStrideNV);
         symbolTable.relateToOperator("setTensorViewClipNV",          EOpTensorViewSetClipNV);
 
+        symbolTable.relateToOperator("tensorReadARM",                EOpTensorReadARM);
+        symbolTable.relateToOperator("tensorWriteARM",               EOpTensorWriteARM);
+        symbolTable.relateToOperator("tensorSizeARM",                EOpTensorSizeARM);
+
+        symbolTable.relateToOperator("bitcastQCOM", EOpBitCastArrayQCOM);
+        symbolTable.relateToOperator("extractSubArrayQCOM", EOpExtractSubArrayQCOM);
+        symbolTable.relateToOperator("vectorToCoopmatQCOM", EOpCompositeConstructCoopMatQCOM);
+        symbolTable.relateToOperator("coopmatToVectorQCOM", EOpCompositeExtractCoopMatQCOM);
+
         if (profile != EEsProfile && version >= 460) {
             symbolTable.relateToOperator("fetchMicroTriangleVertexPositionNV", EOpFetchMicroTriangleVertexPositionNV);
             symbolTable.relateToOperator("fetchMicroTriangleVertexBarycentricNV", EOpFetchMicroTriangleVertexBarycentricNV);

+ 62 - 9
3rdparty/glslang/glslang/MachineIndependent/Intermediate.cpp

@@ -400,6 +400,8 @@ TIntermTyped* TIntermediate::addUnaryMath(TOperator op, TIntermTyped* child,
     case EOpConstructDouble: newType = EbtDouble; break;
     case EOpConstructFloat16: newType = EbtFloat16; break;
     case EOpConstructBFloat16: newType = EbtBFloat16; break;
+    case EOpConstructFloatE4M3: newType = EbtFloatE4M3; break;
+    case EOpConstructFloatE5M2: newType = EbtFloatE5M2; break;
     default: break; // some compilers want this
     }
 
@@ -430,7 +432,9 @@ TIntermTyped* TIntermediate::addUnaryMath(TOperator op, TIntermTyped* child,
         case EOpConstructFloat:
         case EOpConstructDouble:
         case EOpConstructFloat16:
-        case EOpConstructBFloat16: {
+        case EOpConstructBFloat16:
+        case EOpConstructFloatE5M2:
+        case EOpConstructFloatE4M3: {
             TIntermUnary* unary_node = child->getAsUnaryNode();
             if (unary_node != nullptr)
                 unary_node->updatePrecision();
@@ -571,9 +575,9 @@ bool TIntermediate::isConversionAllowed(TOperator op, TIntermTyped* node) const
 
 bool TIntermediate::buildConvertOp(TBasicType dst, TBasicType src, TOperator& newOp) const
 {
-    // bfloat16_t <-> bool not supported
-    if ((src == EbtBFloat16 && dst == EbtBool) ||
-        (dst == EbtBFloat16 && src == EbtBool)) {
+    // (bfloat16_t,fp8) <-> bool not supported
+    if (((src == EbtBFloat16 || src == EbtFloatE5M2 || src == EbtFloatE4M3) && dst == EbtBool) ||
+        ((dst == EbtBFloat16 || dst == EbtFloatE5M2 || dst == EbtFloatE4M3) && src == EbtBool)) {
         return false;
     }
 
@@ -604,12 +608,15 @@ TIntermTyped* TIntermediate::createConversion(TBasicType convertTo, TIntermTyped
                                 node->getBasicType() == EbtInt   || node->getBasicType() == EbtUint   ||
                                 node->getBasicType() == EbtInt64 || node->getBasicType() == EbtUint64);
 
-    bool convertToFloatTypes = (convertTo == EbtFloat16 || convertTo == EbtBFloat16 || convertTo == EbtFloat || convertTo == EbtDouble);
+    bool convertToFloatTypes = (convertTo == EbtFloat16 || convertTo == EbtBFloat16 || convertTo == EbtFloat || convertTo == EbtDouble ||
+                                convertTo == EbtFloatE5M2 || convertTo == EbtFloatE4M3);
 
     bool convertFromFloatTypes = (node->getBasicType() == EbtFloat16 ||
                                   node->getBasicType() == EbtBFloat16 ||
                                   node->getBasicType() == EbtFloat ||
-                                  node->getBasicType() == EbtDouble);
+                                  node->getBasicType() == EbtDouble ||
+                                  node->getBasicType() == EbtFloatE5M2 ||
+                                  node->getBasicType() == EbtFloatE4M3);
 
     if (((convertTo == EbtInt8 || convertTo == EbtUint8) && ! convertFromIntTypes) ||
         ((node->getBasicType() == EbtInt8 || node->getBasicType() == EbtUint8) && ! convertToIntTypes)) {
@@ -832,7 +839,8 @@ TIntermTyped* TIntermediate::addConversion(TOperator op, const TType& type, TInt
     // Reject implicit conversions to cooperative matrix types
     if (node->getType().isCoopMat() &&
         op != EOpConstructCooperativeMatrixNV &&
-        op != EOpConstructCooperativeMatrixKHR)
+        op != EOpConstructCooperativeMatrixKHR &&
+        op != glslang::EOpCompositeConstructCoopMatQCOM)
         return nullptr;
 
     if (node->getType().isTensorLayoutNV() ||
@@ -858,12 +866,15 @@ TIntermTyped* TIntermediate::addConversion(TOperator op, const TType& type, TInt
     case EOpConstructDouble:
     case EOpConstructFloat16:
     case EOpConstructBFloat16:
+    case EOpConstructFloatE5M2:
+    case EOpConstructFloatE4M3:
     case EOpConstructInt8:
     case EOpConstructUint8:
     case EOpConstructInt16:
     case EOpConstructUint16:
     case EOpConstructInt64:
     case EOpConstructUint64:
+    case EOpConstructSaturated:
         break;
 
     //
@@ -965,6 +976,8 @@ TIntermTyped* TIntermediate::addConversion(TOperator op, const TType& type, TInt
     //  - at the time of this writing (14-Aug-2020), no test results are changed by this.
     switch (op) {
     case EOpConstructBFloat16:
+    case EOpConstructFloatE5M2:
+    case EOpConstructFloatE4M3:
         canPromoteConstant = true;
         break;
     case EOpConstructFloat16:
@@ -1270,6 +1283,8 @@ bool TIntermediate::isFPPromotion(TBasicType from, TBasicType to) const
     if (to == EbtDouble) {
         switch(from) {
         case EbtBFloat16:
+        case EbtFloatE5M2:
+        case EbtFloatE4M3:
         case EbtFloat16:
         case EbtFloat:
             return true;
@@ -1362,7 +1377,7 @@ bool TIntermediate::isIntegralConversion(TBasicType from, TBasicType to) const
 
 bool TIntermediate::isFPConversion(TBasicType from, TBasicType to) const
 {
-    if (to == EbtFloat && (from == EbtFloat16 || from == EbtBFloat16)) {
+    if (to == EbtFloat && (from == EbtFloat16 || from == EbtBFloat16 || from == EbtFloatE5M2 || from == EbtFloatE4M3)) {
         return true;
     } else {
         return false;
@@ -1517,6 +1532,8 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat
                                         (numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types) || 
                                         numericFeatures.contains(TNumericFeatures::gpu_shader_half_float));
             case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
                 return true;
             case EbtInt8:
             case EbtUint8:
@@ -1540,6 +1557,8 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat
                     numericFeatures.contains(TNumericFeatures::nv_gpu_shader5_types) ||
                     getSource() == EShSourceHlsl;
             case EbtBFloat16:
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
                 return true;
             case EbtInt8:
             case EbtUint8:
@@ -1610,6 +1629,18 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat
             case EbtInt16:
             case EbtUint16:
                 return numericFeatures.contains(TNumericFeatures::gpu_shader_int16);
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
+                return true;
+            default:
+                break;
+            }
+            return false;
+        case EbtBFloat16:
+            switch (from) {
+            case EbtFloatE5M2:
+            case EbtFloatE4M3:
+                return true;
             default:
                 break;
             }
@@ -2077,6 +2108,24 @@ TOperator TIntermediate::mapTypeToConstructorOp(const TType& type) const
         default: break; // some compilers want this
         }
         break;
+    case EbtFloatE5M2:
+        switch (type.getVectorSize()) {
+        case 1: op = EOpConstructFloatE5M2;  break;
+        case 2: op = EOpConstructFloatE5M2Vec2;  break;
+        case 3: op = EOpConstructFloatE5M2Vec3;  break;
+        case 4: op = EOpConstructFloatE5M2Vec4;  break;
+        default: break; // some compilers want this
+        }
+        break;
+    case EbtFloatE4M3:
+        switch (type.getVectorSize()) {
+        case 1: op = EOpConstructFloatE4M3;  break;
+        case 2: op = EOpConstructFloatE4M3Vec2;  break;
+        case 3: op = EOpConstructFloatE4M3Vec3;  break;
+        case 4: op = EOpConstructFloatE4M3Vec4;  break;
+        default: break; // some compilers want this
+        }
+        break;
     case EbtInt8:
         switch(type.getVectorSize()) {
         case 1: op = EOpConstructInt8;   break;
@@ -2486,7 +2535,7 @@ TIntermConstantUnion* TIntermediate::addConstantUnion(bool b, const TSourceLoc&
 
 TIntermConstantUnion* TIntermediate::addConstantUnion(double d, TBasicType baseType, const TSourceLoc& loc, bool literal) const
 {
-    assert(baseType == EbtFloat || baseType == EbtDouble || baseType == EbtFloat16 || baseType == EbtBFloat16);
+    assert(baseType == EbtFloat || baseType == EbtDouble || baseType == EbtFloat16 || baseType == EbtBFloat16 || baseType == EbtFloatE5M2 || baseType == EbtFloatE4M3);
 
     if (isEsProfile() && (baseType == EbtFloat || baseType == EbtFloat16)) {
         int exponent = 0;
@@ -3741,6 +3790,8 @@ TIntermTyped* TIntermediate::promoteConstantUnion(TBasicType promoteTo, TIntermC
 #define TO_ALL(Get)   \
         switch (promoteTo) { \
         case EbtBFloat16: PROMOTE(setDConst, double, Get); break; \
+        case EbtFloatE5M2: PROMOTE(setDConst, double, Get); break; \
+        case EbtFloatE4M3: PROMOTE(setDConst, double, Get); break; \
         case EbtFloat16: PROMOTE(setDConst, double, Get); break; \
         case EbtFloat: PROMOTE(setDConst, double, Get); break; \
         case EbtDouble: PROMOTE(setDConst, double, Get); break; \
@@ -3763,6 +3814,8 @@ TIntermTyped* TIntermediate::promoteConstantUnion(TBasicType promoteTo, TIntermC
         case EbtBool: TO_ALL(getBConst); break;
         case EbtFloat16: TO_ALL(getDConst); break;
         case EbtBFloat16: TO_ALL(getDConst); break;
+        case EbtFloatE5M2: TO_ALL(getDConst); break;
+        case EbtFloatE4M3: TO_ALL(getDConst); break;
         case EbtDouble: TO_ALL(getDConst); break;
         case EbtInt8: TO_ALL(getI8Const); break;
         case EbtInt16: TO_ALL(getI16Const); break;

+ 5 - 3
3rdparty/glslang/glslang/MachineIndependent/ParseContextBase.cpp

@@ -424,7 +424,7 @@ const TFunction* TParseContextBase::selectFunction(
         // to even be a potential match, number of arguments must be >= the number of
         // fixed (non-default) parameters, and <= the total (including parameter with defaults).
         if (call.getParamCount() < candidate.getFixedParamCount() ||
-            call.getParamCount() > candidate.getParamCount())
+            (call.getParamCount() > candidate.getParamCount() && !candidate.isVariadic()))
             continue;
 
         // see if arguments are convertible
@@ -463,7 +463,8 @@ const TFunction* TParseContextBase::selectFunction(
     const auto betterParam = [&call, &better](const TFunction& can1, const TFunction& can2) -> bool {
         // is call -> can2 better than call -> can1 for any parameter
         bool hasBetterParam = false;
-        for (int param = 0; param < call.getParamCount(); ++param) {
+        const int paramCount = std::min({call.getParamCount(), can1.getParamCount(), can2.getParamCount()});
+        for (int param = 0; param < paramCount; ++param) {
             if (better(*call[param].type, *can1[param].type, *can2[param].type)) {
                 hasBetterParam = true;
                 break;
@@ -474,7 +475,8 @@ const TFunction* TParseContextBase::selectFunction(
 
     const auto equivalentParams = [&call, &better](const TFunction& can1, const TFunction& can2) -> bool {
         // is call -> can2 equivalent to call -> can1 for all the call parameters?
-        for (int param = 0; param < call.getParamCount(); ++param) {
+        const int paramCount = std::min({call.getParamCount(), can1.getParamCount(), can2.getParamCount()});
+        for (int param = 0; param < paramCount; ++param) {
             if (better(*call[param].type, *can1[param].type, *can2[param].type) ||
                 better(*call[param].type, *can2[param].type, *can1[param].type))
                 return false;

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 651 - 80
3rdparty/glslang/glslang/MachineIndependent/ParseHelper.cpp


+ 5 - 1
3rdparty/glslang/glslang/MachineIndependent/ParseHelper.h

@@ -407,7 +407,7 @@ public:
     void setDefaultPrecision(const TSourceLoc&, TPublicType&, TPrecisionQualifier);
     int computeSamplerTypeIndex(TSampler&);
     TPrecisionQualifier getDefaultPrecision(TPublicType&);
-    void precisionQualifierCheck(const TSourceLoc&, TBasicType, TQualifier&, bool isCoopMatOrVec);
+    void precisionQualifierCheck(const TSourceLoc&, TBasicType, TQualifier&, bool hasTypeParameter);
     void parameterTypeCheck(const TSourceLoc&, TStorageQualifier qualifier, const TType& type);
     bool containsFieldWithBasicType(const TType& type ,TBasicType basicType);
     TSymbol* redeclareBuiltinVariable(const TSourceLoc&, const TString&, const TQualifier&, const TShaderQualifiers&);
@@ -450,6 +450,9 @@ public:
     TIntermTyped* addConstructor(const TSourceLoc&, TIntermNode*, const TType&);
     TIntermTyped* constructAggregate(TIntermNode*, const TType&, int, const TSourceLoc&);
     TIntermTyped* constructBuiltIn(const TType&, TOperator, TIntermTyped*, const TSourceLoc&, bool subset);
+    void makeVariadic(TFunction *F, const TSourceLoc &loc);
+    TParameter getParamWithDefault(const TPublicType& ty, TString* identifier, TIntermTyped* initializer,
+                                   const TSourceLoc& loc);
     void inheritMemoryQualifiers(const TQualifier& from, TQualifier& to);
     void declareBlock(const TSourceLoc&, TTypeList& typeList, const TString* instanceName = nullptr, TArraySizes* arraySizes = nullptr);
     void blockStorageRemap(const TSourceLoc&, const TString*, TQualifier&);
@@ -510,6 +513,7 @@ protected:
     TIntermTyped* convertInitializerList(const TSourceLoc&, const TType&, TIntermTyped* initializer);
     void finish() override;
     void handleCoopMat2FunctionCall(const TSourceLoc& loc, const TFunction* fnCandidate, TIntermTyped* result, TIntermNode* arguments);
+    void handleVector2CoopMatConversionCall(const TSourceLoc& loc, const TFunction* fnCandidate, TIntermTyped* &result, TIntermNode* arguments);
 
     virtual const char* getGlobalUniformBlockName() const override;
     virtual void finalizeGlobalUniformBlockLayout(TVariable&) override;

+ 77 - 12
3rdparty/glslang/glslang/MachineIndependent/Scan.cpp

@@ -542,6 +542,16 @@ const std::unordered_map<const char*, int, str_hash, str_eq> KeywordMap {
     {"bf16vec3",BF16VEC3},
     {"bf16vec4",BF16VEC4},
 
+    {"floate5m2_t",FLOATE5M2_T},
+    {"fe5m2vec2",FE5M2VEC2},
+    {"fe5m2vec3",FE5M2VEC3},
+    {"fe5m2vec4",FE5M2VEC4},
+
+    {"floate4m3_t",FLOATE4M3_T},
+    {"fe4m3vec2",FE4M3VEC2},
+    {"fe4m3vec3",FE4M3VEC3},
+    {"fe4m3vec4",FE4M3VEC4},
+
     {"float32_t",FLOAT32_T},
     {"f32vec2",F32VEC2},
     {"f32vec3",F32VEC3},
@@ -759,6 +769,8 @@ const std::unordered_map<const char*, int, str_hash, str_eq> KeywordMap {
     {"hitObjectNV",HITOBJECTNV},
     {"hitObjectAttributeNV",HITOBJECTATTRNV},
 
+    {"tensorARM",TENSORARM},
+
     {"__function",FUNCTION},
     {"tensorLayoutNV",TENSORLAYOUTNV},
     {"tensorViewNV",TENSORVIEWNV},
@@ -824,12 +836,22 @@ int TScanContext::tokenize(TPpContext* pp, TParserToken& token)
         loc = ppToken.loc;
         parserToken->sType.lex.loc = loc;
         switch (token) {
-        case ';':  afterType = false; afterBuffer = false; return SEMICOLON;
-        case ',':  afterType = false;   return COMMA;
+        case ';':  afterType = false; afterBuffer = false; inDeclaratorList = false; afterDeclarator = false; angleBracketDepth = 0; squareBracketDepth = 0; parenDepth = 0; return SEMICOLON;
+        case ',':
+            // If we just processed a declarator (identifier after a type), this comma
+            // indicates that we're in a declarator list. Note that 'afterDeclarator' is
+            // only set when we are not inside a template parameter list, array expression,
+            // or function parameter list.
+            if (afterDeclarator) {
+                inDeclaratorList = true;
+            }
+            afterType = false;
+            afterDeclarator = false;
+            return COMMA;
         case ':':                       return COLON;
-        case '=':  afterType = false;   return EQUAL;
-        case '(':  afterType = false;   return LEFT_PAREN;
-        case ')':  afterType = false;   return RIGHT_PAREN;
+        case '=':  afterType = false; inDeclaratorList = false; afterDeclarator = false; return EQUAL;
+        case '(':  afterType = false; inDeclaratorList = false; afterDeclarator = false; parenDepth++; return LEFT_PAREN;
+        case ')':  afterType = false; inDeclaratorList = false; afterDeclarator = false; if (parenDepth > 0) parenDepth--; return RIGHT_PAREN;
         case '.':  field = true;        return DOT;
         case '!':                       return BANG;
         case '-':                       return DASH;
@@ -838,16 +860,16 @@ int TScanContext::tokenize(TPpContext* pp, TParserToken& token)
         case '*':                       return STAR;
         case '/':                       return SLASH;
         case '%':                       return PERCENT;
-        case '<':                       return LEFT_ANGLE;
-        case '>':                       return RIGHT_ANGLE;
+        case '<':                       angleBracketDepth++; return LEFT_ANGLE;
+        case '>':                       if (angleBracketDepth > 0) angleBracketDepth--; return RIGHT_ANGLE;
         case '|':                       return VERTICAL_BAR;
         case '^':                       return CARET;
         case '&':                       return AMPERSAND;
         case '?':                       return QUESTION;
-        case '[':                       return LEFT_BRACKET;
-        case ']':                       return RIGHT_BRACKET;
-        case '{':  afterStruct = false; afterBuffer = false; return LEFT_BRACE;
-        case '}':                       return RIGHT_BRACE;
+        case '[':                       squareBracketDepth++; return LEFT_BRACKET;
+        case ']':                       if (squareBracketDepth > 0) squareBracketDepth--; return RIGHT_BRACKET;
+        case '{':  afterStruct = false; afterBuffer = false; inDeclaratorList = false; afterDeclarator = false; angleBracketDepth = 0; squareBracketDepth = 0; parenDepth = 0; return LEFT_BRACE;
+        case '}':  inDeclaratorList = false; afterDeclarator = false; angleBracketDepth = 0; squareBracketDepth = 0; parenDepth = 0; return RIGHT_BRACE;
         case '\\':
             parseContext.error(loc, "illegal use of escape character", "\\", "");
             break;
@@ -1494,6 +1516,28 @@ int TScanContext::tokenizeIdentifier()
 
         return identifierOrType();
 
+    case FLOATE5M2_T:
+    case FE5M2VEC2:
+    case FE5M2VEC3:
+    case FE5M2VEC4:
+        afterType = true;
+        if (parseContext.symbolTable.atBuiltInLevel() ||
+            parseContext.extensionTurnedOn(E_GL_EXT_float_e5m2))
+            return keyword;
+
+        return identifierOrType();
+
+    case FLOATE4M3_T:
+    case FE4M3VEC2:
+    case FE4M3VEC3:
+    case FE4M3VEC4:
+        afterType = true;
+        if (parseContext.symbolTable.atBuiltInLevel() ||
+            parseContext.extensionTurnedOn(E_GL_EXT_float_e4m3))
+            return keyword;
+
+        return identifierOrType();
+
     case SAMPLERCUBEARRAY:
     case SAMPLERCUBEARRAYSHADOW:
     case ISAMPLERCUBEARRAY:
@@ -1824,6 +1868,12 @@ int TScanContext::tokenizeIdentifier()
             parseContext.extensionTurnedOn(E_GL_NV_integer_cooperative_matrix))
             return keyword;
         return identifierOrType();
+    case TENSORARM:
+        afterType = true;
+        if (parseContext.symbolTable.atBuiltInLevel() ||
+            parseContext.extensionTurnedOn(E_GL_ARM_tensors))
+            return keyword;
+        return identifierOrType();
 
     case COOPMAT:
         afterType = true;
@@ -1895,14 +1945,29 @@ int TScanContext::identifierOrType()
     if (field)
         return IDENTIFIER;
 
+    // If we see an identifier right after a type, this might be a declarator.
+    // But not in template parameters (inside angle brackets), array expressions (inside square brackets),
+    // or function parameters (inside parentheses)
+    if (afterType && angleBracketDepth == 0 && squareBracketDepth == 0 && parenDepth == 0) {
+        afterDeclarator = true;
+        afterType = false;
+        return IDENTIFIER;
+    }
+
     parserToken->sType.lex.symbol = parseContext.symbolTable.find(*parserToken->sType.lex.string);
     if ((afterType == false && afterStruct == false) && parserToken->sType.lex.symbol != nullptr) {
         if (const TVariable* variable = parserToken->sType.lex.symbol->getAsVariable()) {
             if (variable->isUserType() &&
                 // treat redeclaration of forward-declared buffer/uniform reference as an identifier
                 !(variable->getType().isReference() && afterBuffer)) {
-                afterType = true;
 
+                // If we're in a declarator list (like "float a, B;"), treat struct names as IDENTIFIER
+                // to fix GitHub issue #3931
+                if (inDeclaratorList) {
+                    return IDENTIFIER;
+                }
+                
+                afterType = true;
                 return TYPE_NAME;
             }
         }

+ 6 - 1
3rdparty/glslang/glslang/MachineIndependent/ScanContext.h

@@ -53,7 +53,7 @@ public:
     explicit TScanContext(TParseContextBase& pc) :
         parseContext(pc),
         afterType(false), afterStruct(false),
-        field(false), afterBuffer(false) { }
+        field(false), afterBuffer(false), inDeclaratorList(false), afterDeclarator(false), angleBracketDepth(0), squareBracketDepth(0), parenDepth(0) { }
     virtual ~TScanContext() { }
 
     static void fillInKeywordMap();
@@ -82,6 +82,11 @@ protected:
     bool afterStruct;         // true if we've recognized the STRUCT keyword, so can only be looking for an identifier
     bool field;               // true if we're on a field, right after a '.'
     bool afterBuffer;         // true if we've recognized the BUFFER keyword
+    bool inDeclaratorList;    // true if we detected we're in a declarator list like "float a, b;"
+    bool afterDeclarator;     // true if we just saw an identifier after a type (potential declarator)
+    int angleBracketDepth;    // track nesting level of < > to detect template parameters
+    int squareBracketDepth;   // track nesting level of [ ] to detect array expressions
+    int parenDepth;           // track nesting level of ( ) to detect function parameters
     TSourceLoc loc;
     TParserToken* parserToken;
     TPpToken* ppToken;

+ 6 - 1
3rdparty/glslang/glslang/MachineIndependent/SymbolTable.cpp

@@ -55,7 +55,9 @@ namespace glslang {
 //
 void TType::buildMangledName(TString& mangledName) const
 {
-    if (isMatrix())
+    if (isTensorARM())
+        mangledName += 'T';
+    else if (isMatrix())
         mangledName += 'm';
     else if (isVector())
         mangledName += 'v';
@@ -71,6 +73,8 @@ void TType::buildMangledName(TString& mangledName) const
     case EbtDouble:             mangledName += 'd';      break;
     case EbtFloat16:            mangledName += "f16";    break;
     case EbtBFloat16:           mangledName += "bf16";   break;
+    case EbtFloatE5M2:          mangledName += "fe5m2";  break;
+    case EbtFloatE4M3:          mangledName += "fe4m3";  break;
     case EbtInt8:               mangledName += "i8";     break;
     case EbtUint8:              mangledName += "u8";     break;
     case EbtInt16:              mangledName += "i16";    break;
@@ -421,6 +425,7 @@ TFunction::TFunction(const TFunction& copyOf) : TSymbol(copyOf)
     defined = copyOf.defined;
     prototyped = copyOf.prototyped;
     implicitThis = copyOf.implicitThis;
+    variadic = copyOf.variadic;
     illegalImplicitThis = copyOf.illegalImplicitThis;
     defaultParamCount = copyOf.defaultParamCount;
     spirvInst = copyOf.spirvInst;

+ 18 - 2
3rdparty/glslang/glslang/MachineIndependent/SymbolTable.h

@@ -232,6 +232,13 @@ struct TParameter {
             name = nullptr;
         type = param.type->clone();
         defaultValue = param.defaultValue;
+        if (defaultValue) {
+            // The defaultValue of a builtin is created in a TPoolAllocator that no longer exists
+            // when parsing the user program, so make a deep copy.
+            if (const auto *constUnion = defaultValue->getAsConstantUnion()) {
+                defaultValue = new TIntermConstantUnion(*constUnion->getConstArray().clone(), constUnion->getType());
+            }
+        }
         return *this;
     }
     TBuiltInVariable getDeclaredBuiltIn() const { return type->getQualifier().declaredBuiltIn; }
@@ -245,12 +252,12 @@ public:
     explicit TFunction(TOperator o) :
         TSymbol(nullptr),
         op(o),
-        defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0) { }
+        defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), variadic(false), defaultParamCount(0) { }
     TFunction(const TString *name, const TType& retType, TOperator tOp = EOpNull) :
         TSymbol(name),
         mangledName(*name + '('),
         op(tOp),
-        defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0),
+        defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), variadic(false), defaultParamCount(0),
         linkType(ELinkNone)
     {
         returnType.shallowCopy(retType);
@@ -268,6 +275,7 @@ public:
     virtual void addParameter(TParameter& p)
     {
         assert(writable);
+        assert(!variadic && "cannot add more parameters if function is marked variadic");
         parameters.push_back(p);
         p.type->appendMangledName(mangledName);
 
@@ -310,6 +318,13 @@ public:
     virtual bool hasImplicitThis() const { return implicitThis; }
     virtual void setIllegalImplicitThis() { assert(writable); illegalImplicitThis = true; }
     virtual bool hasIllegalImplicitThis() const { return illegalImplicitThis; }
+    virtual void setVariadic() { // Flags this function as variadic; asserts the symbol is writable and not already flagged.
+        assert(writable);
+        assert(!variadic && "function was already marked variadic");
+        variadic = true;
+        mangledName += 'z'; // a variadic function gets a trailing 'z' appended to its mangled name
+    }
+    virtual bool isVariadic() const { return variadic; } // Reports the current value of the variadic flag.
 
     // Return total number of parameters
     virtual int getParamCount() const { return static_cast<int>(parameters.size()); }
@@ -352,6 +367,7 @@ protected:
                                // even if it finds member variables in the symbol table.
                                // This is important for a static member function that has member variables in scope,
                                // but is not allowed to use them, or see hidden symbols instead.
+    bool variadic;
     int  defaultParamCount;
 
     TSpirvInstruction spirvInst; // SPIR-V instruction qualifiers

+ 44 - 0
3rdparty/glslang/glslang/MachineIndependent/Versions.cpp

@@ -319,11 +319,13 @@ void TParseVersions::initializeExtensionBehavior()
 
     // ARM
     extensionBehavior[E_GL_ARM_shader_core_builtins]                 = EBhDisable;
+    extensionBehavior[E_GL_ARM_tensors]                              = EBhDisable;
 
     // QCOM
     extensionBehavior[E_GL_QCOM_image_processing]                    = EBhDisable;
     extensionBehavior[E_GL_QCOM_image_processing2]                   = EBhDisable;
     extensionBehavior[E_GL_QCOM_tile_shading]                        = EBhDisable;
+    extensionBehavior[E_GL_QCOM_cooperative_matrix_conversion]       = EBhDisable;
 
     // AEP
     extensionBehavior[E_GL_ANDROID_extension_pack_es31a]             = EBhDisable;
@@ -381,6 +383,8 @@ void TParseVersions::initializeExtensionBehavior()
     extensionBehavior[E_GL_EXT_texture_offset_non_const]    = EBhDisable;
     extensionBehavior[E_GL_EXT_nontemporal_keyword]         = EBhDisable;
     extensionBehavior[E_GL_EXT_bfloat16]                    = EBhDisable;
+    extensionBehavior[E_GL_EXT_float_e4m3]                  = EBhDisable;
+    extensionBehavior[E_GL_EXT_float_e5m2]                  = EBhDisable;
 
     // OVR extensions
     extensionBehavior[E_GL_OVR_multiview]                = EBhDisable;
@@ -462,6 +466,7 @@ void TParseVersions::getPreamble(std::string& preamble)
             "#define GL_QCOM_image_processing 1\n"
             "#define GL_QCOM_image_processing2 1\n"
             "#define GL_QCOM_tile_shading 1\n"
+            "#define GL_QCOM_cooperative_matrix_conversion 1\n"
             ;
 
             if (version >= 300) {
@@ -593,6 +598,7 @@ void TParseVersions::getPreamble(std::string& preamble)
             "#define GL_QCOM_image_processing 1\n"
             "#define GL_QCOM_image_processing2 1\n"
             "#define GL_QCOM_tile_shading 1\n"
+            "#define GL_QCOM_cooperative_matrix_conversion 1\n"
 
             "#define GL_EXT_shader_explicit_arithmetic_types 1\n"
             "#define GL_EXT_shader_explicit_arithmetic_types_int8 1\n"
@@ -619,6 +625,8 @@ void TParseVersions::getPreamble(std::string& preamble)
 
             "#define GL_EXT_integer_dot_product 1\n"
             "#define GL_EXT_bfloat16 1\n"
+            "#define GL_EXT_float_e5m2 1\n"
+            "#define GL_EXT_float_e4m3 1\n"
             ;
 
         if (spvVersion.spv == 0) {
@@ -1303,6 +1311,26 @@ void TParseVersions::bfloat16ScalarVectorCheck(const TSourceLoc& loc, const char
     }
 }
 
+void TParseVersions::floate5m2ScalarVectorCheck(const TSourceLoc& loc, const char* op, bool builtIn) // Passes E_GL_EXT_float_e5m2 to requireExtensions for `op` at `loc`; does nothing when builtIn is true.
+{
+    if (!builtIn) { // built-in uses skip the extension requirement entirely
+        const char* const extensions[] = {
+                                           E_GL_EXT_float_e5m2,
+                                         };
+        requireExtensions(loc, sizeof(extensions)/sizeof(extensions[0]), extensions, op); // second argument is the element count of the array above
+    }
+}
+
+void TParseVersions::floate4m3ScalarVectorCheck(const TSourceLoc& loc, const char* op, bool builtIn) // Passes E_GL_EXT_float_e4m3 to requireExtensions for `op` at `loc`; does nothing when builtIn is true.
+{
+    if (!builtIn) { // built-in uses skip the extension requirement entirely
+        const char* const extensions[] = {
+                                           E_GL_EXT_float_e4m3,
+                                         };
+        requireExtensions(loc, sizeof(extensions)/sizeof(extensions[0]), extensions, op); // second argument is the element count of the array above
+    }
+}
+
 // Call for any operation needing GLSL float32 data-type support.
 void TParseVersions::explicitFloat32Check(const TSourceLoc& loc, const char* op, bool builtIn)
 {
@@ -1439,6 +1467,14 @@ void TParseVersions::coopmatCheck(const TSourceLoc& loc, const char* op, bool bu
     }
 }
 
+void TParseVersions::coopmatConverisonCheckQCOM(const TSourceLoc& loc, const char* op, bool builtIn) // Passes an extension list to requireExtensions for `op` at `loc`; does nothing when builtIn is true.
+{
+  if (!builtIn) { // built-in uses skip the extension requirement entirely
+    const char* const extensions[] = {E_GL_KHR_cooperative_matrix}; // NOTE(review): lists the KHR extension, not E_GL_QCOM_cooperative_matrix_conversion - confirm this is intended
+    requireExtensions(loc, sizeof(extensions) / sizeof(extensions[0]), extensions, op); // second argument is the element count of the array above
+  }
+}
+
 void TParseVersions::tensorLayoutViewCheck(const TSourceLoc& loc, const char* op, bool builtIn)
 {
     if (!builtIn) {
@@ -1463,6 +1499,14 @@ void TParseVersions::intattachmentCheck(const TSourceLoc& loc, const char* op, b
     }
 }
 
+void TParseVersions::tensorCheckARM(const TSourceLoc& loc, const char* op, bool builtIn) // Passes E_GL_ARM_tensors to requireExtensions for `op` at `loc`; does nothing when builtIn is true.
+{
+    if (!builtIn) { // built-in uses skip the extension requirement entirely
+        const char* const extensions[] = {E_GL_ARM_tensors};
+        requireExtensions(loc, sizeof(extensions)/sizeof(extensions[0]), extensions, op); // second argument is the element count of the array above
+    }
+}
+
 // Call for any operation removed because SPIR-V is in use.
 void TParseVersions::spvRemoved(const TSourceLoc& loc, const char* op)
 {

+ 4 - 0
3rdparty/glslang/glslang/MachineIndependent/Versions.h

@@ -293,6 +293,7 @@ const char* const E_GL_NV_gpu_shader5                           = "GL_NV_gpu_sha
 
 // ARM
 const char* const E_GL_ARM_shader_core_builtins                 = "GL_ARM_shader_core_builtins";
+const char* const E_GL_ARM_tensors                              = "GL_ARM_tensors";
 
 // Arrays of extensions for the above viewportEXTs duplications
 
@@ -303,6 +304,7 @@ const int Num_viewportEXTs = sizeof(viewportEXTs) / sizeof(viewportEXTs[0]);
 const char* const E_GL_QCOM_image_processing                    = "GL_QCOM_image_processing";
 const char* const E_GL_QCOM_image_processing2                   = "GL_QCOM_image_processing2";
 const char* const E_GL_QCOM_tile_shading                        = "GL_QCOM_tile_shading";
+const char* const E_GL_QCOM_cooperative_matrix_conversion       = "GL_QCOM_cooperative_matrix_conversion";
 
 // AEP
 const char* const E_GL_ANDROID_extension_pack_es31a             = "GL_ANDROID_extension_pack_es31a";
@@ -359,6 +361,8 @@ const char* const E_GL_EXT_texture_shadow_lod = "GL_EXT_texture_shadow_lod";
 const char* const E_GL_EXT_integer_dot_product                    = "GL_EXT_integer_dot_product";
 
 const char* const E_GL_EXT_bfloat16 = "GL_EXT_bfloat16";
+const char* const E_GL_EXT_float_e5m2 = "GL_EXT_float_e5m2";
+const char* const E_GL_EXT_float_e4m3 = "GL_EXT_float_e4m3";
 
 // Arrays of extensions for the above AEP duplications
 

+ 77 - 19
3rdparty/glslang/glslang/MachineIndependent/glslang.y

@@ -146,7 +146,7 @@ extern int yylex(YYSTYPE*, TParseContext&);
 %token <lex> UTEXTURE2D UTEXTURE3D UTEXTURECUBE UTEXTURE2DARRAY
 
 %token <lex> ATTRIBUTE VARYING
-%token <lex> BFLOAT16_T FLOAT16_T FLOAT32_T DOUBLE FLOAT64_T
+%token <lex> FLOATE5M2_T FLOATE4M3_T BFLOAT16_T FLOAT16_T FLOAT32_T DOUBLE FLOAT64_T
 %token <lex> INT64_T UINT64_T INT32_T UINT32_T INT16_T UINT16_T INT8_T UINT8_T
 %token <lex> I64VEC2 I64VEC3 I64VEC4
 %token <lex> U64VEC2 U64VEC3 U64VEC4
@@ -158,6 +158,8 @@ extern int yylex(YYSTYPE*, TParseContext&);
 %token <lex> U8VEC2  U8VEC3  U8VEC4
 %token <lex> DVEC2 DVEC3 DVEC4 DMAT2 DMAT3 DMAT4
 %token <lex> BF16VEC2 BF16VEC3 BF16VEC4
+%token <lex> FE5M2VEC2 FE5M2VEC3 FE5M2VEC4
+%token <lex> FE4M3VEC2 FE4M3VEC3 FE4M3VEC4
 %token <lex> F16VEC2 F16VEC3 F16VEC4 F16MAT2 F16MAT3 F16MAT4
 %token <lex> F32VEC2 F32VEC3 F32VEC4 F32MAT2 F32MAT3 F32MAT4
 %token <lex> F64VEC2 F64VEC3 F64VEC4 F64MAT2 F64MAT3 F64MAT4
@@ -182,6 +184,7 @@ extern int yylex(YYSTYPE*, TParseContext&);
 %token <lex> COOPVECNV
 %token <lex> HITOBJECTNV HITOBJECTATTRNV
 %token <lex> TENSORLAYOUTNV TENSORVIEWNV
+%token <lex> TENSORARM
 
 // combined image/sampler
 %token <lex> SAMPLERCUBEARRAY SAMPLERCUBEARRAYSHADOW
@@ -925,15 +928,9 @@ declaration
         parseContext.updateStandaloneQualifierDefaults($1.loc, $1);
         $$ = 0;
     }
-    | type_qualifier IDENTIFIER SEMICOLON {
+    | type_qualifier identifier_list SEMICOLON {
         parseContext.checkNoShaderLayouts($1.loc, $1.shaderQualifiers);
-        parseContext.addQualifierToExisting($1.loc, $1.qualifier, *$2.string);
-        $$ = 0;
-    }
-    | type_qualifier IDENTIFIER identifier_list SEMICOLON {
-        parseContext.checkNoShaderLayouts($1.loc, $1.shaderQualifiers);
-        $3->push_back($2.string);
-        parseContext.addQualifierToExisting($1.loc, $1.qualifier, *$3);
+        parseContext.addQualifierToExisting($1.loc, $1.qualifier, *$2);
         $$ = 0;
     }
     ;
@@ -950,9 +947,9 @@ block_structure
     }
 
 identifier_list
-    : COMMA IDENTIFIER {
+    : IDENTIFIER {
         $$ = new TIdentifierList;
-        $$->push_back($2.string);
+        $$->push_back($1.string);
     }
     | identifier_list COMMA IDENTIFIER {
         $$ = $1;
@@ -1037,6 +1034,10 @@ function_header_with_parameters
                 parseContext.vkRelaxedRemapFunctionParameter($1, $3.param);
         }
     }
+    | function_header_with_parameters COMMA DOT DOT DOT {
+        $$ = $1;
+        parseContext.makeVariadic($1, $3.loc);
+    }
     ;
 
 function_header
@@ -1097,6 +1098,11 @@ parameter_declarator
         $$.loc = $2.loc;
         $$.param = param;
     }
+    | type_specifier IDENTIFIER EQUAL initializer {
+        TParameter param = parseContext.getParamWithDefault($1, $2.string, $4, $3.loc);
+        $$.loc = $2.loc;
+        $$.param = param;
+    }
     ;
 
 parameter_declaration
@@ -1107,7 +1113,7 @@ parameter_declaration
         $$ = $2;
         if ($1.qualifier.precision != EpqNone)
             $$.param.type->getQualifier().precision = $1.qualifier.precision;
-        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->isCoopMatOrVec());
+        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->hasTypeParameter());
 
         parseContext.checkNoShaderLayouts($1.loc, $1.shaderQualifiers);
         parseContext.parameterTypeCheck($2.loc, $1.qualifier.storage, *$$.param.type);
@@ -1119,7 +1125,7 @@ parameter_declaration
 
         parseContext.parameterTypeCheck($1.loc, EvqIn, *$1.param.type);
         parseContext.paramCheckFixStorage($1.loc, EvqTemporary, *$$.param.type);
-        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->isCoopMatOrVec());
+        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->hasTypeParameter());
     }
     //
     // Without name
@@ -1128,7 +1134,7 @@ parameter_declaration
         $$ = $2;
         if ($1.qualifier.precision != EpqNone)
             $$.param.type->getQualifier().precision = $1.qualifier.precision;
-        parseContext.precisionQualifierCheck($1.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->isCoopMatOrVec());
+        parseContext.precisionQualifierCheck($1.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->hasTypeParameter());
 
         parseContext.checkNoShaderLayouts($1.loc, $1.shaderQualifiers);
         parseContext.parameterTypeCheck($2.loc, $1.qualifier.storage, *$$.param.type);
@@ -1139,7 +1145,7 @@ parameter_declaration
 
         parseContext.parameterTypeCheck($1.loc, EvqIn, *$1.param.type);
         parseContext.paramCheckFixStorage($1.loc, EvqTemporary, *$$.param.type);
-        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->isCoopMatOrVec());
+        parseContext.precisionQualifierCheck($$.loc, $$.param.type->getBasicType(), $$.param.type->getQualifier(), $$.param.type->hasTypeParameter());
     }
     ;
 
@@ -1214,7 +1220,7 @@ fully_specified_type
             parseContext.profileRequires($1.loc, ENoProfile, 120, E_GL_3DL_array_objects, "arrayed type");
             parseContext.profileRequires($1.loc, EEsProfile, 300, 0, "arrayed type");
         }
-        parseContext.precisionQualifierCheck($$.loc, $$.basicType, $$.qualifier, $$.isCoopmatOrvec());
+        parseContext.precisionQualifierCheck($$.loc, $$.basicType, $$.qualifier, $$.hasTypeParameter());
     }
     | type_qualifier type_specifier  {
         parseContext.globalQualifierFixCheck($1.loc, $1.qualifier, false, &$2);
@@ -1231,7 +1237,7 @@ fully_specified_type
         parseContext.checkNoShaderLayouts($2.loc, $1.shaderQualifiers);
         $2.shaderQualifiers.merge($1.shaderQualifiers);
         parseContext.mergeQualifiers($2.loc, $2.qualifier, $1.qualifier, true);
-        parseContext.precisionQualifierCheck($2.loc, $2.basicType, $2.qualifier, $2.isCoopmatOrvec());
+        parseContext.precisionQualifierCheck($2.loc, $2.basicType, $2.qualifier, $2.hasTypeParameter());
 
         $$ = $2;
 
@@ -1943,6 +1949,16 @@ type_specifier_nonarray
         $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
         $$.basicType = EbtBFloat16;
     }
+    | FLOATE5M2_T {
+        parseContext.floate5m2ScalarVectorCheck($1.loc, "floate5m2_t", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE5M2;
+    }
+    | FLOATE4M3_T {
+        parseContext.floate4m3ScalarVectorCheck($1.loc, "floate4m3_t", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE4M3;
+    }
     | FLOAT16_T {
         parseContext.float16ScalarVectorCheck($1.loc, "float16_t", parseContext.symbolTable.atBuiltInLevel());
         $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
@@ -2040,6 +2056,42 @@ type_specifier_nonarray
         $$.basicType = EbtBFloat16;
         $$.setVector(4);
     }
+    | FE5M2VEC2 {
+        parseContext.floate5m2ScalarVectorCheck($1.loc, "fe5m2 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE5M2;
+        $$.setVector(2);
+    }
+    | FE5M2VEC3 {
+        parseContext.floate5m2ScalarVectorCheck($1.loc, "fe5m2 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE5M2;
+        $$.setVector(3);
+    }
+    | FE5M2VEC4 {
+        parseContext.floate5m2ScalarVectorCheck($1.loc, "fe5m2 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE5M2;
+        $$.setVector(4);
+    }
+    | FE4M3VEC2 {
+        parseContext.floate4m3ScalarVectorCheck($1.loc, "fe4m3 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE4M3;
+        $$.setVector(2);
+    }
+    | FE4M3VEC3 {
+        parseContext.floate4m3ScalarVectorCheck($1.loc, "fe4m3 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE4M3;
+        $$.setVector(3);
+    }
+    | FE4M3VEC4 {
+        parseContext.floate4m3ScalarVectorCheck($1.loc, "fe4m3 vector", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.basicType = EbtFloatE4M3;
+        $$.setVector(4);
+    }
     | F16VEC2 {
         parseContext.float16ScalarVectorCheck($1.loc, "half float vector", parseContext.symbolTable.atBuiltInLevel());
         $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
@@ -3585,6 +3637,12 @@ type_specifier_nonarray
         $$.basicType = EbtCoopvecNV;
         $$.coopvecNV = true;
     }
+    | TENSORARM {
+        parseContext.tensorCheckARM($1.loc, "tensorARM", parseContext.symbolTable.atBuiltInLevel());
+        $$.init($1.loc, parseContext.symbolTable.atGlobalLevel());
+        $$.tensorRankARM = 1; // placeholder value
+        $$.basicType = EbtTensorARM;
+    }
     | spirv_type_specifier {
         parseContext.requireExtensions($1.loc, 1, &E_GL_EXT_spirv_intrinsics, "SPIR-V type specifier");
         $$ = $1;
@@ -3686,7 +3744,7 @@ struct_declaration
         $$ = $2;
 
         parseContext.voidErrorCheck($1.loc, (*$2)[0].type->getFieldName(), $1.basicType);
-        parseContext.precisionQualifierCheck($1.loc, $1.basicType, $1.qualifier, $1.isCoopmatOrvec());
+        parseContext.precisionQualifierCheck($1.loc, $1.basicType, $1.qualifier, $1.hasTypeParameter());
 
         for (unsigned int i = 0; i < $$->size(); ++i) {
             TType type($1);
@@ -3710,7 +3768,7 @@ struct_declaration
         parseContext.memberQualifierCheck($1);
         parseContext.voidErrorCheck($2.loc, (*$3)[0].type->getFieldName(), $2.basicType);
         parseContext.mergeQualifiers($2.loc, $2.qualifier, $1.qualifier, true);
-        parseContext.precisionQualifierCheck($2.loc, $2.basicType, $2.qualifier, $2.isCoopmatOrvec());
+        parseContext.precisionQualifierCheck($2.loc, $2.basicType, $2.qualifier, $2.hasTypeParameter());
 
         for (unsigned int i = 0; i < $$->size(); ++i) {
             TType type($2);

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 552 - 543
3rdparty/glslang/glslang/MachineIndependent/glslang_tab.cpp


+ 421 - 412
3rdparty/glslang/glslang/MachineIndependent/glslang_tab.cpp.h

@@ -114,417 +114,426 @@ extern int yydebug;
     UTEXTURE2DARRAY = 315,         /* UTEXTURE2DARRAY  */
     ATTRIBUTE = 316,               /* ATTRIBUTE  */
     VARYING = 317,                 /* VARYING  */
-    BFLOAT16_T = 318,              /* BFLOAT16_T  */
-    FLOAT16_T = 319,               /* FLOAT16_T  */
-    FLOAT32_T = 320,               /* FLOAT32_T  */
-    DOUBLE = 321,                  /* DOUBLE  */
-    FLOAT64_T = 322,               /* FLOAT64_T  */
-    INT64_T = 323,                 /* INT64_T  */
-    UINT64_T = 324,                /* UINT64_T  */
-    INT32_T = 325,                 /* INT32_T  */
-    UINT32_T = 326,                /* UINT32_T  */
-    INT16_T = 327,                 /* INT16_T  */
-    UINT16_T = 328,                /* UINT16_T  */
-    INT8_T = 329,                  /* INT8_T  */
-    UINT8_T = 330,                 /* UINT8_T  */
-    I64VEC2 = 331,                 /* I64VEC2  */
-    I64VEC3 = 332,                 /* I64VEC3  */
-    I64VEC4 = 333,                 /* I64VEC4  */
-    U64VEC2 = 334,                 /* U64VEC2  */
-    U64VEC3 = 335,                 /* U64VEC3  */
-    U64VEC4 = 336,                 /* U64VEC4  */
-    I32VEC2 = 337,                 /* I32VEC2  */
-    I32VEC3 = 338,                 /* I32VEC3  */
-    I32VEC4 = 339,                 /* I32VEC4  */
-    U32VEC2 = 340,                 /* U32VEC2  */
-    U32VEC3 = 341,                 /* U32VEC3  */
-    U32VEC4 = 342,                 /* U32VEC4  */
-    I16VEC2 = 343,                 /* I16VEC2  */
-    I16VEC3 = 344,                 /* I16VEC3  */
-    I16VEC4 = 345,                 /* I16VEC4  */
-    U16VEC2 = 346,                 /* U16VEC2  */
-    U16VEC3 = 347,                 /* U16VEC3  */
-    U16VEC4 = 348,                 /* U16VEC4  */
-    I8VEC2 = 349,                  /* I8VEC2  */
-    I8VEC3 = 350,                  /* I8VEC3  */
-    I8VEC4 = 351,                  /* I8VEC4  */
-    U8VEC2 = 352,                  /* U8VEC2  */
-    U8VEC3 = 353,                  /* U8VEC3  */
-    U8VEC4 = 354,                  /* U8VEC4  */
-    DVEC2 = 355,                   /* DVEC2  */
-    DVEC3 = 356,                   /* DVEC3  */
-    DVEC4 = 357,                   /* DVEC4  */
-    DMAT2 = 358,                   /* DMAT2  */
-    DMAT3 = 359,                   /* DMAT3  */
-    DMAT4 = 360,                   /* DMAT4  */
-    BF16VEC2 = 361,                /* BF16VEC2  */
-    BF16VEC3 = 362,                /* BF16VEC3  */
-    BF16VEC4 = 363,                /* BF16VEC4  */
-    F16VEC2 = 364,                 /* F16VEC2  */
-    F16VEC3 = 365,                 /* F16VEC3  */
-    F16VEC4 = 366,                 /* F16VEC4  */
-    F16MAT2 = 367,                 /* F16MAT2  */
-    F16MAT3 = 368,                 /* F16MAT3  */
-    F16MAT4 = 369,                 /* F16MAT4  */
-    F32VEC2 = 370,                 /* F32VEC2  */
-    F32VEC3 = 371,                 /* F32VEC3  */
-    F32VEC4 = 372,                 /* F32VEC4  */
-    F32MAT2 = 373,                 /* F32MAT2  */
-    F32MAT3 = 374,                 /* F32MAT3  */
-    F32MAT4 = 375,                 /* F32MAT4  */
-    F64VEC2 = 376,                 /* F64VEC2  */
-    F64VEC3 = 377,                 /* F64VEC3  */
-    F64VEC4 = 378,                 /* F64VEC4  */
-    F64MAT2 = 379,                 /* F64MAT2  */
-    F64MAT3 = 380,                 /* F64MAT3  */
-    F64MAT4 = 381,                 /* F64MAT4  */
-    DMAT2X2 = 382,                 /* DMAT2X2  */
-    DMAT2X3 = 383,                 /* DMAT2X3  */
-    DMAT2X4 = 384,                 /* DMAT2X4  */
-    DMAT3X2 = 385,                 /* DMAT3X2  */
-    DMAT3X3 = 386,                 /* DMAT3X3  */
-    DMAT3X4 = 387,                 /* DMAT3X4  */
-    DMAT4X2 = 388,                 /* DMAT4X2  */
-    DMAT4X3 = 389,                 /* DMAT4X3  */
-    DMAT4X4 = 390,                 /* DMAT4X4  */
-    F16MAT2X2 = 391,               /* F16MAT2X2  */
-    F16MAT2X3 = 392,               /* F16MAT2X3  */
-    F16MAT2X4 = 393,               /* F16MAT2X4  */
-    F16MAT3X2 = 394,               /* F16MAT3X2  */
-    F16MAT3X3 = 395,               /* F16MAT3X3  */
-    F16MAT3X4 = 396,               /* F16MAT3X4  */
-    F16MAT4X2 = 397,               /* F16MAT4X2  */
-    F16MAT4X3 = 398,               /* F16MAT4X3  */
-    F16MAT4X4 = 399,               /* F16MAT4X4  */
-    F32MAT2X2 = 400,               /* F32MAT2X2  */
-    F32MAT2X3 = 401,               /* F32MAT2X3  */
-    F32MAT2X4 = 402,               /* F32MAT2X4  */
-    F32MAT3X2 = 403,               /* F32MAT3X2  */
-    F32MAT3X3 = 404,               /* F32MAT3X3  */
-    F32MAT3X4 = 405,               /* F32MAT3X4  */
-    F32MAT4X2 = 406,               /* F32MAT4X2  */
-    F32MAT4X3 = 407,               /* F32MAT4X3  */
-    F32MAT4X4 = 408,               /* F32MAT4X4  */
-    F64MAT2X2 = 409,               /* F64MAT2X2  */
-    F64MAT2X3 = 410,               /* F64MAT2X3  */
-    F64MAT2X4 = 411,               /* F64MAT2X4  */
-    F64MAT3X2 = 412,               /* F64MAT3X2  */
-    F64MAT3X3 = 413,               /* F64MAT3X3  */
-    F64MAT3X4 = 414,               /* F64MAT3X4  */
-    F64MAT4X2 = 415,               /* F64MAT4X2  */
-    F64MAT4X3 = 416,               /* F64MAT4X3  */
-    F64MAT4X4 = 417,               /* F64MAT4X4  */
-    ATOMIC_UINT = 418,             /* ATOMIC_UINT  */
-    ACCSTRUCTNV = 419,             /* ACCSTRUCTNV  */
-    ACCSTRUCTEXT = 420,            /* ACCSTRUCTEXT  */
-    RAYQUERYEXT = 421,             /* RAYQUERYEXT  */
-    FCOOPMATNV = 422,              /* FCOOPMATNV  */
-    ICOOPMATNV = 423,              /* ICOOPMATNV  */
-    UCOOPMATNV = 424,              /* UCOOPMATNV  */
-    COOPMAT = 425,                 /* COOPMAT  */
-    COOPVECNV = 426,               /* COOPVECNV  */
-    HITOBJECTNV = 427,             /* HITOBJECTNV  */
-    HITOBJECTATTRNV = 428,         /* HITOBJECTATTRNV  */
-    TENSORLAYOUTNV = 429,          /* TENSORLAYOUTNV  */
-    TENSORVIEWNV = 430,            /* TENSORVIEWNV  */
-    SAMPLERCUBEARRAY = 431,        /* SAMPLERCUBEARRAY  */
-    SAMPLERCUBEARRAYSHADOW = 432,  /* SAMPLERCUBEARRAYSHADOW  */
-    ISAMPLERCUBEARRAY = 433,       /* ISAMPLERCUBEARRAY  */
-    USAMPLERCUBEARRAY = 434,       /* USAMPLERCUBEARRAY  */
-    SAMPLER1D = 435,               /* SAMPLER1D  */
-    SAMPLER1DARRAY = 436,          /* SAMPLER1DARRAY  */
-    SAMPLER1DARRAYSHADOW = 437,    /* SAMPLER1DARRAYSHADOW  */
-    ISAMPLER1D = 438,              /* ISAMPLER1D  */
-    SAMPLER1DSHADOW = 439,         /* SAMPLER1DSHADOW  */
-    SAMPLER2DRECT = 440,           /* SAMPLER2DRECT  */
-    SAMPLER2DRECTSHADOW = 441,     /* SAMPLER2DRECTSHADOW  */
-    ISAMPLER2DRECT = 442,          /* ISAMPLER2DRECT  */
-    USAMPLER2DRECT = 443,          /* USAMPLER2DRECT  */
-    SAMPLERBUFFER = 444,           /* SAMPLERBUFFER  */
-    ISAMPLERBUFFER = 445,          /* ISAMPLERBUFFER  */
-    USAMPLERBUFFER = 446,          /* USAMPLERBUFFER  */
-    SAMPLER2DMS = 447,             /* SAMPLER2DMS  */
-    ISAMPLER2DMS = 448,            /* ISAMPLER2DMS  */
-    USAMPLER2DMS = 449,            /* USAMPLER2DMS  */
-    SAMPLER2DMSARRAY = 450,        /* SAMPLER2DMSARRAY  */
-    ISAMPLER2DMSARRAY = 451,       /* ISAMPLER2DMSARRAY  */
-    USAMPLER2DMSARRAY = 452,       /* USAMPLER2DMSARRAY  */
-    SAMPLEREXTERNALOES = 453,      /* SAMPLEREXTERNALOES  */
-    SAMPLEREXTERNAL2DY2YEXT = 454, /* SAMPLEREXTERNAL2DY2YEXT  */
-    ISAMPLER1DARRAY = 455,         /* ISAMPLER1DARRAY  */
-    USAMPLER1D = 456,              /* USAMPLER1D  */
-    USAMPLER1DARRAY = 457,         /* USAMPLER1DARRAY  */
-    F16SAMPLER1D = 458,            /* F16SAMPLER1D  */
-    F16SAMPLER2D = 459,            /* F16SAMPLER2D  */
-    F16SAMPLER3D = 460,            /* F16SAMPLER3D  */
-    F16SAMPLER2DRECT = 461,        /* F16SAMPLER2DRECT  */
-    F16SAMPLERCUBE = 462,          /* F16SAMPLERCUBE  */
-    F16SAMPLER1DARRAY = 463,       /* F16SAMPLER1DARRAY  */
-    F16SAMPLER2DARRAY = 464,       /* F16SAMPLER2DARRAY  */
-    F16SAMPLERCUBEARRAY = 465,     /* F16SAMPLERCUBEARRAY  */
-    F16SAMPLERBUFFER = 466,        /* F16SAMPLERBUFFER  */
-    F16SAMPLER2DMS = 467,          /* F16SAMPLER2DMS  */
-    F16SAMPLER2DMSARRAY = 468,     /* F16SAMPLER2DMSARRAY  */
-    F16SAMPLER1DSHADOW = 469,      /* F16SAMPLER1DSHADOW  */
-    F16SAMPLER2DSHADOW = 470,      /* F16SAMPLER2DSHADOW  */
-    F16SAMPLER1DARRAYSHADOW = 471, /* F16SAMPLER1DARRAYSHADOW  */
-    F16SAMPLER2DARRAYSHADOW = 472, /* F16SAMPLER2DARRAYSHADOW  */
-    F16SAMPLER2DRECTSHADOW = 473,  /* F16SAMPLER2DRECTSHADOW  */
-    F16SAMPLERCUBESHADOW = 474,    /* F16SAMPLERCUBESHADOW  */
-    F16SAMPLERCUBEARRAYSHADOW = 475, /* F16SAMPLERCUBEARRAYSHADOW  */
-    IMAGE1D = 476,                 /* IMAGE1D  */
-    IIMAGE1D = 477,                /* IIMAGE1D  */
-    UIMAGE1D = 478,                /* UIMAGE1D  */
-    IMAGE2D = 479,                 /* IMAGE2D  */
-    IIMAGE2D = 480,                /* IIMAGE2D  */
-    UIMAGE2D = 481,                /* UIMAGE2D  */
-    IMAGE3D = 482,                 /* IMAGE3D  */
-    IIMAGE3D = 483,                /* IIMAGE3D  */
-    UIMAGE3D = 484,                /* UIMAGE3D  */
-    IMAGE2DRECT = 485,             /* IMAGE2DRECT  */
-    IIMAGE2DRECT = 486,            /* IIMAGE2DRECT  */
-    UIMAGE2DRECT = 487,            /* UIMAGE2DRECT  */
-    IMAGECUBE = 488,               /* IMAGECUBE  */
-    IIMAGECUBE = 489,              /* IIMAGECUBE  */
-    UIMAGECUBE = 490,              /* UIMAGECUBE  */
-    IMAGEBUFFER = 491,             /* IMAGEBUFFER  */
-    IIMAGEBUFFER = 492,            /* IIMAGEBUFFER  */
-    UIMAGEBUFFER = 493,            /* UIMAGEBUFFER  */
-    IMAGE1DARRAY = 494,            /* IMAGE1DARRAY  */
-    IIMAGE1DARRAY = 495,           /* IIMAGE1DARRAY  */
-    UIMAGE1DARRAY = 496,           /* UIMAGE1DARRAY  */
-    IMAGE2DARRAY = 497,            /* IMAGE2DARRAY  */
-    IIMAGE2DARRAY = 498,           /* IIMAGE2DARRAY  */
-    UIMAGE2DARRAY = 499,           /* UIMAGE2DARRAY  */
-    IMAGECUBEARRAY = 500,          /* IMAGECUBEARRAY  */
-    IIMAGECUBEARRAY = 501,         /* IIMAGECUBEARRAY  */
-    UIMAGECUBEARRAY = 502,         /* UIMAGECUBEARRAY  */
-    IMAGE2DMS = 503,               /* IMAGE2DMS  */
-    IIMAGE2DMS = 504,              /* IIMAGE2DMS  */
-    UIMAGE2DMS = 505,              /* UIMAGE2DMS  */
-    IMAGE2DMSARRAY = 506,          /* IMAGE2DMSARRAY  */
-    IIMAGE2DMSARRAY = 507,         /* IIMAGE2DMSARRAY  */
-    UIMAGE2DMSARRAY = 508,         /* UIMAGE2DMSARRAY  */
-    F16IMAGE1D = 509,              /* F16IMAGE1D  */
-    F16IMAGE2D = 510,              /* F16IMAGE2D  */
-    F16IMAGE3D = 511,              /* F16IMAGE3D  */
-    F16IMAGE2DRECT = 512,          /* F16IMAGE2DRECT  */
-    F16IMAGECUBE = 513,            /* F16IMAGECUBE  */
-    F16IMAGE1DARRAY = 514,         /* F16IMAGE1DARRAY  */
-    F16IMAGE2DARRAY = 515,         /* F16IMAGE2DARRAY  */
-    F16IMAGECUBEARRAY = 516,       /* F16IMAGECUBEARRAY  */
-    F16IMAGEBUFFER = 517,          /* F16IMAGEBUFFER  */
-    F16IMAGE2DMS = 518,            /* F16IMAGE2DMS  */
-    F16IMAGE2DMSARRAY = 519,       /* F16IMAGE2DMSARRAY  */
-    I64IMAGE1D = 520,              /* I64IMAGE1D  */
-    U64IMAGE1D = 521,              /* U64IMAGE1D  */
-    I64IMAGE2D = 522,              /* I64IMAGE2D  */
-    U64IMAGE2D = 523,              /* U64IMAGE2D  */
-    I64IMAGE3D = 524,              /* I64IMAGE3D  */
-    U64IMAGE3D = 525,              /* U64IMAGE3D  */
-    I64IMAGE2DRECT = 526,          /* I64IMAGE2DRECT  */
-    U64IMAGE2DRECT = 527,          /* U64IMAGE2DRECT  */
-    I64IMAGECUBE = 528,            /* I64IMAGECUBE  */
-    U64IMAGECUBE = 529,            /* U64IMAGECUBE  */
-    I64IMAGEBUFFER = 530,          /* I64IMAGEBUFFER  */
-    U64IMAGEBUFFER = 531,          /* U64IMAGEBUFFER  */
-    I64IMAGE1DARRAY = 532,         /* I64IMAGE1DARRAY  */
-    U64IMAGE1DARRAY = 533,         /* U64IMAGE1DARRAY  */
-    I64IMAGE2DARRAY = 534,         /* I64IMAGE2DARRAY  */
-    U64IMAGE2DARRAY = 535,         /* U64IMAGE2DARRAY  */
-    I64IMAGECUBEARRAY = 536,       /* I64IMAGECUBEARRAY  */
-    U64IMAGECUBEARRAY = 537,       /* U64IMAGECUBEARRAY  */
-    I64IMAGE2DMS = 538,            /* I64IMAGE2DMS  */
-    U64IMAGE2DMS = 539,            /* U64IMAGE2DMS  */
-    I64IMAGE2DMSARRAY = 540,       /* I64IMAGE2DMSARRAY  */
-    U64IMAGE2DMSARRAY = 541,       /* U64IMAGE2DMSARRAY  */
-    TEXTURECUBEARRAY = 542,        /* TEXTURECUBEARRAY  */
-    ITEXTURECUBEARRAY = 543,       /* ITEXTURECUBEARRAY  */
-    UTEXTURECUBEARRAY = 544,       /* UTEXTURECUBEARRAY  */
-    TEXTURE1D = 545,               /* TEXTURE1D  */
-    ITEXTURE1D = 546,              /* ITEXTURE1D  */
-    UTEXTURE1D = 547,              /* UTEXTURE1D  */
-    TEXTURE1DARRAY = 548,          /* TEXTURE1DARRAY  */
-    ITEXTURE1DARRAY = 549,         /* ITEXTURE1DARRAY  */
-    UTEXTURE1DARRAY = 550,         /* UTEXTURE1DARRAY  */
-    TEXTURE2DRECT = 551,           /* TEXTURE2DRECT  */
-    ITEXTURE2DRECT = 552,          /* ITEXTURE2DRECT  */
-    UTEXTURE2DRECT = 553,          /* UTEXTURE2DRECT  */
-    TEXTUREBUFFER = 554,           /* TEXTUREBUFFER  */
-    ITEXTUREBUFFER = 555,          /* ITEXTUREBUFFER  */
-    UTEXTUREBUFFER = 556,          /* UTEXTUREBUFFER  */
-    TEXTURE2DMS = 557,             /* TEXTURE2DMS  */
-    ITEXTURE2DMS = 558,            /* ITEXTURE2DMS  */
-    UTEXTURE2DMS = 559,            /* UTEXTURE2DMS  */
-    TEXTURE2DMSARRAY = 560,        /* TEXTURE2DMSARRAY  */
-    ITEXTURE2DMSARRAY = 561,       /* ITEXTURE2DMSARRAY  */
-    UTEXTURE2DMSARRAY = 562,       /* UTEXTURE2DMSARRAY  */
-    F16TEXTURE1D = 563,            /* F16TEXTURE1D  */
-    F16TEXTURE2D = 564,            /* F16TEXTURE2D  */
-    F16TEXTURE3D = 565,            /* F16TEXTURE3D  */
-    F16TEXTURE2DRECT = 566,        /* F16TEXTURE2DRECT  */
-    F16TEXTURECUBE = 567,          /* F16TEXTURECUBE  */
-    F16TEXTURE1DARRAY = 568,       /* F16TEXTURE1DARRAY  */
-    F16TEXTURE2DARRAY = 569,       /* F16TEXTURE2DARRAY  */
-    F16TEXTURECUBEARRAY = 570,     /* F16TEXTURECUBEARRAY  */
-    F16TEXTUREBUFFER = 571,        /* F16TEXTUREBUFFER  */
-    F16TEXTURE2DMS = 572,          /* F16TEXTURE2DMS  */
-    F16TEXTURE2DMSARRAY = 573,     /* F16TEXTURE2DMSARRAY  */
-    SUBPASSINPUT = 574,            /* SUBPASSINPUT  */
-    SUBPASSINPUTMS = 575,          /* SUBPASSINPUTMS  */
-    ISUBPASSINPUT = 576,           /* ISUBPASSINPUT  */
-    ISUBPASSINPUTMS = 577,         /* ISUBPASSINPUTMS  */
-    USUBPASSINPUT = 578,           /* USUBPASSINPUT  */
-    USUBPASSINPUTMS = 579,         /* USUBPASSINPUTMS  */
-    F16SUBPASSINPUT = 580,         /* F16SUBPASSINPUT  */
-    F16SUBPASSINPUTMS = 581,       /* F16SUBPASSINPUTMS  */
-    SPIRV_INSTRUCTION = 582,       /* SPIRV_INSTRUCTION  */
-    SPIRV_EXECUTION_MODE = 583,    /* SPIRV_EXECUTION_MODE  */
-    SPIRV_EXECUTION_MODE_ID = 584, /* SPIRV_EXECUTION_MODE_ID  */
-    SPIRV_DECORATE = 585,          /* SPIRV_DECORATE  */
-    SPIRV_DECORATE_ID = 586,       /* SPIRV_DECORATE_ID  */
-    SPIRV_DECORATE_STRING = 587,   /* SPIRV_DECORATE_STRING  */
-    SPIRV_TYPE = 588,              /* SPIRV_TYPE  */
-    SPIRV_STORAGE_CLASS = 589,     /* SPIRV_STORAGE_CLASS  */
-    SPIRV_BY_REFERENCE = 590,      /* SPIRV_BY_REFERENCE  */
-    SPIRV_LITERAL = 591,           /* SPIRV_LITERAL  */
-    ATTACHMENTEXT = 592,           /* ATTACHMENTEXT  */
-    IATTACHMENTEXT = 593,          /* IATTACHMENTEXT  */
-    UATTACHMENTEXT = 594,          /* UATTACHMENTEXT  */
-    LEFT_OP = 595,                 /* LEFT_OP  */
-    RIGHT_OP = 596,                /* RIGHT_OP  */
-    INC_OP = 597,                  /* INC_OP  */
-    DEC_OP = 598,                  /* DEC_OP  */
-    LE_OP = 599,                   /* LE_OP  */
-    GE_OP = 600,                   /* GE_OP  */
-    EQ_OP = 601,                   /* EQ_OP  */
-    NE_OP = 602,                   /* NE_OP  */
-    AND_OP = 603,                  /* AND_OP  */
-    OR_OP = 604,                   /* OR_OP  */
-    XOR_OP = 605,                  /* XOR_OP  */
-    MUL_ASSIGN = 606,              /* MUL_ASSIGN  */
-    DIV_ASSIGN = 607,              /* DIV_ASSIGN  */
-    ADD_ASSIGN = 608,              /* ADD_ASSIGN  */
-    MOD_ASSIGN = 609,              /* MOD_ASSIGN  */
-    LEFT_ASSIGN = 610,             /* LEFT_ASSIGN  */
-    RIGHT_ASSIGN = 611,            /* RIGHT_ASSIGN  */
-    AND_ASSIGN = 612,              /* AND_ASSIGN  */
-    XOR_ASSIGN = 613,              /* XOR_ASSIGN  */
-    OR_ASSIGN = 614,               /* OR_ASSIGN  */
-    SUB_ASSIGN = 615,              /* SUB_ASSIGN  */
-    STRING_LITERAL = 616,          /* STRING_LITERAL  */
-    LEFT_PAREN = 617,              /* LEFT_PAREN  */
-    RIGHT_PAREN = 618,             /* RIGHT_PAREN  */
-    LEFT_BRACKET = 619,            /* LEFT_BRACKET  */
-    RIGHT_BRACKET = 620,           /* RIGHT_BRACKET  */
-    LEFT_BRACE = 621,              /* LEFT_BRACE  */
-    RIGHT_BRACE = 622,             /* RIGHT_BRACE  */
-    DOT = 623,                     /* DOT  */
-    COMMA = 624,                   /* COMMA  */
-    COLON = 625,                   /* COLON  */
-    EQUAL = 626,                   /* EQUAL  */
-    SEMICOLON = 627,               /* SEMICOLON  */
-    BANG = 628,                    /* BANG  */
-    DASH = 629,                    /* DASH  */
-    TILDE = 630,                   /* TILDE  */
-    PLUS = 631,                    /* PLUS  */
-    STAR = 632,                    /* STAR  */
-    SLASH = 633,                   /* SLASH  */
-    PERCENT = 634,                 /* PERCENT  */
-    LEFT_ANGLE = 635,              /* LEFT_ANGLE  */
-    RIGHT_ANGLE = 636,             /* RIGHT_ANGLE  */
-    VERTICAL_BAR = 637,            /* VERTICAL_BAR  */
-    CARET = 638,                   /* CARET  */
-    AMPERSAND = 639,               /* AMPERSAND  */
-    QUESTION = 640,                /* QUESTION  */
-    INVARIANT = 641,               /* INVARIANT  */
-    HIGH_PRECISION = 642,          /* HIGH_PRECISION  */
-    MEDIUM_PRECISION = 643,        /* MEDIUM_PRECISION  */
-    LOW_PRECISION = 644,           /* LOW_PRECISION  */
-    PRECISION = 645,               /* PRECISION  */
-    PACKED = 646,                  /* PACKED  */
-    RESOURCE = 647,                /* RESOURCE  */
-    SUPERP = 648,                  /* SUPERP  */
-    FLOATCONSTANT = 649,           /* FLOATCONSTANT  */
-    INTCONSTANT = 650,             /* INTCONSTANT  */
-    UINTCONSTANT = 651,            /* UINTCONSTANT  */
-    BOOLCONSTANT = 652,            /* BOOLCONSTANT  */
-    IDENTIFIER = 653,              /* IDENTIFIER  */
-    TYPE_NAME = 654,               /* TYPE_NAME  */
-    CENTROID = 655,                /* CENTROID  */
-    IN = 656,                      /* IN  */
-    OUT = 657,                     /* OUT  */
-    INOUT = 658,                   /* INOUT  */
-    STRUCT = 659,                  /* STRUCT  */
-    VOID = 660,                    /* VOID  */
-    WHILE = 661,                   /* WHILE  */
-    BREAK = 662,                   /* BREAK  */
-    CONTINUE = 663,                /* CONTINUE  */
-    DO = 664,                      /* DO  */
-    ELSE = 665,                    /* ELSE  */
-    FOR = 666,                     /* FOR  */
-    IF = 667,                      /* IF  */
-    DISCARD = 668,                 /* DISCARD  */
-    RETURN = 669,                  /* RETURN  */
-    SWITCH = 670,                  /* SWITCH  */
-    CASE = 671,                    /* CASE  */
-    DEFAULT = 672,                 /* DEFAULT  */
-    TERMINATE_INVOCATION = 673,    /* TERMINATE_INVOCATION  */
-    TERMINATE_RAY = 674,           /* TERMINATE_RAY  */
-    IGNORE_INTERSECTION = 675,     /* IGNORE_INTERSECTION  */
-    UNIFORM = 676,                 /* UNIFORM  */
-    SHARED = 677,                  /* SHARED  */
-    BUFFER = 678,                  /* BUFFER  */
-    TILEIMAGEEXT = 679,            /* TILEIMAGEEXT  */
-    FLAT = 680,                    /* FLAT  */
-    SMOOTH = 681,                  /* SMOOTH  */
-    LAYOUT = 682,                  /* LAYOUT  */
-    DOUBLECONSTANT = 683,          /* DOUBLECONSTANT  */
-    INT16CONSTANT = 684,           /* INT16CONSTANT  */
-    UINT16CONSTANT = 685,          /* UINT16CONSTANT  */
-    FLOAT16CONSTANT = 686,         /* FLOAT16CONSTANT  */
-    INT32CONSTANT = 687,           /* INT32CONSTANT  */
-    UINT32CONSTANT = 688,          /* UINT32CONSTANT  */
-    INT64CONSTANT = 689,           /* INT64CONSTANT  */
-    UINT64CONSTANT = 690,          /* UINT64CONSTANT  */
-    SUBROUTINE = 691,              /* SUBROUTINE  */
-    DEMOTE = 692,                  /* DEMOTE  */
-    FUNCTION = 693,                /* FUNCTION  */
-    PAYLOADNV = 694,               /* PAYLOADNV  */
-    PAYLOADINNV = 695,             /* PAYLOADINNV  */
-    HITATTRNV = 696,               /* HITATTRNV  */
-    CALLDATANV = 697,              /* CALLDATANV  */
-    CALLDATAINNV = 698,            /* CALLDATAINNV  */
-    PAYLOADEXT = 699,              /* PAYLOADEXT  */
-    PAYLOADINEXT = 700,            /* PAYLOADINEXT  */
-    HITATTREXT = 701,              /* HITATTREXT  */
-    CALLDATAEXT = 702,             /* CALLDATAEXT  */
-    CALLDATAINEXT = 703,           /* CALLDATAINEXT  */
-    PATCH = 704,                   /* PATCH  */
-    SAMPLE = 705,                  /* SAMPLE  */
-    NONUNIFORM = 706,              /* NONUNIFORM  */
-    COHERENT = 707,                /* COHERENT  */
-    VOLATILE = 708,                /* VOLATILE  */
-    RESTRICT = 709,                /* RESTRICT  */
-    READONLY = 710,                /* READONLY  */
-    WRITEONLY = 711,               /* WRITEONLY  */
-    NONTEMPORAL = 712,             /* NONTEMPORAL  */
-    DEVICECOHERENT = 713,          /* DEVICECOHERENT  */
-    QUEUEFAMILYCOHERENT = 714,     /* QUEUEFAMILYCOHERENT  */
-    WORKGROUPCOHERENT = 715,       /* WORKGROUPCOHERENT  */
-    SUBGROUPCOHERENT = 716,        /* SUBGROUPCOHERENT  */
-    NONPRIVATE = 717,              /* NONPRIVATE  */
-    SHADERCALLCOHERENT = 718,      /* SHADERCALLCOHERENT  */
-    NOPERSPECTIVE = 719,           /* NOPERSPECTIVE  */
-    EXPLICITINTERPAMD = 720,       /* EXPLICITINTERPAMD  */
-    PERVERTEXEXT = 721,            /* PERVERTEXEXT  */
-    PERVERTEXNV = 722,             /* PERVERTEXNV  */
-    PERPRIMITIVENV = 723,          /* PERPRIMITIVENV  */
-    PERVIEWNV = 724,               /* PERVIEWNV  */
-    PERTASKNV = 725,               /* PERTASKNV  */
-    PERPRIMITIVEEXT = 726,         /* PERPRIMITIVEEXT  */
-    TASKPAYLOADWORKGROUPEXT = 727, /* TASKPAYLOADWORKGROUPEXT  */
-    PRECISE = 728                  /* PRECISE  */
+    FLOATE5M2_T = 318,             /* FLOATE5M2_T  */
+    FLOATE4M3_T = 319,             /* FLOATE4M3_T  */
+    BFLOAT16_T = 320,              /* BFLOAT16_T  */
+    FLOAT16_T = 321,               /* FLOAT16_T  */
+    FLOAT32_T = 322,               /* FLOAT32_T  */
+    DOUBLE = 323,                  /* DOUBLE  */
+    FLOAT64_T = 324,               /* FLOAT64_T  */
+    INT64_T = 325,                 /* INT64_T  */
+    UINT64_T = 326,                /* UINT64_T  */
+    INT32_T = 327,                 /* INT32_T  */
+    UINT32_T = 328,                /* UINT32_T  */
+    INT16_T = 329,                 /* INT16_T  */
+    UINT16_T = 330,                /* UINT16_T  */
+    INT8_T = 331,                  /* INT8_T  */
+    UINT8_T = 332,                 /* UINT8_T  */
+    I64VEC2 = 333,                 /* I64VEC2  */
+    I64VEC3 = 334,                 /* I64VEC3  */
+    I64VEC4 = 335,                 /* I64VEC4  */
+    U64VEC2 = 336,                 /* U64VEC2  */
+    U64VEC3 = 337,                 /* U64VEC3  */
+    U64VEC4 = 338,                 /* U64VEC4  */
+    I32VEC2 = 339,                 /* I32VEC2  */
+    I32VEC3 = 340,                 /* I32VEC3  */
+    I32VEC4 = 341,                 /* I32VEC4  */
+    U32VEC2 = 342,                 /* U32VEC2  */
+    U32VEC3 = 343,                 /* U32VEC3  */
+    U32VEC4 = 344,                 /* U32VEC4  */
+    I16VEC2 = 345,                 /* I16VEC2  */
+    I16VEC3 = 346,                 /* I16VEC3  */
+    I16VEC4 = 347,                 /* I16VEC4  */
+    U16VEC2 = 348,                 /* U16VEC2  */
+    U16VEC3 = 349,                 /* U16VEC3  */
+    U16VEC4 = 350,                 /* U16VEC4  */
+    I8VEC2 = 351,                  /* I8VEC2  */
+    I8VEC3 = 352,                  /* I8VEC3  */
+    I8VEC4 = 353,                  /* I8VEC4  */
+    U8VEC2 = 354,                  /* U8VEC2  */
+    U8VEC3 = 355,                  /* U8VEC3  */
+    U8VEC4 = 356,                  /* U8VEC4  */
+    DVEC2 = 357,                   /* DVEC2  */
+    DVEC3 = 358,                   /* DVEC3  */
+    DVEC4 = 359,                   /* DVEC4  */
+    DMAT2 = 360,                   /* DMAT2  */
+    DMAT3 = 361,                   /* DMAT3  */
+    DMAT4 = 362,                   /* DMAT4  */
+    BF16VEC2 = 363,                /* BF16VEC2  */
+    BF16VEC3 = 364,                /* BF16VEC3  */
+    BF16VEC4 = 365,                /* BF16VEC4  */
+    FE5M2VEC2 = 366,               /* FE5M2VEC2  */
+    FE5M2VEC3 = 367,               /* FE5M2VEC3  */
+    FE5M2VEC4 = 368,               /* FE5M2VEC4  */
+    FE4M3VEC2 = 369,               /* FE4M3VEC2  */
+    FE4M3VEC3 = 370,               /* FE4M3VEC3  */
+    FE4M3VEC4 = 371,               /* FE4M3VEC4  */
+    F16VEC2 = 372,                 /* F16VEC2  */
+    F16VEC3 = 373,                 /* F16VEC3  */
+    F16VEC4 = 374,                 /* F16VEC4  */
+    F16MAT2 = 375,                 /* F16MAT2  */
+    F16MAT3 = 376,                 /* F16MAT3  */
+    F16MAT4 = 377,                 /* F16MAT4  */
+    F32VEC2 = 378,                 /* F32VEC2  */
+    F32VEC3 = 379,                 /* F32VEC3  */
+    F32VEC4 = 380,                 /* F32VEC4  */
+    F32MAT2 = 381,                 /* F32MAT2  */
+    F32MAT3 = 382,                 /* F32MAT3  */
+    F32MAT4 = 383,                 /* F32MAT4  */
+    F64VEC2 = 384,                 /* F64VEC2  */
+    F64VEC3 = 385,                 /* F64VEC3  */
+    F64VEC4 = 386,                 /* F64VEC4  */
+    F64MAT2 = 387,                 /* F64MAT2  */
+    F64MAT3 = 388,                 /* F64MAT3  */
+    F64MAT4 = 389,                 /* F64MAT4  */
+    DMAT2X2 = 390,                 /* DMAT2X2  */
+    DMAT2X3 = 391,                 /* DMAT2X3  */
+    DMAT2X4 = 392,                 /* DMAT2X4  */
+    DMAT3X2 = 393,                 /* DMAT3X2  */
+    DMAT3X3 = 394,                 /* DMAT3X3  */
+    DMAT3X4 = 395,                 /* DMAT3X4  */
+    DMAT4X2 = 396,                 /* DMAT4X2  */
+    DMAT4X3 = 397,                 /* DMAT4X3  */
+    DMAT4X4 = 398,                 /* DMAT4X4  */
+    F16MAT2X2 = 399,               /* F16MAT2X2  */
+    F16MAT2X3 = 400,               /* F16MAT2X3  */
+    F16MAT2X4 = 401,               /* F16MAT2X4  */
+    F16MAT3X2 = 402,               /* F16MAT3X2  */
+    F16MAT3X3 = 403,               /* F16MAT3X3  */
+    F16MAT3X4 = 404,               /* F16MAT3X4  */
+    F16MAT4X2 = 405,               /* F16MAT4X2  */
+    F16MAT4X3 = 406,               /* F16MAT4X3  */
+    F16MAT4X4 = 407,               /* F16MAT4X4  */
+    F32MAT2X2 = 408,               /* F32MAT2X2  */
+    F32MAT2X3 = 409,               /* F32MAT2X3  */
+    F32MAT2X4 = 410,               /* F32MAT2X4  */
+    F32MAT3X2 = 411,               /* F32MAT3X2  */
+    F32MAT3X3 = 412,               /* F32MAT3X3  */
+    F32MAT3X4 = 413,               /* F32MAT3X4  */
+    F32MAT4X2 = 414,               /* F32MAT4X2  */
+    F32MAT4X3 = 415,               /* F32MAT4X3  */
+    F32MAT4X4 = 416,               /* F32MAT4X4  */
+    F64MAT2X2 = 417,               /* F64MAT2X2  */
+    F64MAT2X3 = 418,               /* F64MAT2X3  */
+    F64MAT2X4 = 419,               /* F64MAT2X4  */
+    F64MAT3X2 = 420,               /* F64MAT3X2  */
+    F64MAT3X3 = 421,               /* F64MAT3X3  */
+    F64MAT3X4 = 422,               /* F64MAT3X4  */
+    F64MAT4X2 = 423,               /* F64MAT4X2  */
+    F64MAT4X3 = 424,               /* F64MAT4X3  */
+    F64MAT4X4 = 425,               /* F64MAT4X4  */
+    ATOMIC_UINT = 426,             /* ATOMIC_UINT  */
+    ACCSTRUCTNV = 427,             /* ACCSTRUCTNV  */
+    ACCSTRUCTEXT = 428,            /* ACCSTRUCTEXT  */
+    RAYQUERYEXT = 429,             /* RAYQUERYEXT  */
+    FCOOPMATNV = 430,              /* FCOOPMATNV  */
+    ICOOPMATNV = 431,              /* ICOOPMATNV  */
+    UCOOPMATNV = 432,              /* UCOOPMATNV  */
+    COOPMAT = 433,                 /* COOPMAT  */
+    COOPVECNV = 434,               /* COOPVECNV  */
+    HITOBJECTNV = 435,             /* HITOBJECTNV  */
+    HITOBJECTATTRNV = 436,         /* HITOBJECTATTRNV  */
+    TENSORLAYOUTNV = 437,          /* TENSORLAYOUTNV  */
+    TENSORVIEWNV = 438,            /* TENSORVIEWNV  */
+    TENSORARM = 439,               /* TENSORARM  */
+    SAMPLERCUBEARRAY = 440,        /* SAMPLERCUBEARRAY  */
+    SAMPLERCUBEARRAYSHADOW = 441,  /* SAMPLERCUBEARRAYSHADOW  */
+    ISAMPLERCUBEARRAY = 442,       /* ISAMPLERCUBEARRAY  */
+    USAMPLERCUBEARRAY = 443,       /* USAMPLERCUBEARRAY  */
+    SAMPLER1D = 444,               /* SAMPLER1D  */
+    SAMPLER1DARRAY = 445,          /* SAMPLER1DARRAY  */
+    SAMPLER1DARRAYSHADOW = 446,    /* SAMPLER1DARRAYSHADOW  */
+    ISAMPLER1D = 447,              /* ISAMPLER1D  */
+    SAMPLER1DSHADOW = 448,         /* SAMPLER1DSHADOW  */
+    SAMPLER2DRECT = 449,           /* SAMPLER2DRECT  */
+    SAMPLER2DRECTSHADOW = 450,     /* SAMPLER2DRECTSHADOW  */
+    ISAMPLER2DRECT = 451,          /* ISAMPLER2DRECT  */
+    USAMPLER2DRECT = 452,          /* USAMPLER2DRECT  */
+    SAMPLERBUFFER = 453,           /* SAMPLERBUFFER  */
+    ISAMPLERBUFFER = 454,          /* ISAMPLERBUFFER  */
+    USAMPLERBUFFER = 455,          /* USAMPLERBUFFER  */
+    SAMPLER2DMS = 456,             /* SAMPLER2DMS  */
+    ISAMPLER2DMS = 457,            /* ISAMPLER2DMS  */
+    USAMPLER2DMS = 458,            /* USAMPLER2DMS  */
+    SAMPLER2DMSARRAY = 459,        /* SAMPLER2DMSARRAY  */
+    ISAMPLER2DMSARRAY = 460,       /* ISAMPLER2DMSARRAY  */
+    USAMPLER2DMSARRAY = 461,       /* USAMPLER2DMSARRAY  */
+    SAMPLEREXTERNALOES = 462,      /* SAMPLEREXTERNALOES  */
+    SAMPLEREXTERNAL2DY2YEXT = 463, /* SAMPLEREXTERNAL2DY2YEXT  */
+    ISAMPLER1DARRAY = 464,         /* ISAMPLER1DARRAY  */
+    USAMPLER1D = 465,              /* USAMPLER1D  */
+    USAMPLER1DARRAY = 466,         /* USAMPLER1DARRAY  */
+    F16SAMPLER1D = 467,            /* F16SAMPLER1D  */
+    F16SAMPLER2D = 468,            /* F16SAMPLER2D  */
+    F16SAMPLER3D = 469,            /* F16SAMPLER3D  */
+    F16SAMPLER2DRECT = 470,        /* F16SAMPLER2DRECT  */
+    F16SAMPLERCUBE = 471,          /* F16SAMPLERCUBE  */
+    F16SAMPLER1DARRAY = 472,       /* F16SAMPLER1DARRAY  */
+    F16SAMPLER2DARRAY = 473,       /* F16SAMPLER2DARRAY  */
+    F16SAMPLERCUBEARRAY = 474,     /* F16SAMPLERCUBEARRAY  */
+    F16SAMPLERBUFFER = 475,        /* F16SAMPLERBUFFER  */
+    F16SAMPLER2DMS = 476,          /* F16SAMPLER2DMS  */
+    F16SAMPLER2DMSARRAY = 477,     /* F16SAMPLER2DMSARRAY  */
+    F16SAMPLER1DSHADOW = 478,      /* F16SAMPLER1DSHADOW  */
+    F16SAMPLER2DSHADOW = 479,      /* F16SAMPLER2DSHADOW  */
+    F16SAMPLER1DARRAYSHADOW = 480, /* F16SAMPLER1DARRAYSHADOW  */
+    F16SAMPLER2DARRAYSHADOW = 481, /* F16SAMPLER2DARRAYSHADOW  */
+    F16SAMPLER2DRECTSHADOW = 482,  /* F16SAMPLER2DRECTSHADOW  */
+    F16SAMPLERCUBESHADOW = 483,    /* F16SAMPLERCUBESHADOW  */
+    F16SAMPLERCUBEARRAYSHADOW = 484, /* F16SAMPLERCUBEARRAYSHADOW  */
+    IMAGE1D = 485,                 /* IMAGE1D  */
+    IIMAGE1D = 486,                /* IIMAGE1D  */
+    UIMAGE1D = 487,                /* UIMAGE1D  */
+    IMAGE2D = 488,                 /* IMAGE2D  */
+    IIMAGE2D = 489,                /* IIMAGE2D  */
+    UIMAGE2D = 490,                /* UIMAGE2D  */
+    IMAGE3D = 491,                 /* IMAGE3D  */
+    IIMAGE3D = 492,                /* IIMAGE3D  */
+    UIMAGE3D = 493,                /* UIMAGE3D  */
+    IMAGE2DRECT = 494,             /* IMAGE2DRECT  */
+    IIMAGE2DRECT = 495,            /* IIMAGE2DRECT  */
+    UIMAGE2DRECT = 496,            /* UIMAGE2DRECT  */
+    IMAGECUBE = 497,               /* IMAGECUBE  */
+    IIMAGECUBE = 498,              /* IIMAGECUBE  */
+    UIMAGECUBE = 499,              /* UIMAGECUBE  */
+    IMAGEBUFFER = 500,             /* IMAGEBUFFER  */
+    IIMAGEBUFFER = 501,            /* IIMAGEBUFFER  */
+    UIMAGEBUFFER = 502,            /* UIMAGEBUFFER  */
+    IMAGE1DARRAY = 503,            /* IMAGE1DARRAY  */
+    IIMAGE1DARRAY = 504,           /* IIMAGE1DARRAY  */
+    UIMAGE1DARRAY = 505,           /* UIMAGE1DARRAY  */
+    IMAGE2DARRAY = 506,            /* IMAGE2DARRAY  */
+    IIMAGE2DARRAY = 507,           /* IIMAGE2DARRAY  */
+    UIMAGE2DARRAY = 508,           /* UIMAGE2DARRAY  */
+    IMAGECUBEARRAY = 509,          /* IMAGECUBEARRAY  */
+    IIMAGECUBEARRAY = 510,         /* IIMAGECUBEARRAY  */
+    UIMAGECUBEARRAY = 511,         /* UIMAGECUBEARRAY  */
+    IMAGE2DMS = 512,               /* IMAGE2DMS  */
+    IIMAGE2DMS = 513,              /* IIMAGE2DMS  */
+    UIMAGE2DMS = 514,              /* UIMAGE2DMS  */
+    IMAGE2DMSARRAY = 515,          /* IMAGE2DMSARRAY  */
+    IIMAGE2DMSARRAY = 516,         /* IIMAGE2DMSARRAY  */
+    UIMAGE2DMSARRAY = 517,         /* UIMAGE2DMSARRAY  */
+    F16IMAGE1D = 518,              /* F16IMAGE1D  */
+    F16IMAGE2D = 519,              /* F16IMAGE2D  */
+    F16IMAGE3D = 520,              /* F16IMAGE3D  */
+    F16IMAGE2DRECT = 521,          /* F16IMAGE2DRECT  */
+    F16IMAGECUBE = 522,            /* F16IMAGECUBE  */
+    F16IMAGE1DARRAY = 523,         /* F16IMAGE1DARRAY  */
+    F16IMAGE2DARRAY = 524,         /* F16IMAGE2DARRAY  */
+    F16IMAGECUBEARRAY = 525,       /* F16IMAGECUBEARRAY  */
+    F16IMAGEBUFFER = 526,          /* F16IMAGEBUFFER  */
+    F16IMAGE2DMS = 527,            /* F16IMAGE2DMS  */
+    F16IMAGE2DMSARRAY = 528,       /* F16IMAGE2DMSARRAY  */
+    I64IMAGE1D = 529,              /* I64IMAGE1D  */
+    U64IMAGE1D = 530,              /* U64IMAGE1D  */
+    I64IMAGE2D = 531,              /* I64IMAGE2D  */
+    U64IMAGE2D = 532,              /* U64IMAGE2D  */
+    I64IMAGE3D = 533,              /* I64IMAGE3D  */
+    U64IMAGE3D = 534,              /* U64IMAGE3D  */
+    I64IMAGE2DRECT = 535,          /* I64IMAGE2DRECT  */
+    U64IMAGE2DRECT = 536,          /* U64IMAGE2DRECT  */
+    I64IMAGECUBE = 537,            /* I64IMAGECUBE  */
+    U64IMAGECUBE = 538,            /* U64IMAGECUBE  */
+    I64IMAGEBUFFER = 539,          /* I64IMAGEBUFFER  */
+    U64IMAGEBUFFER = 540,          /* U64IMAGEBUFFER  */
+    I64IMAGE1DARRAY = 541,         /* I64IMAGE1DARRAY  */
+    U64IMAGE1DARRAY = 542,         /* U64IMAGE1DARRAY  */
+    I64IMAGE2DARRAY = 543,         /* I64IMAGE2DARRAY  */
+    U64IMAGE2DARRAY = 544,         /* U64IMAGE2DARRAY  */
+    I64IMAGECUBEARRAY = 545,       /* I64IMAGECUBEARRAY  */
+    U64IMAGECUBEARRAY = 546,       /* U64IMAGECUBEARRAY  */
+    I64IMAGE2DMS = 547,            /* I64IMAGE2DMS  */
+    U64IMAGE2DMS = 548,            /* U64IMAGE2DMS  */
+    I64IMAGE2DMSARRAY = 549,       /* I64IMAGE2DMSARRAY  */
+    U64IMAGE2DMSARRAY = 550,       /* U64IMAGE2DMSARRAY  */
+    TEXTURECUBEARRAY = 551,        /* TEXTURECUBEARRAY  */
+    ITEXTURECUBEARRAY = 552,       /* ITEXTURECUBEARRAY  */
+    UTEXTURECUBEARRAY = 553,       /* UTEXTURECUBEARRAY  */
+    TEXTURE1D = 554,               /* TEXTURE1D  */
+    ITEXTURE1D = 555,              /* ITEXTURE1D  */
+    UTEXTURE1D = 556,              /* UTEXTURE1D  */
+    TEXTURE1DARRAY = 557,          /* TEXTURE1DARRAY  */
+    ITEXTURE1DARRAY = 558,         /* ITEXTURE1DARRAY  */
+    UTEXTURE1DARRAY = 559,         /* UTEXTURE1DARRAY  */
+    TEXTURE2DRECT = 560,           /* TEXTURE2DRECT  */
+    ITEXTURE2DRECT = 561,          /* ITEXTURE2DRECT  */
+    UTEXTURE2DRECT = 562,          /* UTEXTURE2DRECT  */
+    TEXTUREBUFFER = 563,           /* TEXTUREBUFFER  */
+    ITEXTUREBUFFER = 564,          /* ITEXTUREBUFFER  */
+    UTEXTUREBUFFER = 565,          /* UTEXTUREBUFFER  */
+    TEXTURE2DMS = 566,             /* TEXTURE2DMS  */
+    ITEXTURE2DMS = 567,            /* ITEXTURE2DMS  */
+    UTEXTURE2DMS = 568,            /* UTEXTURE2DMS  */
+    TEXTURE2DMSARRAY = 569,        /* TEXTURE2DMSARRAY  */
+    ITEXTURE2DMSARRAY = 570,       /* ITEXTURE2DMSARRAY  */
+    UTEXTURE2DMSARRAY = 571,       /* UTEXTURE2DMSARRAY  */
+    F16TEXTURE1D = 572,            /* F16TEXTURE1D  */
+    F16TEXTURE2D = 573,            /* F16TEXTURE2D  */
+    F16TEXTURE3D = 574,            /* F16TEXTURE3D  */
+    F16TEXTURE2DRECT = 575,        /* F16TEXTURE2DRECT  */
+    F16TEXTURECUBE = 576,          /* F16TEXTURECUBE  */
+    F16TEXTURE1DARRAY = 577,       /* F16TEXTURE1DARRAY  */
+    F16TEXTURE2DARRAY = 578,       /* F16TEXTURE2DARRAY  */
+    F16TEXTURECUBEARRAY = 579,     /* F16TEXTURECUBEARRAY  */
+    F16TEXTUREBUFFER = 580,        /* F16TEXTUREBUFFER  */
+    F16TEXTURE2DMS = 581,          /* F16TEXTURE2DMS  */
+    F16TEXTURE2DMSARRAY = 582,     /* F16TEXTURE2DMSARRAY  */
+    SUBPASSINPUT = 583,            /* SUBPASSINPUT  */
+    SUBPASSINPUTMS = 584,          /* SUBPASSINPUTMS  */
+    ISUBPASSINPUT = 585,           /* ISUBPASSINPUT  */
+    ISUBPASSINPUTMS = 586,         /* ISUBPASSINPUTMS  */
+    USUBPASSINPUT = 587,           /* USUBPASSINPUT  */
+    USUBPASSINPUTMS = 588,         /* USUBPASSINPUTMS  */
+    F16SUBPASSINPUT = 589,         /* F16SUBPASSINPUT  */
+    F16SUBPASSINPUTMS = 590,       /* F16SUBPASSINPUTMS  */
+    SPIRV_INSTRUCTION = 591,       /* SPIRV_INSTRUCTION  */
+    SPIRV_EXECUTION_MODE = 592,    /* SPIRV_EXECUTION_MODE  */
+    SPIRV_EXECUTION_MODE_ID = 593, /* SPIRV_EXECUTION_MODE_ID  */
+    SPIRV_DECORATE = 594,          /* SPIRV_DECORATE  */
+    SPIRV_DECORATE_ID = 595,       /* SPIRV_DECORATE_ID  */
+    SPIRV_DECORATE_STRING = 596,   /* SPIRV_DECORATE_STRING  */
+    SPIRV_TYPE = 597,              /* SPIRV_TYPE  */
+    SPIRV_STORAGE_CLASS = 598,     /* SPIRV_STORAGE_CLASS  */
+    SPIRV_BY_REFERENCE = 599,      /* SPIRV_BY_REFERENCE  */
+    SPIRV_LITERAL = 600,           /* SPIRV_LITERAL  */
+    ATTACHMENTEXT = 601,           /* ATTACHMENTEXT  */
+    IATTACHMENTEXT = 602,          /* IATTACHMENTEXT  */
+    UATTACHMENTEXT = 603,          /* UATTACHMENTEXT  */
+    LEFT_OP = 604,                 /* LEFT_OP  */
+    RIGHT_OP = 605,                /* RIGHT_OP  */
+    INC_OP = 606,                  /* INC_OP  */
+    DEC_OP = 607,                  /* DEC_OP  */
+    LE_OP = 608,                   /* LE_OP  */
+    GE_OP = 609,                   /* GE_OP  */
+    EQ_OP = 610,                   /* EQ_OP  */
+    NE_OP = 611,                   /* NE_OP  */
+    AND_OP = 612,                  /* AND_OP  */
+    OR_OP = 613,                   /* OR_OP  */
+    XOR_OP = 614,                  /* XOR_OP  */
+    MUL_ASSIGN = 615,              /* MUL_ASSIGN  */
+    DIV_ASSIGN = 616,              /* DIV_ASSIGN  */
+    ADD_ASSIGN = 617,              /* ADD_ASSIGN  */
+    MOD_ASSIGN = 618,              /* MOD_ASSIGN  */
+    LEFT_ASSIGN = 619,             /* LEFT_ASSIGN  */
+    RIGHT_ASSIGN = 620,            /* RIGHT_ASSIGN  */
+    AND_ASSIGN = 621,              /* AND_ASSIGN  */
+    XOR_ASSIGN = 622,              /* XOR_ASSIGN  */
+    OR_ASSIGN = 623,               /* OR_ASSIGN  */
+    SUB_ASSIGN = 624,              /* SUB_ASSIGN  */
+    STRING_LITERAL = 625,          /* STRING_LITERAL  */
+    LEFT_PAREN = 626,              /* LEFT_PAREN  */
+    RIGHT_PAREN = 627,             /* RIGHT_PAREN  */
+    LEFT_BRACKET = 628,            /* LEFT_BRACKET  */
+    RIGHT_BRACKET = 629,           /* RIGHT_BRACKET  */
+    LEFT_BRACE = 630,              /* LEFT_BRACE  */
+    RIGHT_BRACE = 631,             /* RIGHT_BRACE  */
+    DOT = 632,                     /* DOT  */
+    COMMA = 633,                   /* COMMA  */
+    COLON = 634,                   /* COLON  */
+    EQUAL = 635,                   /* EQUAL  */
+    SEMICOLON = 636,               /* SEMICOLON  */
+    BANG = 637,                    /* BANG  */
+    DASH = 638,                    /* DASH  */
+    TILDE = 639,                   /* TILDE  */
+    PLUS = 640,                    /* PLUS  */
+    STAR = 641,                    /* STAR  */
+    SLASH = 642,                   /* SLASH  */
+    PERCENT = 643,                 /* PERCENT  */
+    LEFT_ANGLE = 644,              /* LEFT_ANGLE  */
+    RIGHT_ANGLE = 645,             /* RIGHT_ANGLE  */
+    VERTICAL_BAR = 646,            /* VERTICAL_BAR  */
+    CARET = 647,                   /* CARET  */
+    AMPERSAND = 648,               /* AMPERSAND  */
+    QUESTION = 649,                /* QUESTION  */
+    INVARIANT = 650,               /* INVARIANT  */
+    HIGH_PRECISION = 651,          /* HIGH_PRECISION  */
+    MEDIUM_PRECISION = 652,        /* MEDIUM_PRECISION  */
+    LOW_PRECISION = 653,           /* LOW_PRECISION  */
+    PRECISION = 654,               /* PRECISION  */
+    PACKED = 655,                  /* PACKED  */
+    RESOURCE = 656,                /* RESOURCE  */
+    SUPERP = 657,                  /* SUPERP  */
+    FLOATCONSTANT = 658,           /* FLOATCONSTANT  */
+    INTCONSTANT = 659,             /* INTCONSTANT  */
+    UINTCONSTANT = 660,            /* UINTCONSTANT  */
+    BOOLCONSTANT = 661,            /* BOOLCONSTANT  */
+    IDENTIFIER = 662,              /* IDENTIFIER  */
+    TYPE_NAME = 663,               /* TYPE_NAME  */
+    CENTROID = 664,                /* CENTROID  */
+    IN = 665,                      /* IN  */
+    OUT = 666,                     /* OUT  */
+    INOUT = 667,                   /* INOUT  */
+    STRUCT = 668,                  /* STRUCT  */
+    VOID = 669,                    /* VOID  */
+    WHILE = 670,                   /* WHILE  */
+    BREAK = 671,                   /* BREAK  */
+    CONTINUE = 672,                /* CONTINUE  */
+    DO = 673,                      /* DO  */
+    ELSE = 674,                    /* ELSE  */
+    FOR = 675,                     /* FOR  */
+    IF = 676,                      /* IF  */
+    DISCARD = 677,                 /* DISCARD  */
+    RETURN = 678,                  /* RETURN  */
+    SWITCH = 679,                  /* SWITCH  */
+    CASE = 680,                    /* CASE  */
+    DEFAULT = 681,                 /* DEFAULT  */
+    TERMINATE_INVOCATION = 682,    /* TERMINATE_INVOCATION  */
+    TERMINATE_RAY = 683,           /* TERMINATE_RAY  */
+    IGNORE_INTERSECTION = 684,     /* IGNORE_INTERSECTION  */
+    UNIFORM = 685,                 /* UNIFORM  */
+    SHARED = 686,                  /* SHARED  */
+    BUFFER = 687,                  /* BUFFER  */
+    TILEIMAGEEXT = 688,            /* TILEIMAGEEXT  */
+    FLAT = 689,                    /* FLAT  */
+    SMOOTH = 690,                  /* SMOOTH  */
+    LAYOUT = 691,                  /* LAYOUT  */
+    DOUBLECONSTANT = 692,          /* DOUBLECONSTANT  */
+    INT16CONSTANT = 693,           /* INT16CONSTANT  */
+    UINT16CONSTANT = 694,          /* UINT16CONSTANT  */
+    FLOAT16CONSTANT = 695,         /* FLOAT16CONSTANT  */
+    INT32CONSTANT = 696,           /* INT32CONSTANT  */
+    UINT32CONSTANT = 697,          /* UINT32CONSTANT  */
+    INT64CONSTANT = 698,           /* INT64CONSTANT  */
+    UINT64CONSTANT = 699,          /* UINT64CONSTANT  */
+    SUBROUTINE = 700,              /* SUBROUTINE  */
+    DEMOTE = 701,                  /* DEMOTE  */
+    FUNCTION = 702,                /* FUNCTION  */
+    PAYLOADNV = 703,               /* PAYLOADNV  */
+    PAYLOADINNV = 704,             /* PAYLOADINNV  */
+    HITATTRNV = 705,               /* HITATTRNV  */
+    CALLDATANV = 706,              /* CALLDATANV  */
+    CALLDATAINNV = 707,            /* CALLDATAINNV  */
+    PAYLOADEXT = 708,              /* PAYLOADEXT  */
+    PAYLOADINEXT = 709,            /* PAYLOADINEXT  */
+    HITATTREXT = 710,              /* HITATTREXT  */
+    CALLDATAEXT = 711,             /* CALLDATAEXT  */
+    CALLDATAINEXT = 712,           /* CALLDATAINEXT  */
+    PATCH = 713,                   /* PATCH  */
+    SAMPLE = 714,                  /* SAMPLE  */
+    NONUNIFORM = 715,              /* NONUNIFORM  */
+    COHERENT = 716,                /* COHERENT  */
+    VOLATILE = 717,                /* VOLATILE  */
+    RESTRICT = 718,                /* RESTRICT  */
+    READONLY = 719,                /* READONLY  */
+    WRITEONLY = 720,               /* WRITEONLY  */
+    NONTEMPORAL = 721,             /* NONTEMPORAL  */
+    DEVICECOHERENT = 722,          /* DEVICECOHERENT  */
+    QUEUEFAMILYCOHERENT = 723,     /* QUEUEFAMILYCOHERENT  */
+    WORKGROUPCOHERENT = 724,       /* WORKGROUPCOHERENT  */
+    SUBGROUPCOHERENT = 725,        /* SUBGROUPCOHERENT  */
+    NONPRIVATE = 726,              /* NONPRIVATE  */
+    SHADERCALLCOHERENT = 727,      /* SHADERCALLCOHERENT  */
+    NOPERSPECTIVE = 728,           /* NOPERSPECTIVE  */
+    EXPLICITINTERPAMD = 729,       /* EXPLICITINTERPAMD  */
+    PERVERTEXEXT = 730,            /* PERVERTEXEXT  */
+    PERVERTEXNV = 731,             /* PERVERTEXNV  */
+    PERPRIMITIVENV = 732,          /* PERPRIMITIVENV  */
+    PERVIEWNV = 733,               /* PERVIEWNV  */
+    PERTASKNV = 734,               /* PERTASKNV  */
+    PERPRIMITIVEEXT = 735,         /* PERPRIMITIVEEXT  */
+    TASKPAYLOADWORKGROUPEXT = 736, /* TASKPAYLOADWORKGROUPEXT  */
+    PRECISE = 737                  /* PRECISE  */
   };
   typedef enum yytokentype yytoken_kind_t;
 #endif
@@ -572,7 +581,7 @@ union YYSTYPE
         glslang::TTypeParameters* typeParameters;
     } interm;
 
-#line 576 "MachineIndependent/glslang_tab.cpp.h"
+#line 585 "MachineIndependent/glslang_tab.cpp.h"
 
 };
 typedef union YYSTYPE YYSTYPE;

+ 19 - 0
3rdparty/glslang/glslang/MachineIndependent/intermOut.cpp

@@ -629,6 +629,14 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
     case EOpConstructBF16Vec2:  out.debug << "Construct bf16vec2";   break;
     case EOpConstructBF16Vec3:  out.debug << "Construct bf16vec3";   break;
     case EOpConstructBF16Vec4:  out.debug << "Construct bf16vec4";   break;
+    case EOpConstructFloatE5M2:  out.debug << "Construct floate5m2_t"; break;
+    case EOpConstructFloatE5M2Vec2:  out.debug << "Construct fe5m2vec2";   break;
+    case EOpConstructFloatE5M2Vec3:  out.debug << "Construct fe5m2vec3";   break;
+    case EOpConstructFloatE5M2Vec4:  out.debug << "Construct fe5m2vec4";   break;
+    case EOpConstructFloatE4M3:  out.debug << "Construct floate4m3_t"; break;
+    case EOpConstructFloatE4M3Vec2:  out.debug << "Construct fe4m3vec2";   break;
+    case EOpConstructFloatE4M3Vec3:  out.debug << "Construct fe4m3vec3";   break;
+    case EOpConstructFloatE4M3Vec4:  out.debug << "Construct fe4m3vec4";   break;
     case EOpConstructFloat16:   out.debug << "Construct float16_t"; break;
     case EOpConstructF16Vec2:   out.debug << "Construct f16vec2";   break;
     case EOpConstructF16Vec3:   out.debug << "Construct f16vec3";   break;
@@ -650,6 +658,11 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
     case EOpConstructCooperativeVectorNV:  out.debug << "Construct cooperative vector NV";  break;
     case EOpConstructAccStruct: out.debug << "Construct acceleration structure"; break;
 
+    case EOpBitCastArrayQCOM:              out.debug << "Bitcast To Array QCOM"; break;
+    case EOpExtractSubArrayQCOM:           out.debug << "Extract Subarray QCOM"; break;
+    case EOpCompositeConstructCoopMatQCOM:   out.debug << "Construct Cooperative Matrix QCOM"; break;
+    case EOpCompositeExtractCoopMatQCOM:     out.debug << "Extract Cooperative Matrix QCOM"; break;
+
     case EOpLessThan:         out.debug << "Compare Less Than";             break;
     case EOpGreaterThan:      out.debug << "Compare Greater Than";          break;
     case EOpLessThanEqual:    out.debug << "Compare Less Than or Equal";    break;
@@ -975,6 +988,10 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
     case EOpCooperativeVectorOuterProductAccumulateNV: out.debug << "Cooperative vector outer product accumulate NV"; break;
     case EOpCooperativeVectorReduceSumAccumulateNV: out.debug << "Cooperative vector reduce sum accumulate NV"; break;
 
+    case EOpTensorReadARM:   out.debug << "Read from tensor";  break;
+    case EOpTensorWriteARM:  out.debug << "Write to tensor";  break;
+    case EOpTensorSizeARM:   out.debug << "Get tensor size";  break;
+
     case EOpIsHelperInvocation: out.debug << "IsHelperInvocation"; break;
     case EOpDebugPrintf:  out.debug << "Debug printf";  break;
 
@@ -1164,6 +1181,8 @@ static void OutputConstantUnion(TInfoSink& out, const TIntermTyped* node, const
         case EbtDouble:
         case EbtFloat16:
         case EbtBFloat16:
+        case EbtFloatE5M2:
+        case EbtFloatE4M3:
             OutputDouble(out, constUnion[i].getDConst(), extra);
             out.debug << "\n";
             break;

+ 35 - 6
3rdparty/glslang/glslang/MachineIndependent/linkValidate.cpp

@@ -535,6 +535,9 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
             error(infoSink, "number of invocations must match between compilation units");
     }
 
+    // The GLSL specification requires that at least one compilation unit
+    // must declare the vertices layout, but not all units need to do so.
+    // However, all declarations must match.
     if (vertices == TQualifier::layoutNotSet)
         vertices = unit.vertices;
     else if (unit.vertices != TQualifier::layoutNotSet && vertices != unit.vertices) {
@@ -545,20 +548,30 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
         else
             assert(0);
     }
+
+    // The mesh shader extension requires that at least one compilation unit
+    // must declare the max_primitives layout, but not all units need to do so.
+    // However, all declarations must match.
     if (primitives == TQualifier::layoutNotSet)
         primitives = unit.primitives;
-    else if (primitives != unit.primitives) {
+    else if (unit.primitives != TQualifier::layoutNotSet && primitives != unit.primitives) {
         if (language == EShLangMesh)
             error(infoSink, "Contradictory layout max_primitives values");
         else
             assert(0);
     }
 
+    // The GLSL specification requires that at least one compilation unit
+    // must declare the input primitive layout, but not all units need to do so.
+    // However, all declarations must match.
     if (inputPrimitive == ElgNone)
         inputPrimitive = unit.inputPrimitive;
     else if (unit.inputPrimitive != ElgNone && inputPrimitive != unit.inputPrimitive)
         error(infoSink, "Contradictory input layout primitives");
 
+    // The GLSL specification requires that at least one compilation unit
+    // must declare the output primitive layout, but not all units need to do so.
+    // However, all declarations must match.
     if (outputPrimitive == ElgNone)
         outputPrimitive = unit.outputPrimitive;
     else if (unit.outputPrimitive != ElgNone && outputPrimitive != unit.outputPrimitive)
@@ -567,19 +580,27 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
     if (originUpperLeft != unit.originUpperLeft || pixelCenterInteger != unit.pixelCenterInteger)
         error(infoSink, "gl_FragCoord redeclarations must match across shaders");
 
+    // The GLSL specification requires that at least one compilation unit
+    // must declare the vertex spacing layout, but not all units need to do so.
+    // However, all declarations must match.
     if (vertexSpacing == EvsNone)
         vertexSpacing = unit.vertexSpacing;
-    else if (vertexSpacing != unit.vertexSpacing)
+    else if (unit.vertexSpacing != EvsNone && vertexSpacing != unit.vertexSpacing)
         error(infoSink, "Contradictory input vertex spacing");
 
+    // The GLSL specification requires that at least one compilation unit
+    // must declare the triangle ordering layout, but not all units need to do so.
+    // However, all declarations must match.
     if (vertexOrder == EvoNone)
         vertexOrder = unit.vertexOrder;
-    else if (vertexOrder != unit.vertexOrder)
+    else if (unit.vertexOrder != EvoNone && vertexOrder != unit.vertexOrder)
         error(infoSink, "Contradictory triangle ordering");
 
     MERGE_TRUE(pointMode);
 
     for (int i = 0; i < 3; ++i) {
+        // The GLSL specification requires that all workgroup size declarations must match
+        // but not all units have to declare the layout.
         if (unit.localSizeNotDefault[i]) {
             if (!localSizeNotDefault[i]) {
                 localSize[i] = unit.localSize[i];
@@ -589,9 +610,11 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
                 error(infoSink, "Contradictory local size");
         }
 
+        // The GLSL specification requires that all workgroup size specialization
+        // id declarations must match, but not all units have to declare the layout.
         if (localSizeSpecId[i] == TQualifier::layoutNotSet)
             localSizeSpecId[i] = unit.localSizeSpecId[i];
-        else if (localSizeSpecId[i] != unit.localSizeSpecId[i])
+        else if (unit.localSizeSpecId[i] != TQualifier::layoutNotSet && localSizeSpecId[i] != unit.localSizeSpecId[i])
             error(infoSink, "Contradictory local size specialization ids");
     }
 
@@ -602,9 +625,11 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
     MERGE_TRUE(nonCoherentStencilAttachmentReadEXT);
     MERGE_TRUE(nonCoherentTileAttachmentReadQCOM);
 
+    // The GLSL specification requires that all depth layout redeclarations must match,
+    // but not all units have to declare the layout.
     if (depthLayout == EldNone)
         depthLayout = unit.depthLayout;
-    else if (depthLayout != unit.depthLayout)
+    else if (unit.depthLayout != EldNone && depthLayout != unit.depthLayout)
         error(infoSink, "Contradictory depth layouts");
 
     MERGE_TRUE(depthReplacing);
@@ -615,9 +640,11 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
     MERGE_TRUE(xfbMode);
 
     for (size_t b = 0; b < xfbBuffers.size(); ++b) {
+        // The GLSL specification requires that all xfb_stride declarations for
+        // the same buffer must match, but not all units have to declare the layout.
         if (xfbBuffers[b].stride == TQualifier::layoutXfbStrideEnd)
             xfbBuffers[b].stride = unit.xfbBuffers[b].stride;
-        else if (xfbBuffers[b].stride != unit.xfbBuffers[b].stride)
+        else if (unit.xfbBuffers[b].stride != TQualifier::layoutXfbStrideEnd && xfbBuffers[b].stride != unit.xfbBuffers[b].stride)
             error(infoSink, "Contradictory xfb_stride");
         xfbBuffers[b].implicitStride = std::max(xfbBuffers[b].implicitStride, unit.xfbBuffers[b].implicitStride);
         if (unit.xfbBuffers[b].contains64BitType)
@@ -2386,6 +2413,8 @@ int TIntermediate::getBaseAlignmentScalar(const TType& type, int& size)
     case EbtDouble:  size = 8; return 8;
     case EbtFloat16: size = 2; return 2;
     case EbtBFloat16: size = 2; return 2;
+    case EbtFloatE5M2:
+    case EbtFloatE4M3:
     case EbtInt8:
     case EbtUint8:   size = 1; return 1;
     case EbtInt16:

+ 4 - 0
3rdparty/glslang/glslang/MachineIndependent/parseVersions.h

@@ -104,6 +104,8 @@ public:
     virtual void float16Check(const TSourceLoc&, const char* op, bool builtIn = false);
     virtual void float16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false);
     virtual void bfloat16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false);
+    virtual void floate5m2ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false);
+    virtual void floate4m3ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false);
     virtual bool float16Arithmetic();
     virtual void requireFloat16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc);
     virtual void int16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false);
@@ -122,9 +124,11 @@ public:
     virtual void fcoopmatCheckNV(const TSourceLoc&, const char* op, bool builtIn = false);
     virtual void intcoopmatCheckNV(const TSourceLoc&, const char *op, bool builtIn = false);
     virtual void coopmatCheck(const TSourceLoc&, const char* op, bool builtIn = false);
+    virtual void coopmatConverisonCheckQCOM(const TSourceLoc& loc, const char* op, bool builtIn = false);
     virtual void tensorLayoutViewCheck(const TSourceLoc&, const char* op, bool builtIn = false);
     virtual void coopvecCheck(const TSourceLoc&, const char* op, bool builtIn = false);
     virtual void intattachmentCheck(const TSourceLoc&, const char *op, bool builtIn = false);
+    virtual void tensorCheckARM(const TSourceLoc&, const char *op, bool builtIn = false);
     bool relaxedErrors()    const { return (messages & EShMsgRelaxedErrors) != 0; }
     bool suppressWarnings() const { return (messages & EShMsgSuppressWarnings) != 0; }
     bool isForwardCompatible() const { return forwardCompatible; }

+ 2 - 2
3rdparty/glslang/glslang/ResourceLimits/ResourceLimits.cpp

@@ -39,9 +39,9 @@
 
 #include "glslang/Public/ResourceLimits.h"
 
-TBuiltInResource Resources;
+static TBuiltInResource Resources;
 
-const TBuiltInResource DefaultTBuiltInResource = {
+static const TBuiltInResource DefaultTBuiltInResource = {
     /* .MaxLights = */ 32,
     /* .MaxClipPlanes = */ 6,
     /* .MaxTextureUnits = */ 32,

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است