
[spirv] Add SM 6.6 8-bit packed types and intrinsics (#3325)

* [spirv] Support for SM 6.6 pack_u8 and pack_s8.

* [spirv] Add support for SM 6.6 pack_clamp_{u|s}8.

* [spirv] Support SM 6.6 unpack intrinsics.

* [spirv] Remove unused variable.

* Remove comment.
Ehsan, 4 years ago
Parent
Commit
20a7c685cc
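
For orientation, a minimal C++ sketch (illustrative, not DXC code) of the packing these intrinsics perform; per SPIR-V's bitcast rules for vectors, element 0 maps to the least significant byte of the packed 32-bit result:

    #include <cstdint>

    // Scalar emulation of pack_u8/pack_s8: keep the low 8 bits of each of the
    // four lanes and pack them into one 32-bit value, lane 0 in the least
    // significant byte.
    uint32_t packLow8(const uint32_t lanes[4]) {
      uint32_t packed = 0;
      for (int i = 0; i < 4; ++i)
        packed |= (lanes[i] & 0xFFu) << (8 * i);
      return packed;
    }

For example, packLow8 on {0x101, 2, 3, 4} yields 0x04030201: the high bits of 0x101 are simply dropped.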

+ 25 - 0
tools/clang/lib/SPIRV/AstTypeProbe.cpp

@@ -438,6 +438,8 @@ uint32_t getElementSpirvBitwidth(const ASTContext &astContext, QualType type,
     case BuiltinType::Bool:
     case BuiltinType::Int:
     case BuiltinType::UInt:
+    case BuiltinType::Int8_4Packed:
+    case BuiltinType::UInt8_4Packed:
     case BuiltinType::Float:
       return 32;
     case BuiltinType::Double:
@@ -456,6 +458,11 @@ uint32_t getElementSpirvBitwidth(const ASTContext &astContext, QualType type,
     // if -enable-16bit-types is false.
     case BuiltinType::HalfFloat:
       return 32;
+    case BuiltinType::UChar:
+    case BuiltinType::Char_U:
+    case BuiltinType::SChar:
+    case BuiltinType::Char_S:
+      return 8;
     // The following types are treated as 16-bit if '-enable-16bit-types' option
     // is enabled. They are treated as 32-bit otherwise.
     case BuiltinType::Min12Int:
@@ -485,6 +492,24 @@ bool canTreatAsSameScalarType(QualType type1, QualType type2) {
   type2.removeLocalConst();
 
   return (type1.getCanonicalType() == type2.getCanonicalType()) ||
+         // Treat uint8_t4_packed and int8_t4_packed as the same because they
+         // are both represented as 32-bit unsigned integers in SPIR-V.
+         (type1->isSpecificBuiltinType(BuiltinType::Int8_4Packed) &&
+          type2->isSpecificBuiltinType(BuiltinType::UInt8_4Packed)) ||
+         (type2->isSpecificBuiltinType(BuiltinType::Int8_4Packed) &&
+          type1->isSpecificBuiltinType(BuiltinType::UInt8_4Packed)) ||
+         // Treat uint8_t4_packed and uint32_t as the same because they
+         // are both represented as 32-bit unsigned integers in SPIR-V.
+         (type1->isSpecificBuiltinType(BuiltinType::UInt) &&
+          type2->isSpecificBuiltinType(BuiltinType::UInt8_4Packed)) ||
+         (type2->isSpecificBuiltinType(BuiltinType::UInt) &&
+          type1->isSpecificBuiltinType(BuiltinType::UInt8_4Packed)) ||
+         // Treat int8_t4_packed and uint32_t as the same because they
+         // are both represented as 32-bit unsigned integers in SPIR-V.
+         (type1->isSpecificBuiltinType(BuiltinType::UInt) &&
+          type2->isSpecificBuiltinType(BuiltinType::Int8_4Packed)) ||
+         (type2->isSpecificBuiltinType(BuiltinType::UInt) &&
+          type1->isSpecificBuiltinType(BuiltinType::Int8_4Packed)) ||
          // Treat 'literal float' and 'float' as the same
          (type1->isSpecificBuiltinType(BuiltinType::LitFloat) &&
           type2->isFloatingType()) ||
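
The six new clauses are three symmetric pairs of the same test. A hypothetical helper (not part of this change) makes the pattern explicit:

    #include "clang/AST/Type.h"
    using namespace clang;

    // Sketch only: true if one type is 'a' and the other is 'b', in either
    // order. Each commented pair above is one call to this helper.
    static bool isBuiltinPair(QualType t1, QualType t2, BuiltinType::Kind a,
                              BuiltinType::Kind b) {
      return (t1->isSpecificBuiltinType(a) && t2->isSpecificBuiltinType(b)) ||
             (t2->isSpecificBuiltinType(a) && t1->isSpecificBuiltinType(b));
    }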

+ 4 - 0
tools/clang/lib/SPIRV/CapabilityVisitor.cpp

@@ -38,6 +38,10 @@ void CapabilityVisitor::addCapabilityForType(const SpirvType *type,
   // Integer-related capabilities
   if (const auto *intType = dyn_cast<IntegerType>(type)) {
     switch (intType->getBitwidth()) {
+    case 8: {
+      addCapability(spv::Capability::Int8);
+      break;
+    }
     case 16: {
       // Usage of a 16-bit integer type.
       addCapability(spv::Capability::Int16);

+ 12 - 0
tools/clang/lib/SPIRV/LowerTypeVisitor.cpp

@@ -284,6 +284,10 @@ const SpirvType *LowerTypeVisitor::lowerType(QualType type,
           return spvContext.getSIntType(32);
         case BuiltinType::UInt:
         case BuiltinType::ULong:
+        // The 'int8_t4_packed' and 'uint8_t4_packed' types are in fact 32-bit
+        // unsigned integers.
+        case BuiltinType::Int8_4Packed:
+        case BuiltinType::UInt8_4Packed:
           return spvContext.getUIntType(32);
 
           // void and bool
@@ -316,6 +320,14 @@ const SpirvType *LowerTypeVisitor::lowerType(QualType type,
         case BuiltinType::UShort: // uint16_t
           return spvContext.getUIntType(16);
 
+        // 8-bit integer types
+        case BuiltinType::UChar:
+        case BuiltinType::Char_U:
+          return spvContext.getUIntType(8);
+        case BuiltinType::SChar:
+        case BuiltinType::Char_S:
+          return spvContext.getSIntType(8);
+
           // Relaxed precision types
         case BuiltinType::Min10Float:
         case BuiltinType::Min16Float:

+ 1 - 1
tools/clang/lib/SPIRV/SpirvContext.cpp

@@ -98,7 +98,7 @@ SpirvContext::~SpirvContext() {
 }
 
 inline uint32_t log2ForBitwidth(uint32_t bitwidth) {
-  assert(bitwidth >= 16 && bitwidth <= 64 && llvm::isPowerOf2_32(bitwidth));
+  assert(bitwidth >= 8 && bitwidth <= 64 && llvm::isPowerOf2_32(bitwidth));
 
   return llvm::Log2_32(bitwidth);
 }
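
The assertion now admits 8 as the smallest valid bitwidth. A self-contained sketch of the same computation (llvm::Log2_32 replaced by a plain loop so it compiles on its own): 8 maps to 3, 16 to 4, 32 to 5, and 64 to 6.

    #include <cassert>
    #include <cstdint>

    // Standalone emulation of log2ForBitwidth with the relaxed precondition:
    // any power of two from 8 through 64 is accepted.
    uint32_t log2ForBitwidthSketch(uint32_t bitwidth) {
      assert(bitwidth >= 8 && bitwidth <= 64 &&
             (bitwidth & (bitwidth - 1)) == 0);
      uint32_t log = 0;
      while (bitwidth > 1) {
        bitwidth >>= 1;
        ++log;
      }
      return log;
    }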

+ 171 - 0
tools/clang/lib/SPIRV/SpirvEmitter.cpp

@@ -7451,6 +7451,20 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
     }
     break;
   }
+  case hlsl::IntrinsicOp::IOP_pack_s8:
+  case hlsl::IntrinsicOp::IOP_pack_u8:
+  case hlsl::IntrinsicOp::IOP_pack_clamp_s8:
+  case hlsl::IntrinsicOp::IOP_pack_clamp_u8: {
+    retVal = processIntrinsic8BitPack(callExpr, hlslOpcode);
+    break;
+  }
+  case hlsl::IntrinsicOp::IOP_unpack_s8s16:
+  case hlsl::IntrinsicOp::IOP_unpack_s8s32:
+  case hlsl::IntrinsicOp::IOP_unpack_u8u16:
+  case hlsl::IntrinsicOp::IOP_unpack_u8u32: {
+    retVal = processIntrinsic8BitUnpack(callExpr, hlslOpcode);
+    break;
+  }
   // DXR raytracing intrinsics
   case hlsl::IntrinsicOp::IOP_DispatchRaysDimensions:
   case hlsl::IntrinsicOp::IOP_DispatchRaysIndex:
@@ -9812,6 +9826,163 @@ SpirvEmitter::processIntrinsicLog10(const CallExpr *callExpr) {
   return spvBuilder.createBinaryOp(scaleOp, returnType, log2, scale, loc);
 }
 
+SpirvInstruction *
+SpirvEmitter::processIntrinsic8BitPack(const CallExpr *callExpr,
+                                       hlsl::IntrinsicOp op) {
+  const auto loc = callExpr->getExprLoc();
+  assert(op == hlsl::IntrinsicOp::IOP_pack_s8 ||
+         op == hlsl::IntrinsicOp::IOP_pack_u8 ||
+         op == hlsl::IntrinsicOp::IOP_pack_clamp_s8 ||
+         op == hlsl::IntrinsicOp::IOP_pack_clamp_u8);
+
+  // Here are the signatures for the pack intrinsic operations:
+  //
+  // uint8_t4_packed pack_u8(uint32_t4 unpackedVal);
+  // uint8_t4_packed pack_u8(uint16_t4 unpackedVal);
+  // int8_t4_packed pack_s8(int32_t4 unpackedVal);
+  // int8_t4_packed pack_s8(int16_t4 unpackedVal);
+  //
+  // These functions take a vec4 of 16-bit or 32-bit integers as input. For each
+  // element of the vec4, they pick the lower 8 bits, and drop the other bits.
+  // The result is four 8-bit values (32 bits in total) packed into a single
+  // unsigned 32-bit integer (uint32_t).
+  //
+  //
+  // Here are the signatures for the pack_clamp intrinsic operations:
+  //
+  // uint8_t4_packed pack_clamp_u8(int32_t4 val); // Pack and Clamp [0, 255]
+  // uint8_t4_packed pack_clamp_u8(int16_t4 val); // Pack and Clamp [0, 255]
+  //
+  // int8_t4_packed pack_clamp_s8(int32_t4 val);  // Pack and Clamp [-128, 127]
+  // int8_t4_packed pack_clamp_s8(int16_t4 val);  // Pack and Clamp [-128, 127]
+  //
+  // These functions take a vec4 of 16-bit or 32-bit integers as input. For each
+  // element of the vec4, they first clamp the value to a range (depending on
+  // the signedness) then pick the lower 8 bits, and drop the other bits.
+  // The result is four 8-bit values (32 bits in total) packed into a single
+  // unsigned 32-bit integer (uint32_t).
+  //
+  // Note: uint8_t4_packed and int8_t4_packed are NOT vector types! They are
+  // both scalar 32-bit unsigned integer types where each byte represents one
+  // value.
+  //
+  // Note: In pack_clamp_{s|u}8 intrinsics, an input of 0x100 will be turned
+  // into 0xFF, not 0x00. Therefore, it is important to perform a clamp first,
+  // and then a truncation.
+
+  // Steps:
+  // Use GLSL extended instruction set's clamp (only for clamp instructions).
+  // Use OpUConvert/OpSConvert to truncate each element of the vec4 to 8 bits.
+  // Use OpBitcast to make a 32-bit uint out of the new vec4.
+  auto *arg = callExpr->getArg(0);
+  const auto argType = arg->getType();
+  SpirvInstruction *argInstr = doExpr(arg);
+  QualType elemType = {};
+  uint32_t elemCount = 0;
+  (void)isVectorType(argType, &elemType, &elemCount);
+  const bool isSigned = elemType->isSignedIntegerType();
+  assert(elemCount == 4);
+
+  const bool doesClamp = op == hlsl::IntrinsicOp::IOP_pack_clamp_s8 ||
+                         op == hlsl::IntrinsicOp::IOP_pack_clamp_u8;
+  if (doesClamp) {
+    const auto bitwidth = getElementSpirvBitwidth(
+        astContext, elemType, spirvOptions.enable16BitTypes);
+    int32_t clampMin = op == hlsl::IntrinsicOp::IOP_pack_clamp_u8 ? 0 : -128;
+    int32_t clampMax = op == hlsl::IntrinsicOp::IOP_pack_clamp_u8 ? 255 : 127;
+    auto *minInstr = spvBuilder.getConstantInt(
+        elemType, llvm::APInt(bitwidth, clampMin, isSigned));
+    auto *maxInstr = spvBuilder.getConstantInt(
+        elemType, llvm::APInt(bitwidth, clampMax, isSigned));
+    auto *minVec = spvBuilder.getConstantComposite(
+        argType, {minInstr, minInstr, minInstr, minInstr});
+    auto *maxVec = spvBuilder.getConstantComposite(
+        argType, {maxInstr, maxInstr, maxInstr, maxInstr});
+    auto clampOp = isSigned ? GLSLstd450SClamp : GLSLstd450UClamp;
+    argInstr = spvBuilder.createGLSLExtInst(argType, clampOp,
+                                            {argInstr, minVec, maxVec}, loc);
+  }
+
+  if (isSigned) {
+    QualType v4Int8Type =
+        astContext.getExtVectorType(astContext.SignedCharTy, 4);
+    auto *bytesVecInstr = spvBuilder.createUnaryOp(spv::Op::OpSConvert,
+                                                   v4Int8Type, argInstr, loc);
+    return spvBuilder.createUnaryOp(
+        spv::Op::OpBitcast, astContext.Int8_4PackedTy, bytesVecInstr, loc);
+  } else {
+    QualType v4Uint8Type =
+        astContext.getExtVectorType(astContext.UnsignedCharTy, 4);
+    auto *bytesVecInstr = spvBuilder.createUnaryOp(spv::Op::OpUConvert,
+                                                   v4Uint8Type, argInstr, loc);
+    return spvBuilder.createUnaryOp(
+        spv::Op::OpBitcast, astContext.UInt8_4PackedTy, bytesVecInstr, loc);
+  }
+}
+
+SpirvInstruction *
+SpirvEmitter::processIntrinsic8BitUnpack(const CallExpr *callExpr,
+                                         hlsl::IntrinsicOp op) {
+  const auto loc = callExpr->getExprLoc();
+  assert(op == hlsl::IntrinsicOp::IOP_unpack_s8s16 ||
+         op == hlsl::IntrinsicOp::IOP_unpack_s8s32 ||
+         op == hlsl::IntrinsicOp::IOP_unpack_u8u16 ||
+         op == hlsl::IntrinsicOp::IOP_unpack_u8u32);
+
+  // Here are the signatures for the unpack intrinsic operations:
+  //
+  // int16_t4 unpack_s8s16(int8_t4_packed packedVal);   // Sign Extended
+  // uint16_t4 unpack_u8u16(uint8_t4_packed packedVal); // Non-Sign Extended
+  // int32_t4 unpack_s8s32(int8_t4_packed packedVal);   // Sign Extended
+  // uint32_t4 unpack_u8u32(uint8_t4_packed packedVal); // Non-Sign Extended
+  //
+  // These functions take a 32-bit unsigned integer as input (where each byte of
+  // the input represents one value, i.e. it's packed). They first unpack the
+  // 32-bit integer to a vector of 4 bytes. Then for each element of the vec4,
+  // they zero-extend or sign-extend the byte to produce a vector of 16-bit or
+  // 32-bit integers.
+  //
+  // Note: uint8_t4_packed and int8_t4_packed are NOT vector types! They are
+  // both scalar 32-bit unsigned integer types where each byte represents one
+  // value.
+
+  // Steps:
+  // Use OpBitcast to make a vec4 of bytes from a 32-bit value.
+  // Use OpUConvert/OpSConvert to zero-extend/sign-extend each element of the
+  // vec4 to 16 or 32 bits.
+  auto *arg = callExpr->getArg(0);
+  SpirvInstruction *argInstr = doExpr(arg);
+
+  const bool isSigned = op == hlsl::IntrinsicOp::IOP_unpack_s8s16 ||
+                        op == hlsl::IntrinsicOp::IOP_unpack_s8s32;
+
+  QualType resultType = {};
+  if (op == hlsl::IntrinsicOp::IOP_unpack_s8s16 ||
+      op == hlsl::IntrinsicOp::IOP_unpack_u8u16) {
+    resultType = astContext.getExtVectorType(
+        isSigned ? astContext.ShortTy : astContext.UnsignedShortTy, 4);
+  } else {
+    resultType = astContext.getExtVectorType(
+        isSigned ? astContext.IntTy : astContext.UnsignedIntTy, 4);
+  }
+
+  if (isSigned) {
+    QualType v4Int8Type =
+        astContext.getExtVectorType(astContext.SignedCharTy, 4);
+    auto *bytesVecInstr =
+        spvBuilder.createUnaryOp(spv::Op::OpBitcast, v4Int8Type, argInstr, loc);
+    return spvBuilder.createUnaryOp(spv::Op::OpSConvert, resultType,
+                                    bytesVecInstr, loc);
+  } else {
+    QualType v4Uint8Type =
+        astContext.getExtVectorType(astContext.UnsignedCharTy, 4);
+    auto *bytesVecInstr = spvBuilder.createUnaryOp(spv::Op::OpBitcast,
+                                                   v4Uint8Type, argInstr, loc);
+    return spvBuilder.createUnaryOp(spv::Op::OpUConvert, resultType,
+                                    bytesVecInstr, loc);
+  }
+}
+
 SpirvInstruction *SpirvEmitter::processRayBuiltins(const CallExpr *callExpr,
                                                    hlsl::IntrinsicOp op) {
   bool nvRayTracing =
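
To make the clamp-before-truncate note above concrete, an illustrative C++ emulation of one lane of pack_clamp_u8 (not DXC code): with input 0x100 it returns 0xFF, whereas truncating first would return 0x00.

    #include <algorithm>
    #include <cstdint>

    // One lane of pack_clamp_u8 on a 32-bit signed input: clamp to [0, 255]
    // first, then keep the low 8 bits.
    uint8_t packClampU8Lane(int32_t v) {
      int32_t clamped = std::min(std::max(v, 0), 255);
      return static_cast<uint8_t>(clamped);
    }
    // packClampU8Lane(0x100) == 0xFF; (0x100 & 0xFF) would be 0x00.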

+ 9 - 0
tools/clang/lib/SPIRV/SpirvEmitter.h

@@ -549,6 +549,15 @@ private:
   /// Processes the NonUniformResourceIndex intrinsic function.
   SpirvInstruction *processIntrinsicNonUniformResourceIndex(const CallExpr *);
 
+  /// Processes the SM 6.6 pack_{s|u}8 and pack_clamp_{s|u}8 intrinsic
+  /// functions.
+  SpirvInstruction *processIntrinsic8BitPack(const CallExpr *,
+                                             hlsl::IntrinsicOp);
+
+  /// Processes the SM 6.6 unpack_{s|u}8{s|u}{16|32} intrinsic functions.
+  SpirvInstruction *processIntrinsic8BitUnpack(const CallExpr *,
+                                               hlsl::IntrinsicOp);
+
   /// Process builtins specific to raytracing.
   SpirvInstruction *processRayBuiltins(const CallExpr *, hlsl::IntrinsicOp op);
 

+ 66 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.sm6_6.pack_clamp_s8u8.hlsl

@@ -0,0 +1,66 @@
+// Run: %dxc -E main -T ps_6_6 -enable-16bit-types
+
+float4 main(int16_t4 input1 : Inputs1, int16_t4 input2 : Inputs2) : SV_Target {
+  int16_t4 v4int16_var;
+  int32_t4 v4int32_var;
+
+// Note: pack_clamp_s8 and pack_clamp_u8 do NOT accept an unsigned argument.
+
+// CHECK:           [[glsl_set:%\d+]] = OpExtInstImport "GLSL.std.450"
+
+// CHECK:                      %short = OpTypeInt 16 1
+// CHECK:                    %v4short = OpTypeVector %short 4
+
+// CHECK: [[const_v4short_n128:%\d+]] = OpConstantComposite %v4short %short_n128 %short_n128 %short_n128 %short_n128
+// CHECK:  [[const_v4short_127:%\d+]] = OpConstantComposite %v4short %short_127 %short_127 %short_127 %short_127
+
+// CHECK:   [[const_v4int_n128:%\d+]] = OpConstantComposite %v4int %int_n128 %int_n128 %int_n128 %int_n128
+// CHECK:    [[const_v4int_127:%\d+]] = OpConstantComposite %v4int %int_127 %int_127 %int_127 %int_127
+
+// CHECK:    [[const_v4short_0:%\d+]] = OpConstantComposite %v4short %short_0 %short_0 %short_0 %short_0
+// CHECK:  [[const_v4short_255:%\d+]] = OpConstantComposite %v4short %short_255 %short_255 %short_255 %short_255
+
+// CHECK:      [[const_v4int_0:%\d+]] = OpConstantComposite %v4int %int_0 %int_0 %int_0 %int_0
+// CHECK:    [[const_v4int_255:%\d+]] = OpConstantComposite %v4int %int_255 %int_255 %int_255 %int_255
+
+// CHECK:                       %char = OpTypeInt 8 1
+// CHECK:                     %v4char = OpTypeVector %char 4
+
+  ////////////////////////////
+  // pack_clamp_s8 variants //
+  ////////////////////////////
+
+// CHECK: [[v4int16_var:%\d+]] = OpLoad %v4short %v4int16_var
+// CHECK:     [[clamped:%\d+]] = OpExtInst %v4short [[glsl_set]] SClamp [[v4int16_var]] [[const_v4short_n128]] [[const_v4short_127]]
+// CHECK:   [[truncated:%\d+]] = OpSConvert %v4char [[clamped]]
+// CHECK:      [[packed:%\d+]] = OpBitcast %uint [[truncated]]
+// CHECK:                        OpStore %ps1 [[packed]]
+  int8_t4_packed ps1 = pack_clamp_s8(v4int16_var);
+
+// CHECK: [[v4int32_var:%\d+]] = OpLoad %v4int %v4int32_var
+// CHECK:     [[clamped:%\d+]] = OpExtInst %v4int [[glsl_set]] SClamp [[v4int32_var]] [[const_v4int_n128]] [[const_v4int_127]]
+// CHECK:   [[truncated:%\d+]] = OpSConvert %v4char [[clamped]]
+// CHECK:      [[packed:%\d+]] = OpBitcast %uint [[truncated]]
+// CHECK:                        OpStore %ps3 [[packed]]
+  int8_t4_packed ps3 = pack_clamp_s8(v4int32_var);
+
+  ////////////////////////////
+  // pack_clamp_u8 variants //
+  ////////////////////////////
+
+// CHECK: [[v4int16_var:%\d+]] = OpLoad %v4short %v4int16_var
+// CHECK:     [[clamped:%\d+]] = OpExtInst %v4short [[glsl_set]] SClamp [[v4int16_var]] [[const_v4short_0]] [[const_v4short_255]]
+// CHECK:   [[truncated:%\d+]] = OpSConvert %v4char [[clamped]]
+// CHECK:      [[packed:%\d+]] = OpBitcast %uint [[truncated]]
+// CHECK:                        OpStore %pu1 [[packed]]
+  uint8_t4_packed pu1 = pack_clamp_u8(v4int16_var);
+
+// CHECK: [[v4int32_var:%\d+]] = OpLoad %v4int %v4int32_var
+// CHECK:     [[clamped:%\d+]] = OpExtInst %v4int [[glsl_set]] SClamp [[v4int32_var]] [[const_v4int_0]] [[const_v4int_255]]
+// CHECK:   [[truncated:%\d+]] = OpSConvert %v4char [[clamped]]
+// CHECK:      [[packed:%\d+]] = OpBitcast %uint [[truncated]]
+// CHECK:                        OpStore %pu3 [[packed]]
+  uint8_t4_packed pu3 = pack_clamp_u8(v4int32_var);
+
+  return 0.xxxx;
+}

+ 75 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.sm6_6.pack_s8u8.hlsl

@@ -0,0 +1,75 @@
+// Run: %dxc -E main -T ps_6_6 -enable-16bit-types
+
+// CHECK:    %short = OpTypeInt 16 1
+// CHECK:  %v4short = OpTypeVector %short 4
+// CHECK:   %ushort = OpTypeInt 16 0
+// CHECK: %v4ushort = OpTypeVector %ushort 4
+// CHECK:     %char = OpTypeInt 8 1
+// CHECK:   %v4char = OpTypeVector %char 4
+// CHECK:    %uchar = OpTypeInt 8 0
+// CHECK:  %v4uchar = OpTypeVector %uchar 4
+
+float4 main(int16_t4 input1 : Inputs1, int16_t4 input2 : Inputs2) : SV_Target {
+  int16_t4 v4int16_var;
+  uint16_t4 v4uint16_var;
+
+  int32_t4 v4int32_var;
+  uint32_t4 v4uint32_var;
+
+  //////////////////////
+  // pack_s8 variants //
+  //////////////////////
+
+// CHECK: [[v4int16_var:%\d+]] = OpLoad %v4short %v4int16_var
+// CHECK:   [[bytes_vec:%\d+]] = OpSConvert %v4char [[v4int16_var]]
+// CHECK:      [[packed:%\d+]] = OpBitcast %uint [[bytes_vec]]
+// CHECK:                        OpStore %ps1 [[packed]]
+  int8_t4_packed ps1 = pack_s8(v4int16_var);
+
+// CHECK: [[v4uint16_var:%\d+]] = OpLoad %v4ushort %v4uint16_var
+// CHECK:    [[bytes_vec:%\d+]] = OpUConvert %v4uchar [[v4uint16_var]]
+// CHECK:       [[packed:%\d+]] = OpBitcast %uint [[bytes_vec]]
+// CHECK:                         OpStore %ps2 [[packed]]
+  int8_t4_packed ps2 = pack_s8(v4uint16_var);
+
+// CHECK: [[v4int32_var:%\d+]] = OpLoad %v4int %v4int32_var
+// CHECK:   [[bytes_vec:%\d+]] = OpSConvert %v4char [[v4int32_var]]
+// CHECK:      [[packed:%\d+]] = OpBitcast %uint [[bytes_vec]]
+// CHECK:                        OpStore %ps3 [[packed]]
+  int8_t4_packed ps3 = pack_s8(v4int32_var);
+
+// CHECK: [[v4uint32_var:%\d+]] = OpLoad %v4uint %v4uint32_var
+// CHECK:    [[bytes_vec:%\d+]] = OpUConvert %v4uchar [[v4uint32_var]]
+// CHECK:       [[packed:%\d+]] = OpBitcast %uint [[bytes_vec]]
+// CHECK:                         OpStore %ps4 [[packed]]
+  int8_t4_packed ps4 = pack_s8(v4uint32_var);
+
+  //////////////////////
+  // pack_u8 variants //
+  //////////////////////
+
+// CHECK: [[v4int16_var:%\d+]] = OpLoad %v4short %v4int16_var
+// CHECK:   [[bytes_vec:%\d+]] = OpSConvert %v4char [[v4int16_var]]
+// CHECK:      [[packed:%\d+]] = OpBitcast %uint [[bytes_vec]]
+// CHECK:                        OpStore %pu1 [[packed]]
+  uint8_t4_packed pu1 = pack_u8(v4int16_var);
+
+// CHECK: [[v4uint16_var:%\d+]] = OpLoad %v4ushort %v4uint16_var
+// CHECK:    [[bytes_vec:%\d+]] = OpUConvert %v4uchar [[v4uint16_var]]
+// CHECK:       [[packed:%\d+]] = OpBitcast %uint [[bytes_vec]]
+// CHECK:                         OpStore %pu2 [[packed]]
+  uint8_t4_packed pu2 = pack_u8(v4uint16_var);
+
+// CHECK: [[v4int32_var:%\d+]] = OpLoad %v4int %v4int32_var
+// CHECK:   [[bytes_vec:%\d+]] = OpSConvert %v4char [[v4int32_var]]
+// CHECK:      [[packed:%\d+]] = OpBitcast %uint [[bytes_vec]]
+// CHECK:                        OpStore %pu3 [[packed]]
+  uint8_t4_packed pu3 = pack_u8(v4int32_var);
+
+// CHECK: [[v4uint32_var:%\d+]] = OpLoad %v4uint %v4uint32_var
+// CHECK:    [[bytes_vec:%\d+]] = OpUConvert %v4uchar [[v4uint32_var]]
+// CHECK:       [[packed:%\d+]] = OpBitcast %uint [[bytes_vec]]
+// CHECK:                         OpStore %pu4 [[packed]]
+  uint8_t4_packed pu4 = pack_u8(v4uint32_var);
+
+  return 0.xxxx;
+}

+ 58 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.sm6_6.unpack.hlsl

@@ -0,0 +1,58 @@
+// Run: %dxc -E main -T ps_6_6 -enable-16bit-types
+
+float4 main(int16_t4 input1 : Inputs1, int16_t4 input2 : Inputs2) : SV_Target {
+  // Note: both int8_t4_packed and uint8_t4_packed are represented as
+  // 32-bit unsigned integers in SPIR-V.
+  int8_t4_packed signedPacked;
+  uint8_t4_packed unsignedPacked;
+
+// CHECK:    [[packed:%\d+]] = OpLoad %uint %unsignedPacked
+// CHECK: [[bytes_vec:%\d+]] = OpBitcast %v4char [[packed]]
+// CHECK:  [[unpacked:%\d+]] = OpSConvert %v4short [[bytes_vec]]
+// CHECK:                      OpStore %up1 [[unpacked]]
+  int16_t4 up1 = unpack_s8s16(unsignedPacked);
+
+// CHECK:    [[packed:%\d+]] = OpLoad %uint %signedPacked
+// CHECK: [[bytes_vec:%\d+]] = OpBitcast %v4char [[packed]]
+// CHECK:  [[unpacked:%\d+]] = OpSConvert %v4short [[bytes_vec]]
+// CHECK:                      OpStore %up2 [[unpacked]]
+  int16_t4 up2 = unpack_s8s16(signedPacked);
+
+// CHECK:    [[packed:%\d+]] = OpLoad %uint %unsignedPacked
+// CHECK: [[bytes_vec:%\d+]] = OpBitcast %v4char [[packed]]
+// CHECK:  [[unpacked:%\d+]] = OpSConvert %v4int [[bytes_vec]]
+// CHECK:                      OpStore %up3 [[unpacked]]
+  int32_t4 up3 = unpack_s8s32(unsignedPacked);
+
+// CHECK:    [[packed:%\d+]] = OpLoad %uint %signedPacked
+// CHECK: [[bytes_vec:%\d+]] = OpBitcast %v4char [[packed]]
+// CHECK:  [[unpacked:%\d+]] = OpSConvert %v4int [[bytes_vec]]
+// CHECK:                      OpStore %up4 [[unpacked]]
+  int32_t4 up4 = unpack_s8s32(signedPacked);
+
+// CHECK:    [[packed:%\d+]] = OpLoad %uint %unsignedPacked
+// CHECK: [[bytes_vec:%\d+]] = OpBitcast %v4uchar [[packed]]
+// CHECK:  [[unpacked:%\d+]] = OpUConvert %v4ushort [[bytes_vec]]
+// CHECK:                      OpStore %up5 [[unpacked]]
+  uint16_t4 up5 = unpack_u8u16(unsignedPacked);
+
+// CHECK:    [[packed:%\d+]] = OpLoad %uint %signedPacked
+// CHECK: [[bytes_vec:%\d+]] = OpBitcast %v4uchar [[packed]]
+// CHECK:  [[unpacked:%\d+]] = OpUConvert %v4ushort [[bytes_vec]]
+// CHECK:                      OpStore %up6 [[unpacked]]
+  uint16_t4 up6 = unpack_u8u16(signedPacked);
+
+// CHECK:    [[packed:%\d+]] = OpLoad %uint %unsignedPacked
+// CHECK: [[bytes_vec:%\d+]] = OpBitcast %v4uchar [[packed]]
+// CHECK:  [[unpacked:%\d+]] = OpUConvert %v4uint [[bytes_vec]]
+// CHECK:                      OpStore %up7 [[unpacked]]
+  uint32_t4 up7 = unpack_u8u32(unsignedPacked);
+
+// CHECK:    [[packed:%\d+]] = OpLoad %uint %signedPacked
+// CHECK: [[bytes_vec:%\d+]] = OpBitcast %v4uchar [[packed]]
+// CHECK:  [[unpacked:%\d+]] = OpUConvert %v4uint [[bytes_vec]]
+// CHECK:                      OpStore %up8 [[unpacked]]
+  uint32_t4 up8 = unpack_u8u32(signedPacked);
+
+  return 0.xxxx;
+}
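
As a cross-check on the values this test expects, an illustrative scalar C++ emulation of the unpack semantics: byte i of the packed word becomes element i, sign-extended for the s8 variants and zero-extended for the u8 variants.

    #include <cstdint>

    // Emulates unpack_s8s32: each byte, reinterpreted as int8_t and
    // sign-extended to 32 bits.
    void unpackS8S32(uint32_t packed, int32_t out[4]) {
      for (int i = 0; i < 4; ++i)
        out[i] = static_cast<int8_t>((packed >> (8 * i)) & 0xFFu);
    }

    // Emulates unpack_u8u32: the same bytes, zero-extended.
    void unpackU8U32(uint32_t packed, uint32_t out[4]) {
      for (int i = 0; i < 4; ++i)
        out[i] = (packed >> (8 * i)) & 0xFFu;
    }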

+ 11 - 0
tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp

@@ -1267,6 +1267,17 @@ TEST_F(FileTest, IntrinsicsVkReadClock) {
   runFileTest("intrinsics.vkreadclock.hlsl");
 }
 
+// Intrinsics added in SM 6.6
+TEST_F(FileTest, IntrinsicsSM66PackU8S8) {
+  runFileTest("intrinsics.sm6_6.pack_s8u8.hlsl");
+}
+TEST_F(FileTest, IntrinsicsSM66PackClampU8S8) {
+  runFileTest("intrinsics.sm6_6.pack_clamp_s8u8.hlsl");
+}
+TEST_F(FileTest, IntrinsicsSM66Unpack) {
+  runFileTest("intrinsics.sm6_6.unpack.hlsl");
+}
+
 // For attributes
 TEST_F(FileTest, AttributeEarlyDepthStencil) {
   runFileTest("attribute.earlydepthstencil.ps.hlsl");