浏览代码

[spirv] Translate intrinsic Interlocked* methods. (#722)

Ehsan 7 年之前
父节点
当前提交
a62dba918f

+ 5 - 0
tools/clang/include/clang/SPIRV/ModuleBuilder.h

@@ -156,6 +156,11 @@ public:
                                        uint32_t unequalMemorySemanticsId,
                                        uint32_t valueToOp, uint32_t comparator);
 
+  /// \brief Creates an OpImageTexelPointer SPIR-V instruction with the given
+  /// parameters.
+  uint32_t createImageTexelPointer(uint32_t resultType, uint32_t imageId,
+                                   uint32_t coordinate, uint32_t sample);
+
   /// \brief Creates SPIR-V instructions for sampling the given image.
   ///
   /// If compareVal is given a non-zero value, *Dref* variants of OpImageSample*

+ 12 - 0
tools/clang/lib/SPIRV/ModuleBuilder.cpp

@@ -331,6 +331,18 @@ spv::ImageOperandsMask ModuleBuilder::composeImageOperandsMask(
   return mask;
 }
 
+uint32_t ModuleBuilder::createImageTexelPointer(uint32_t resultType,
+                                                uint32_t imageId,
+                                                uint32_t coordinate,
+                                                uint32_t sample) {
+  assert(insertPoint && "null insert point");
+  const uint32_t id = theContext.takeNextId();
+  instBuilder.opImageTexelPointer(resultType, id, imageId, coordinate, sample)
+      .x();
+  insertPoint->appendInstruction(std::move(constructSite));
+  return id;
+}
+
 uint32_t ModuleBuilder::createImageSample(
     uint32_t texelType, uint32_t imageType, uint32_t image, uint32_t sampler,
     uint32_t coordinate, uint32_t compareVal, uint32_t bias, uint32_t lod,

+ 140 - 6
tools/clang/lib/SPIRV/SPIRVEmitter.cpp

@@ -203,28 +203,37 @@ bool spirvToolsOptimize(std::vector<uint32_t> *module, std::string *messages) {
   return optimizer.Run(module->data(), module->size(), module);
 }
 
-/// Translates RWByteAddressBuffer atomic method opcode into SPIR-V opcode.
-spv::Op translateRWBABufferAtomicMethods(hlsl::IntrinsicOp opcode) {
+/// Translates atomic HLSL opcodes into the equivalent SPIR-V opcode.
+spv::Op translateAtomicHlslOpcodeToSpirvOpcode(hlsl::IntrinsicOp opcode) {
   using namespace hlsl;
   using namespace spv;
 
   switch (opcode) {
+  case IntrinsicOp::IOP_InterlockedAdd:
   case IntrinsicOp::MOP_InterlockedAdd:
     return Op::OpAtomicIAdd;
+  case IntrinsicOp::IOP_InterlockedAnd:
   case IntrinsicOp::MOP_InterlockedAnd:
     return Op::OpAtomicAnd;
+  case IntrinsicOp::IOP_InterlockedOr:
   case IntrinsicOp::MOP_InterlockedOr:
     return Op::OpAtomicOr;
+  case IntrinsicOp::IOP_InterlockedXor:
   case IntrinsicOp::MOP_InterlockedXor:
     return Op::OpAtomicXor;
+  case IntrinsicOp::IOP_InterlockedUMax:
   case IntrinsicOp::MOP_InterlockedUMax:
     return Op::OpAtomicUMax;
+  case IntrinsicOp::IOP_InterlockedUMin:
   case IntrinsicOp::MOP_InterlockedUMin:
     return Op::OpAtomicUMin;
+  case IntrinsicOp::IOP_InterlockedMax:
   case IntrinsicOp::MOP_InterlockedMax:
     return Op::OpAtomicSMax;
+  case IntrinsicOp::IOP_InterlockedMin:
   case IntrinsicOp::MOP_InterlockedMin:
     return Op::OpAtomicSMin;
+  case IntrinsicOp::IOP_InterlockedExchange:
   case IntrinsicOp::MOP_InterlockedExchange:
     return Op::OpAtomicExchange;
   }
@@ -1588,9 +1597,9 @@ uint32_t SPIRVEmitter::processRWByteAddressBufferAtomicMethods(
       theBuilder.createStore(doExpr(expr->getArg(3)), originalVal);
   } else {
     const uint32_t value = doExpr(expr->getArg(1));
-    const uint32_t originalVal =
-        theBuilder.createAtomicOp(translateRWBABufferAtomicMethods(opcode),
-                                  uintType, ptr, scope, zero, value);
+    const uint32_t originalVal = theBuilder.createAtomicOp(
+        translateAtomicHlslOpcodeToSpirvOpcode(opcode), uintType, ptr, scope,
+        zero, value);
     if (expr->getNumArgs() > 2)
       theBuilder.createStore(doExpr(expr->getArg(2)), originalVal);
   }
@@ -3748,7 +3757,19 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
     return processIntrinsicUsingGLSLInst(callExpr, glslOpcode, doEachVec);     \
   } break
 
-  switch (static_cast<hlsl::IntrinsicOp>(opcode)) {
+  switch (const auto hlslOpcode = static_cast<hlsl::IntrinsicOp>(opcode)) {
+  case hlsl::IntrinsicOp::IOP_InterlockedAdd:
+  case hlsl::IntrinsicOp::IOP_InterlockedAnd:
+  case hlsl::IntrinsicOp::IOP_InterlockedMax:
+  case hlsl::IntrinsicOp::IOP_InterlockedUMax:
+  case hlsl::IntrinsicOp::IOP_InterlockedMin:
+  case hlsl::IntrinsicOp::IOP_InterlockedUMin:
+  case hlsl::IntrinsicOp::IOP_InterlockedOr:
+  case hlsl::IntrinsicOp::IOP_InterlockedXor:
+  case hlsl::IntrinsicOp::IOP_InterlockedExchange:
+  case hlsl::IntrinsicOp::IOP_InterlockedCompareStore:
+  case hlsl::IntrinsicOp::IOP_InterlockedCompareExchange:
+    return processIntrinsicInterlockedMethod(callExpr, hlslOpcode);
   case hlsl::IntrinsicOp::IOP_dot:
     return processIntrinsicDot(callExpr);
   case hlsl::IntrinsicOp::IOP_mul:
@@ -3869,6 +3890,119 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
   return 0;
 }
 
+uint32_t
+SPIRVEmitter::processIntrinsicInterlockedMethod(const CallExpr *expr,
+                                                hlsl::IntrinsicOp opcode) {
+  // The signature of intrinsic atomic methods are:
+  // void Interlocked*(in R dest, in T value);
+  // void Interlocked*(in R dest, in T value, out T original_value);
+
+  // Note: ALL Interlocked*() methods are forced to have an unsigned integer
+  // 'value'. Meaning, T is forced to be 'unsigned int'. If the provided
+  // parameter is not an unsigned integer, the frontend inserts an
+  // 'ImplicitCastExpr' to convert it to unsigned integer. OpAtomicIAdd (and
+  // other SPIR-V OpAtomic* instructions) require that the pointee in 'dest' to
+  // be of the same type as T. This will result in an invalid SPIR-V if 'dest'
+  // is a signed integer typed resource such as RWTexture1D<int>. For example,
+  // the following OpAtomicIAdd is invalid because the pointee type defined in
+  // %1 is a signed integer, while the value passed to atomic add (%3) is an
+  // unsigned integer.
+  //
+  //  %_ptr_Image_int = OpTypePointer Image %int
+  //  %1 = OpImageTexelPointer %_ptr_Image_int %RWTexture1D_int %index %uint_0
+  //  %2 = OpLoad %int %value
+  //  %3 = OpBitcast %uint %2   <-------- Inserted by the frontend
+  //  %4 = OpAtomicIAdd %int %1 %uint_1 %uint_0 %3
+  //
+  // In such cases, we bypass the forced IntegralCast.
+  // Moreover, the frontend does not add a cast AST node to cast uint to int
+  // where necessary. To ensure SPIR-V validity, we add that where necessary.
+
+  const uint32_t zero = theBuilder.getConstantUint32(0);
+  const uint32_t scope = theBuilder.getConstantUint32(1); // Device
+  const auto *dest = expr->getArg(0);
+  const auto baseType = dest->getType();
+  const uint32_t baseTypeId = typeTranslator.translateType(baseType);
+
+  const auto doArg = [baseType, this](const CallExpr *callExpr,
+                                      uint32_t argIndex) {
+    const Expr *valueExpr = callExpr->getArg(argIndex);
+    if (const auto *castExpr = dyn_cast<ImplicitCastExpr>(valueExpr))
+      if (castExpr->getCastKind() == CK_IntegralCast &&
+          castExpr->getSubExpr()->getType() == baseType)
+        valueExpr = castExpr->getSubExpr();
+
+    uint32_t argId = doExpr(valueExpr);
+    if (valueExpr->getType() != baseType)
+      argId = castToInt(argId, valueExpr->getType(), baseType);
+    return argId;
+  };
+
+  const auto writeToOutputArg = [&baseType, this](uint32_t toWrite,
+                                                  const CallExpr *callExpr,
+                                                  uint32_t outputArgIndex) {
+    const auto outputArg = callExpr->getArg(outputArgIndex);
+    const auto outputArgType = outputArg->getType();
+    if (baseType != outputArgType)
+      toWrite = castToInt(toWrite, baseType, outputArgType);
+    theBuilder.createStore(doExpr(outputArg), toWrite);
+  };
+
+  // If the argument is indexing into a texture/buffer, we need to create an
+  // OpImageTexelPointer instruction.
+  uint32_t ptr = 0;
+  if (const auto *callExpr = dyn_cast<CXXOperatorCallExpr>(dest)) {
+    const Expr *base = nullptr;
+    const Expr *index = nullptr;
+    if (isBufferTextureIndexing(callExpr, &base, &index)) {
+      const auto ptrType =
+          theBuilder.getPointerType(baseTypeId, spv::StorageClass::Image);
+      const auto baseId = doExpr(base);
+      const auto coordId = doExpr(index);
+      ptr = theBuilder.createImageTexelPointer(ptrType, baseId, coordId, zero);
+    }
+  } else {
+    ptr = doExpr(dest);
+  }
+
+  const bool isCompareExchange =
+      opcode == hlsl::IntrinsicOp::IOP_InterlockedCompareExchange;
+  const bool isCompareStore =
+      opcode == hlsl::IntrinsicOp::IOP_InterlockedCompareStore;
+
+  if (isCompareExchange || isCompareStore) {
+    const uint32_t comparator = doArg(expr, 1);
+    const uint32_t valueId = doArg(expr, 2);
+    const uint32_t originalVal = theBuilder.createAtomicCompareExchange(
+        baseTypeId, ptr, scope, zero, zero, valueId, comparator);
+    if (isCompareExchange)
+      writeToOutputArg(originalVal, expr, 3);
+  } else {
+    const uint32_t valueId = doArg(expr, 1);
+    // Since these atomic operations write through the provided pointer, the
+    // signed vs. unsigned opcode must be decided based on the pointee type
+    // of the first argument. However, the frontend decides the opcode based on
+    // the second argument (value), so the HLSL opcode it provides may be
+    // wrong. The following code corrects it to ensure we use the SPIR-V
+    // opcode matching the pointee type.
+    spv::Op atomicOp = translateAtomicHlslOpcodeToSpirvOpcode(opcode);
+    if (atomicOp == spv::Op::OpAtomicUMax && baseType->isSignedIntegerType())
+      atomicOp = spv::Op::OpAtomicSMax;
+    if (atomicOp == spv::Op::OpAtomicSMax && baseType->isUnsignedIntegerType())
+      atomicOp = spv::Op::OpAtomicUMax;
+    if (atomicOp == spv::Op::OpAtomicUMin && baseType->isSignedIntegerType())
+      atomicOp = spv::Op::OpAtomicSMin;
+    if (atomicOp == spv::Op::OpAtomicSMin && baseType->isUnsignedIntegerType())
+      atomicOp = spv::Op::OpAtomicUMin;
+    const uint32_t originalVal = theBuilder.createAtomicOp(
+        atomicOp, baseTypeId, ptr, scope, zero, valueId);
+    if (expr->getNumArgs() > 2)
+      writeToOutputArg(originalVal, expr, 2);
+  }
+
+  return 0;
+}
+
 uint32_t SPIRVEmitter::processIntrinsicModf(const CallExpr *callExpr) {
   // Signature is: ret modf(x, ip)
   // [in]    x: the input floating-point value.

+ 4 - 0
tools/clang/lib/SPIRV/SPIRVEmitter.h

@@ -317,6 +317,10 @@ private:
   SpirvEvalInfo processIntrinsicMemberCall(const CXXMemberCallExpr *expr,
                                            hlsl::IntrinsicOp opcode);
 
+  /// Processes Interlocked* intrinsic functions.
+  uint32_t processIntrinsicInterlockedMethod(const CallExpr *,
+                                             hlsl::IntrinsicOp);
+
 private:
   /// Returns the <result-id> for constant value 0 of the given type.
   uint32_t getValueZero(QualType type);

+ 287 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.hlsl

@@ -0,0 +1,287 @@
+// Run: %dxc -T ps_6_0 -E main
+
+RWTexture1D <int>   g_tTex1di1;
+RWTexture1D <uint>  g_tTex1du1;
+
+RWTexture2D <int>   g_tTex2di1;
+RWTexture2D <uint>  g_tTex2du1;
+
+RWTexture3D <int>   g_tTex3di1;
+RWTexture3D <uint>  g_tTex3du1;
+
+RWTexture1DArray <int>   g_tTex1di1a;
+RWTexture1DArray <uint>  g_tTex1du1a;
+
+RWTexture2DArray <int>   g_tTex2di1a;
+RWTexture2DArray <uint>  g_tTex2du1a;
+
+RWBuffer <int>   g_tBuffI;
+RWBuffer <uint>  g_tBuffU;
+
+void main()
+{
+  uint out_u1;
+  int out_i1;
+
+  uint  u1;
+  uint2 u2;
+  uint3 u3;
+  uint  u1b;
+  uint  u1c;
+
+  int   i1;
+  int2  i2;
+  int3  i3;
+  int   i1b;
+  int   i1c;
+
+  ////////////////////////////////////////////////////////////////////
+  /////   Test that type mismatches are resolved correctly    ////////
+  ////////////////////////////////////////////////////////////////////
+
+// CHECK:         [[idx0:%\d+]] = OpLoad %uint %u1
+// CHECK-NEXT:    [[ptr0:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1 [[idx0]] %uint_0
+// CHECK-NEXT:    [[i1_0:%\d+]] = OpLoad %int %i1
+// CHECK-NEXT:   [[iadd0:%\d+]] = OpAtomicIAdd %int [[ptr0]] %uint_1 %uint_0 [[i1_0]]
+// CHECK-NEXT: [[iadd0_u:%\d+]] = OpBitcast %uint [[iadd0]]
+// CHECK-NEXT:                    OpStore %out_u1 [[iadd0_u]]
+  InterlockedAdd(g_tTex1di1[u1], i1, out_u1); // Addition result must be cast to uint before being written to out_u1
+
+
+// CHECK:        [[ptr1:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1 {{%\d+}} %uint_0
+// CHECK-NEXT:   [[u1_1:%\d+]] = OpLoad %uint %u1
+// CHECK-NEXT: [[u1_int:%\d+]] = OpBitcast %int [[u1_1]]
+// CHECK-NEXT:  [[iadd1:%\d+]] = OpAtomicIAdd %int [[ptr1]] %uint_1 %uint_0 [[u1_int]]
+// CHECK-NEXT:                   OpStore %out_i1 [[iadd1]]
+  InterlockedAdd(g_tTex1di1[u1], u1, out_i1); // u1 should be cast to int before being passed to addition instruction
+
+// CHECK:         [[ptr2:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tTex1du1 {{%\d+}} %uint_0
+// CHECK-NEXT:    [[i1_2:%\d+]] = OpLoad %int %i1
+// CHECK-NEXT: [[i1_uint:%\d+]] = OpBitcast %uint [[i1_2]]
+// CHECK-NEXT:   [[iadd2:%\d+]] = OpAtomicIAdd %uint [[ptr2]] %uint_1 %uint_0 [[i1_uint]]
+// CHECK-NEXT:                    OpStore %out_u1 [[iadd2]]
+  InterlockedAdd(g_tTex1du1[u1], i1, out_u1); // i1 should be cast to uint before being passed to addition instruction
+
+// CHECK:           [[ptr3:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tTex1du1 {{%\d+}} %uint_0
+// CHECK-NEXT:      [[u1_3:%\d+]] = OpLoad %uint %u1
+// CHECK-NEXT:     [[iadd3:%\d+]] = OpAtomicIAdd %uint [[ptr3]] %uint_1 %uint_0 [[u1_3]]
+// CHECK-NEXT: [[iadd3_int:%\d+]] = OpBitcast %int [[iadd3]]
+// CHECK-NEXT:                      OpStore %out_i1 [[iadd3_int]]
+  InterlockedAdd(g_tTex1du1[u1], u1, out_i1); // Addition result must be cast to int before being written to out_i1
+
+
+// CHECK:           [[ptr4:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1 {{%\d+}} %uint_0
+// CHECK-NEXT:     [[u1b_4:%\d+]] = OpLoad %uint %u1b
+// CHECK-NEXT: [[u1b_4_int:%\d+]] = OpBitcast %int [[u1b_4]]
+// CHECK-NEXT:     [[i1c_4:%\d+]] = OpLoad %int %i1c
+// CHECK-NEXT:      [[ace4:%\d+]] = OpAtomicCompareExchange %int [[ptr4]] %uint_1 %uint_0 %uint_0 [[i1c_4]] [[u1b_4_int]]
+// CHECK-NEXT:                      OpStore %out_i1 [[ace4]]
+  InterlockedCompareExchange(g_tTex1di1[u1], u1b, i1c, out_i1); // u1b should first be cast to int
+
+
+// CHECK:           [[ptr5:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1 {{%\d+}} %uint_0
+// CHECK-NEXT:     [[i1b_5:%\d+]] = OpLoad %int %i1b
+// CHECK-NEXT:     [[u1c_5:%\d+]] = OpLoad %uint %u1c
+// CHECK-NEXT: [[u1c_5_int:%\d+]] = OpBitcast %int [[u1c_5]]
+// CHECK-NEXT:      [[ace5:%\d+]] = OpAtomicCompareExchange %int [[ptr5]] %uint_1 %uint_0 %uint_0 [[u1c_5_int]] [[i1b_5]]
+// CHECK-NEXT:                      OpStore %out_i1 [[ace5]]
+  InterlockedCompareExchange(g_tTex1di1[u1], i1b, u1c, out_i1); // u1c should first be cast to int
+
+// CHECK:           [[ptr6:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1 {{%\d+}} %uint_0
+// CHECK-NEXT:     [[i1b_6:%\d+]] = OpLoad %int %i1b
+// CHECK-NEXT:     [[i1c_6:%\d+]] = OpLoad %int %i1c
+// CHECK-NEXT:      [[ace6:%\d+]] = OpAtomicCompareExchange %int [[ptr6]] %uint_1 %uint_0 %uint_0 [[i1c_6]] [[i1b_6]]
+// CHECK-NEXT: [[ace6_uint:%\d+]] = OpBitcast %uint [[ace6]]
+// CHECK-NEXT:                      OpStore %out_u1 [[ace6_uint]]
+  InterlockedCompareExchange(g_tTex1di1[u1], i1b, i1c, out_u1); // original value must be cast to uint before being written to out_u1
+
+// CHECK:            [[ptr7:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tTex1du1 {{%\d+}} %uint_0
+// CHECK-NEXT:      [[u1b_7:%\d+]] = OpLoad %uint %u1b
+// CHECK-NEXT:      [[i1c_7:%\d+]] = OpLoad %int %i1c
+// CHECK-NEXT: [[i1c_7_uint:%\d+]] = OpBitcast %uint [[i1c_7]]
+// CHECK-NEXT:       [[ace7:%\d+]] = OpAtomicCompareExchange %uint [[ptr7]] %uint_1 %uint_0 %uint_0 [[i1c_7_uint]] [[u1b_7]]
+// CHECK-NEXT:                       OpStore %out_u1 [[ace7]]
+  InterlockedCompareExchange(g_tTex1du1[u1], u1b, i1c, out_u1); // i1c should first be cast to uint
+
+
+// CHECK:            [[ptr8:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tTex1du1 {{%\d+}} %uint_0
+// CHECK-NEXT:      [[i1b_8:%\d+]] = OpLoad %int %i1b
+// CHECK-NEXT: [[i1b_8_uint:%\d+]] = OpBitcast %uint [[i1b_8]]
+// CHECK-NEXT:      [[u1c_8:%\d+]] = OpLoad %uint %u1c
+// CHECK-NEXT:       [[ace8:%\d+]] = OpAtomicCompareExchange %uint [[ptr8]] %uint_1 %uint_0 %uint_0 [[u1c_8]] [[i1b_8_uint]]
+// CHECK-NEXT:                       OpStore %out_u1 [[ace8]]
+  InterlockedCompareExchange(g_tTex1du1[u1], i1b, u1c, out_u1); // i1b should first be cast to uint
+
+
+// CHECK:          [[ptr9:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tTex1du1 {{%\d+}} %uint_0
+// CHECK-NEXT:    [[u1b_9:%\d+]] = OpLoad %uint %u1b
+// CHECK-NEXT:    [[u1c_9:%\d+]] = OpLoad %uint %u1c
+// CHECK-NEXT:     [[ace9:%\d+]] = OpAtomicCompareExchange %uint [[ptr9]] %uint_1 %uint_0 %uint_0 [[u1c_9]] [[u1b_9]]
+// CHECK-NEXT: [[ace9_int:%\d+]] = OpBitcast %int [[ace9]]
+// CHECK-NEXT:                     OpStore %out_i1 [[ace9_int]]
+  InterlockedCompareExchange(g_tTex1du1[u1], u1b, u1c, out_i1); // original value must be cast to int before being written to out_i1
+
+
+//CHECK:             [[ptr10:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1 {{%\d+}} %uint_0
+//CHECK-NEXT:        [[u1_10:%\d+]] = OpLoad %uint %u1
+//CHECK-NEXT:    [[u1_10_int:%\d+]] = OpBitcast %int [[u1_10]]
+//CHECK-NEXT:      [[asmax10:%\d+]] = OpAtomicSMax %int [[ptr10]] %uint_1 %uint_0 [[u1_10_int]]
+//CHECK-NEXT: [[asmax10_uint:%\d+]] = OpBitcast %uint [[asmax10]]
+//CHECK-NEXT:                         OpStore %out_u1 [[asmax10_uint]]
+  // u1 should be cast to int first.
+  // AtomicSMax should be performed.
+  // Result should be cast to uint before being written to out_u1.
+  InterlockedMax(g_tTex1di1[u1], u1, out_u1);
+
+
+// CHECK:      [[ptr11:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tTex1du1 {{%\d+}} %uint_0
+// CHECK-NEXT: [[i1_11:%\d+]] = OpLoad %int %i1
+// CHECK-NEXT: [[i1_11_uint:%\d+]] = OpBitcast %uint [[i1_11]]
+// CHECK-NEXT: [[aumin11:%\d+]] = OpAtomicUMin %uint [[ptr11]] %uint_1 %uint_0 [[i1_11_uint]]
+// CHECK-NEXT: [[aumin11_int:%\d+]] = OpBitcast %int [[aumin11]]
+// CHECK-NEXT: OpStore %out_i1 [[aumin11_int]]
+  // i1 should be cast to uint first.
+  // AtomicUMin should be performed.
+  // Result should be cast to int before being written to out_i1.
+  InterlockedMin(g_tTex1du1[u1], i1, out_i1);
+
+
+
+  /////////////////////////////////////////////////////////////////////////////
+  /////    Test all Interlocked* functions on various resource types   ////////
+  /////////////////////////////////////////////////////////////////////////////
+
+// CHECK:      [[ptr12:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1 {{%\d+}} %uint_0
+// CHECK-NEXT: [[i1_12:%\d+]] = OpLoad %int %i1
+// CHECK-NEXT:       {{%\d+}} = OpAtomicIAdd %int [[ptr12]] %uint_1 %uint_0 [[i1_12]]
+  InterlockedAdd            (g_tTex1di1[u1], i1);
+
+// CHECK:       [[ptr13:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1 {{%\d+}} %uint_0
+// CHECK-NEXT:  [[i1_13:%\d+]] = OpLoad %int %i1
+// CHECK-NEXT: [[iadd13:%\d+]] = OpAtomicIAdd %int [[ptr13]] %uint_1 %uint_0 [[i1_13]]
+// CHECK-NEXT:                   OpStore %out_i1 [[iadd13]]
+  InterlockedAdd            (g_tTex1di1[u1], i1, out_i1);
+
+// CHECK:      [[ptr14:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1 {{%\d+}} %uint_0
+// CHECK-NEXT: [[i1_14:%\d+]] = OpLoad %int %i1
+// CHECK-NEXT:       {{%\d+}} = OpAtomicAnd %int [[ptr14]] %uint_1 %uint_0 [[i1_14]]
+  InterlockedAnd            (g_tTex1di1[u1], i1);
+
+// CHECK:      [[ptr15:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1 {{%\d+}} %uint_0
+// CHECK-NEXT: [[i1_15:%\d+]] = OpLoad %int %i1
+// CHECK-NEXT: [[and15:%\d+]] = OpAtomicAnd %int [[ptr15]] %uint_1 %uint_0 [[i1_15]]
+// CHECK-NEXT:                  OpStore %out_i1 [[and15]]
+  InterlockedAnd            (g_tTex1di1[u1], i1, out_i1);
+
+// CHECK:      [[ptr16:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tTex1du1 {{%\d+}} %uint_0
+// CHECK-NEXT: [[u1_16:%\d+]] = OpLoad %uint %u1
+// CHECK-NEXT: {{%\d+}} = OpAtomicUMax %uint [[ptr16]] %uint_1 %uint_0 [[u1_16]]
+  InterlockedMax(g_tTex1du1[u1], u1);
+
+// CHECK:        [[u2_17:%\d+]] = OpLoad %v2uint %u2
+// CHECK-NEXT:   [[ptr17:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex2di1 [[u2_17]] %uint_0
+// CHECK-NEXT:   [[i1_17:%\d+]] = OpLoad %int %i1
+// CHECK-NEXT: [[asmax17:%\d+]] = OpAtomicSMax %int [[ptr17]] %uint_1 %uint_0 [[i1_17]]
+// CHECK-NEXT:                    OpStore %out_i1 [[asmax17]]
+  InterlockedMax(g_tTex2di1[u2], i1, out_i1);
+
+// CHECK:      [[ptr18:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tTex2du1 {{%\d+}} %uint_0
+// CHECK-NEXT: [[u1_18:%\d+]] = OpLoad %uint %u1
+// CHECK-NEXT:       {{%\d+}} = OpAtomicUMin %uint [[ptr18]] %uint_1 %uint_0 [[u1_18]]
+  InterlockedMin(g_tTex2du1[u2], u1);
+
+// CHECK:        [[u3_19:%\d+]] = OpLoad %v3uint %u3
+// CHECK-NEXT:   [[ptr19:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex3di1 [[u3_19]] %uint_0
+// CHECK-NEXT:   [[i1_19:%\d+]] = OpLoad %int %i1
+// CHECK-NEXT: [[asmin19:%\d+]] = OpAtomicSMin %int [[ptr19]] %uint_1 %uint_0 [[i1_19]]
+// CHECK-NEXT:                    OpStore %out_i1 [[asmin19]]
+  InterlockedMin(g_tTex3di1[u3], i1, out_i1);
+
+// CHECK:      [[ptr20:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tTex3du1 {{%\d+}} %uint_0
+// CHECK-NEXT: [[u1_20:%\d+]] = OpLoad %uint %u1
+// CHECK-NEXT:       {{%\d+}} = OpAtomicOr %uint [[ptr20]] %uint_1 %uint_0 [[u1_20]]
+  InterlockedOr (g_tTex3du1[u3], u1);
+
+// CHECK:      [[ptr21:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1a {{%\d+}} %uint_0
+// CHECK-NEXT: [[i1_21:%\d+]] = OpLoad %int %i1
+// CHECK-NEXT:  [[or21:%\d+]] = OpAtomicOr %int [[ptr21]] %uint_1 %uint_0 [[i1_21]]
+// CHECK-NEXT:                  OpStore %out_i1 [[or21]]
+  InterlockedOr (g_tTex1di1a[u2], i1, out_i1);
+
+// CHECK:      [[ptr22:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tTex1du1a {{%\d+}} %uint_0
+// CHECK-NEXT: [[u1_22:%\d+]] = OpLoad %uint %u1
+// CHECK-NEXT:       {{%\d+}} = OpAtomicXor %uint [[ptr22]] %uint_1 %uint_0 [[u1_22]]
+  InterlockedXor(g_tTex1du1a[u2], u1);
+
+// CHECK:      [[ptr23:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tTex1di1a {{%\d+}} %uint_0
+// CHECK-NEXT: [[i1_23:%\d+]] = OpLoad %int %i1
+// CHECK-NEXT: [[xor23:%\d+]] = OpAtomicXor %int [[ptr23]] %uint_1 %uint_0 [[i1_23]]
+// CHECK-NEXT:                  OpStore %out_i1 [[xor23]]
+  InterlockedXor(g_tTex1di1a[u2], i1, out_i1);
+
+// CHECK:       [[ptr24:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tTex1du1a {{%\d+}} %uint_0
+// CHECK-NEXT:  [[u1_24:%\d+]] = OpLoad %uint %u1
+// CHECK-NEXT: [[u1b_24:%\d+]] = OpLoad %uint %u1b
+// CHECK-NEXT:        {{%\d+}} = OpAtomicCompareExchange %uint [[ptr24]] %uint_1 %uint_0 %uint_0 [[u1b_24]] [[u1_24]]
+  InterlockedCompareStore(g_tTex1du1a[u2], u1, u1b);
+
+// CHECK:       [[ptr25:%\d+]] = OpImageTexelPointer %_ptr_Image_int %g_tBuffI {{%\d+}} %uint_0
+// CHECK-NEXT: [[i1b_25:%\d+]] = OpLoad %int %i1b
+// CHECK-NEXT: [[i1c_25:%\d+]] = OpLoad %int %i1c
+// CHECK-NEXT:  [[ace25:%\d+]] = OpAtomicCompareExchange %int [[ptr25]] %uint_1 %uint_0 %uint_0 [[i1c_25]] [[i1b_25]]
+// CHECK-NEXT:                   OpStore %out_i1 [[ace25]]
+  InterlockedCompareExchange(g_tBuffI[u1], i1b, i1c, out_i1);
+
+// CHECK:      [[ptr26:%\d+]] = OpImageTexelPointer %_ptr_Image_uint %g_tBuffU {{%\d+}} %uint_0
+// CHECK-NEXT: [[u1_26:%\d+]] = OpLoad %uint %u1
+// CHECK-NEXT:  [[ae26:%\d+]] = OpAtomicExchange %uint [[ptr26]] %uint_1 %uint_0 [[u1_26]]
+// CHECK-NEXT:                  OpStore %out_u1 [[ae26]]
+  InterlockedExchange(g_tBuffU[u1], u1, out_u1);
+
+  //////////////////////////////////////////////////////////////////////////
+  ///////      Test all Interlocked* functions on primitive types     //////
+  ///////                Only int and uint are allowed                //////
+  //////////////////////////////////////////////////////////////////////////
+
+// CHECK:      [[i1b_27:%\d+]] = OpLoad %int %i1b
+// CHECK-NEXT: [[iadd27:%\d+]] = OpAtomicIAdd %int %i1 %uint_1 %uint_0 [[i1b_27]]
+// CHECK-NEXT:                   OpStore %out_i1 [[iadd27]]
+  InterlockedAdd(i1, i1b, out_i1);
+
+// CHECK:      [[and28:%\d+]] = OpAtomicAnd %uint %u1 %uint_1 %uint_0 %uint_10
+// CHECK-NEXT:                  OpStore %out_u1 [[and28]]
+  InterlockedAnd(u1, 10,  out_u1);
+
+// CHECK:       [[uint10:%\d+]] = OpBitcast %int %uint_10
+// CHECK-NEXT: [[asmax29:%\d+]] = OpAtomicSMax %int %i1 %uint_1 %uint_0 [[uint10]]
+// CHECK-NEXT:                    OpStore %out_i1 [[asmax29]]
+  InterlockedMax(i1, 10,  out_i1);
+
+// CHECK:      [[umin30:%\d+]] = OpAtomicUMin %uint %u1 %uint_1 %uint_0 %uint_10
+// CHECK-NEXT:                   OpStore %out_u1 [[umin30]]
+  InterlockedMin(u1, 10,  out_u1);
+
+// CHECK:      [[i1c_31:%\d+]] = OpLoad %int %i1c
+// CHECK-NEXT:   [[or31:%\d+]] = OpAtomicOr %int %i1 %uint_1 %uint_0 [[i1c_31]]
+// CHECK-NEXT:                   OpStore %out_i1 [[or31]]
+  InterlockedOr (i1, i1c, out_i1);
+
+// CHECK:      [[xor32:%\d+]] = OpAtomicXor %uint %u1 %uint_1 %uint_0 %uint_10
+// CHECK-NEXT:                  OpStore %out_u1 [[xor32]]
+  InterlockedXor(u1, 10,  out_u1);
+
+// CHECK:      [[i1b_33:%\d+]] = OpLoad %int %i1b
+// CHECK-NEXT: [[i1c_33:%\d+]] = OpLoad %int %i1c
+// CHECK-NEXT:        {{%\d+}} = OpAtomicCompareExchange %int %i1 %uint_1 %uint_0 %uint_0 [[i1c_33]] [[i1b_33]]
+  InterlockedCompareStore(i1, i1b, i1c);
+
+// CHECK:      [[ace34:%\d+]] = OpAtomicCompareExchange %uint %u1 %uint_1 %uint_0 %uint_0 %uint_20 %uint_15
+// CHECK-NEXT:                  OpStore %out_u1 [[ace34]]
+  InterlockedCompareExchange(u1, 15, 20, out_u1);
+
+// CHECK:      [[i1c_35:%\d+]] = OpLoad %int %i1c
+// CHECK-NEXT:  [[ace35:%\d+]] = OpAtomicExchange %int %i1 %uint_1 %uint_0 [[i1c_35]]
+// CHECK-NEXT:                   OpStore %out_i1 [[ace35]]
+  InterlockedExchange(i1, i1c, out_i1);
+}
+

+ 11 - 8
tools/clang/test/CodeGenSPIRV/method.rw-byte-address-buffer.atomic.hlsl

@@ -1,5 +1,8 @@
 // Run: %dxc -T ps_6_0 -E main
 
+// Note: According to HLSL reference (https://msdn.microsoft.com/en-us/library/windows/desktop/ff471475(v=vs.85).aspx),
+// all RWByteAddressBuffer atomic methods must take unsigned integers as parameters.
+
 RWByteAddressBuffer myBuffer;
 
 float4 main() : SV_Target
@@ -58,13 +61,13 @@ float4 main() : SV_Target
 
 // CHECK:      [[offset:%\d+]] = OpShiftRightLogical %uint %uint_16 %uint_2
 // CHECK-NEXT:    [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_uint %myBuffer %uint_0 [[offset]]
-// CHECK-NEXT:        {{%\d+}} = OpAtomicSMax %uint [[ptr]] %uint_1 %uint_0 %int_n42
-    myBuffer.InterlockedMax(16, -42);
+// CHECK-NEXT:        {{%\d+}} = OpAtomicUMax %uint [[ptr]] %uint_1 %uint_0 %uint_42
+    myBuffer.InterlockedMax(16, 42);
 // CHECK:      [[offset:%\d+]] = OpShiftRightLogical %uint %uint_16 %uint_2
 // CHECK-NEXT:    [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_uint %myBuffer %uint_0 [[offset]]
-// CHECK-NEXT:    [[val:%\d+]] = OpAtomicSMax %uint [[ptr]] %uint_1 %uint_0 %int_n42
+// CHECK-NEXT:    [[val:%\d+]] = OpAtomicUMax %uint [[ptr]] %uint_1 %uint_0 %uint_42
 // CHECK-NEXT:                   OpStore %originalVal [[val]]
-    myBuffer.InterlockedMax(16, -42, originalVal);
+    myBuffer.InterlockedMax(16, 42, originalVal);
 
 // CHECK:      [[offset:%\d+]] = OpShiftRightLogical %uint %uint_16 %uint_2
 // CHECK-NEXT:    [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_uint %myBuffer %uint_0 [[offset]]
@@ -78,13 +81,13 @@ float4 main() : SV_Target
 
 // CHECK:      [[offset:%\d+]] = OpShiftRightLogical %uint %uint_16 %uint_2
 // CHECK-NEXT:    [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_uint %myBuffer %uint_0 [[offset]]
-// CHECK-NEXT:        {{%\d+}} = OpAtomicSMin %uint [[ptr]] %uint_1 %uint_0 %int_n42
-    myBuffer.InterlockedMin(16, -42);
+// CHECK-NEXT:        {{%\d+}} = OpAtomicUMin %uint [[ptr]] %uint_1 %uint_0 %uint_42
+    myBuffer.InterlockedMin(16, 42);
 // CHECK:      [[offset:%\d+]] = OpShiftRightLogical %uint %uint_16 %uint_2
 // CHECK-NEXT:    [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_uint %myBuffer %uint_0 [[offset]]
-// CHECK-NEXT:    [[val:%\d+]] = OpAtomicSMin %uint [[ptr]] %uint_1 %uint_0 %int_n42
+// CHECK-NEXT:    [[val:%\d+]] = OpAtomicUMin %uint [[ptr]] %uint_1 %uint_0 %uint_42
 // CHECK-NEXT:                   OpStore %originalVal [[val]]
-    myBuffer.InterlockedMin(16, -42, originalVal);
+    myBuffer.InterlockedMin(16, 42, originalVal);
 
     // .InterlockedExchange() has no two-parameter overload.
 // CHECK:      [[offset:%\d+]] = OpShiftRightLogical %uint %uint_16 %uint_2

+ 3 - 0
tools/clang/unittests/SPIRV/CodeGenSPIRVTest.cpp

@@ -555,6 +555,9 @@ TEST_F(FileTest, IntrinsicsFwidth) { runFileTest("intrinsics.fwidth.hlsl"); }
 TEST_F(FileTest, IntrinsicsIsFinite) {
   runFileTest("intrinsics.isfinite.hlsl");
 }
+TEST_F(FileTest, IntrinsicsInterlockedMethods) {
+  runFileTest("intrinsics.interlocked-methods.hlsl");
+}
 TEST_F(FileTest, IntrinsicsIsInf) { runFileTest("intrinsics.isinf.hlsl"); }
 TEST_F(FileTest, IntrinsicsIsNan) { runFileTest("intrinsics.isnan.hlsl"); }
 TEST_F(FileTest, IntrinsicsLength) { runFileTest("intrinsics.length.hlsl"); }