ソースを参照

[spirv] Translate *MemoryBarrier* intrinsics. (#790)

* Translation of GroupMemoryBarrier(WithGroupSync)
* Translation of DeviceMemoryBarrier(WithGroupSync)
* Translation of AllMemoryBarrier(WithGroupSync)
Ehsan 7 年 前
コミット
4d97841e33

+ 32 - 26
docs/SPIR-V.rst

@@ -1414,32 +1414,38 @@ Using SPIR-V opcode
 
 The following intrinsic HLSL functions have direct SPIR-V opcodes for them:
 
-============================== =================================
-   HLSL Intrinsic Function              SPIR-V Opcode
-============================== =================================
-``countbits``                  ``OpBitCount``
-``ddx``                        ``OpDPdx``
-``ddy``                        ``OpDPdy``
-``ddx_coarse``                 ``OpDPdxCoarse``
-``ddy_coarse``                 ``OpDPdyCoarse``
-``ddx_fine``                   ``OpDPdxFine``
-``ddy_fine``                   ``OpDPdyFine``
-``fmod``                       ``OpFMod``
-``fwidth``                     ``OpFwidth``
-``InterlockedAdd``             ``OpAtomicIAdd``
-``InterlockedAnd``             ``OpAtomicAnd``
-``InterlockedOr``              ``OpAtomicOr``
-``InterlockedXor``             ``OpAtomicXor``
-``InterlockedMin``             ``OpAtomicUMin``/``OpAtomicSMin``
-``InterlockedMax``             ``OpAtomicUMax``/``OpAtomicSMax``
-``InterlockedExchange``        ``OpAtomicExchange``
-``InterlockedCompareExchange`` ``OpAtomicCompareExchange``
-``InterlockedCompareStore``    ``OpAtomicCompareExchange``
-``isnan``                      ``OpIsNan``
-``isInf``                      ``OpIsInf``
-``reversebits``                ``OpBitReverse``
-``transpose``                  ``OpTranspose``
-============================== =================================
+==================================== =================================
+   HLSL Intrinsic Function                   SPIR-V Opcode
+==================================== =================================
+``AllMemoryBarrier``                 ``OpMemoryBarrier``
+``AllMemoryBarrierWithGroupSync``    ``OpControlBarrier``
+``countbits``                        ``OpBitCount``
+``DeviceMemoryBarrier``              ``OpMemoryBarrier``
+``DeviceMemoryBarrierWithGroupSync`` ``OpControlBarrier``
+``ddx``                              ``OpDPdx``
+``ddy``                              ``OpDPdy``
+``ddx_coarse``                       ``OpDPdxCoarse``
+``ddy_coarse``                       ``OpDPdyCoarse``
+``ddx_fine``                         ``OpDPdxFine``
+``ddy_fine``                         ``OpDPdyFine``
+``fmod``                             ``OpFMod``
+``fwidth``                           ``OpFwidth``
+``GroupMemoryBarrier``               ``OpMemoryBarrier``
+``GroupMemoryBarrierWithGroupSync``  ``OpControlBarrier``
+``InterlockedAdd``                   ``OpAtomicIAdd``
+``InterlockedAnd``                   ``OpAtomicAnd``
+``InterlockedOr``                    ``OpAtomicOr``
+``InterlockedXor``                   ``OpAtomicXor``
+``InterlockedMin``                   ``OpAtomicUMin``/``OpAtomicSMin``
+``InterlockedMax``                   ``OpAtomicUMax``/``OpAtomicSMax``
+``InterlockedExchange``              ``OpAtomicExchange``
+``InterlockedCompareExchange``       ``OpAtomicCompareExchange``
+``InterlockedCompareStore``          ``OpAtomicCompareExchange``
+``isnan``                            ``OpIsNan``
+``isInf``                            ``OpIsInf``
+``reversebits``                      ``OpBitReverse``
+``transpose``                        ``OpTranspose``
+==================================== =================================
 
 Using GLSL extended instructions
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+ 4 - 2
tools/clang/include/clang/SPIRV/ModuleBuilder.h

@@ -251,8 +251,10 @@ public:
   uint32_t createExtInst(uint32_t resultType, uint32_t setId, uint32_t instId,
                          llvm::ArrayRef<uint32_t> operands);
 
-  /// \brief Creates an OpControlBarrier instruction with the given flags.
-  void createControlBarrier(uint32_t exec, uint32_t memory, uint32_t semantics);
+  /// \brief Creates an OpControlBarrier if the execution scope <id> (exec) is
+  /// non-zero; otherwise creates an OpMemoryBarrier and exec is not used. The
+  /// memory scope <id> and memory semantics <id> are passed to either opcode.
+  void createBarrier(uint32_t exec, uint32_t memory, uint32_t semantics);
 
   /// \brief Creates an OpEmitVertex instruction.
   void createEmitVertex();

+ 8 - 5
tools/clang/lib/SPIRV/ModuleBuilder.cpp

@@ -553,10 +553,13 @@ uint32_t ModuleBuilder::createExtInst(uint32_t resultType, uint32_t setId,
   return resultId;
 }
 
-void ModuleBuilder::createControlBarrier(uint32_t execution, uint32_t memory,
-                                         uint32_t semantics) {
+void ModuleBuilder::createBarrier(uint32_t execution, uint32_t memory,
+                                  uint32_t semantics) {
   assert(insertPoint && "null insert point");
-  instBuilder.opControlBarrier(execution, memory, semantics).x();
+  if (execution)
+    instBuilder.opControlBarrier(execution, memory, semantics).x();
+  else
+    instBuilder.opMemoryBarrier(memory, semantics).x();
   insertPoint->appendInstruction(std::move(constructSite));
 }
 
@@ -924,8 +927,8 @@ uint32_t ModuleBuilder::getByteAddressBufferType(bool isRW) {
   const Type *type = Type::getStruct(theContext, {raTypeId}, typeDecs);
   const uint32_t typeId = theContext.getResultIdForType(type);
   theModule.addType(type, typeId);
-  theModule.addDebugName(
-      typeId, isRW ? "type.RWByteAddressBuffer" : "type.ByteAddressBuffer");
+  theModule.addDebugName(typeId, isRW ? "type.RWByteAddressBuffer"
+                                      : "type.ByteAddressBuffer");
   return typeId;
 }
 

+ 83 - 23
tools/clang/lib/SPIRV/SPIRVEmitter.cpp

@@ -1905,8 +1905,7 @@ SPIRVEmitter::processBufferTextureGetDimensions(const CXXMemberCallExpr *expr) {
   }
 
   const uint32_t query =
-      lod
-          ? theBuilder.createBinaryOp(spv::Op::OpImageQuerySizeLod,
+      lod ? theBuilder.createBinaryOp(spv::Op::OpImageQuerySizeLod,
                                       resultTypeId, objectId, lod)
           : theBuilder.createUnaryOp(spv::Op::OpImageQuerySize, resultTypeId,
                                      objectId);
@@ -2916,8 +2915,9 @@ SpirvEvalInfo SPIRVEmitter::doUnaryOperator(const UnaryOperator *expr) {
     uint32_t incValue = 0;
     if (TypeTranslator::isSpirvAcceptableMatrixType(subType)) {
       // For matrices, we can only increment/decrement each vector of it.
-      const auto actOnEachVec = [this, spvOp, one](
-          uint32_t /*index*/, uint32_t vecType, uint32_t lhsVec) {
+      const auto actOnEachVec = [this, spvOp, one](uint32_t /*index*/,
+                                                   uint32_t vecType,
+                                                   uint32_t lhsVec) {
         return theBuilder.createBinaryOp(spvOp, vecType, lhsVec, one);
       };
       incValue = processEachVectorInMatrix(subExpr, originValue, actOnEachVec);
@@ -3807,8 +3807,9 @@ SPIRVEmitter::processMatrixBinaryOp(const Expr *lhs, const Expr *rhs,
   case BO_DivAssign:
   case BO_RemAssign: {
     const uint32_t vecType = typeTranslator.getComponentVectorType(lhsType);
-    const auto actOnEachVec = [this, spvOp, rhsVal](
-        uint32_t index, uint32_t vecType, uint32_t lhsVec) {
+    const auto actOnEachVec = [this, spvOp, rhsVal](uint32_t index,
+                                                    uint32_t vecType,
+                                                    uint32_t lhsVec) {
       // For each vector of lhs, we need to load the corresponding vector of
       // rhs and do the operation on them.
       const uint32_t rhsVec =
@@ -4079,6 +4080,32 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
   }
   case hlsl::IntrinsicOp::IOP_dot:
     return processIntrinsicDot(callExpr);
+  case hlsl::IntrinsicOp::IOP_GroupMemoryBarrier:
+    return processIntrinsicMemoryBarrier(callExpr,
+                                         /*isDevice*/ false,
+                                         /*groupSync*/ false,
+                                         /*isAllBarrier*/ false);
+  case hlsl::IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync:
+    return processIntrinsicMemoryBarrier(callExpr,
+                                         /*isDevice*/ false,
+                                         /*groupSync*/ true,
+                                         /*isAllBarrier*/ false);
+  case hlsl::IntrinsicOp::IOP_DeviceMemoryBarrier:
+    return processIntrinsicMemoryBarrier(callExpr, /*isDevice*/ true,
+                                         /*groupSync*/ false,
+                                         /*isAllBarrier*/ false);
+  case hlsl::IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync:
+    return processIntrinsicMemoryBarrier(callExpr, /*isDevice*/ true,
+                                         /*groupSync*/ true,
+                                         /*isAllBarrier*/ false);
+  case hlsl::IntrinsicOp::IOP_AllMemoryBarrier:
+    return processIntrinsicMemoryBarrier(callExpr, /*isDevice*/ true,
+                                         /*groupSync*/ false,
+                                         /*isAllBarrier*/ true);
+  case hlsl::IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync:
+    return processIntrinsicMemoryBarrier(callExpr, /*isDevice*/ true,
+                                         /*groupSync*/ true,
+                                         /*isAllBarrier*/ true);
   case hlsl::IntrinsicOp::IOP_mul:
     return processIntrinsicMul(callExpr);
   case hlsl::IntrinsicOp::IOP_all:
@@ -4254,7 +4281,8 @@ SPIRVEmitter::processIntrinsicInterlockedMethod(const CallExpr *expr,
   };
 
   const auto writeToOutputArg = [&baseType, dest, this](
-      uint32_t toWrite, const CallExpr *callExpr, uint32_t outputArgIndex) {
+                                    uint32_t toWrite, const CallExpr *callExpr,
+                                    uint32_t outputArgIndex) {
     const auto outputArg = callExpr->getArg(outputArgIndex);
     const auto outputArgType = outputArg->getType();
     if (baseType != outputArgType)
@@ -4645,6 +4673,31 @@ uint32_t SPIRVEmitter::processIntrinsicClamp(const CallExpr *callExpr) {
                                   {argXId, argMinId, argMaxId});
 }
 
+uint32_t SPIRVEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr,
+                                                     bool isDevice,
+                                                     bool groupSync,
+                                                     bool isAllBarrier) {
+  // Scope values, used for both the execution and the memory scope operand:
+  //   Device = 1, Workgroup = 2
+  // Memory semantics bits selected for the Group/Device barriers:
+  //   WorkgroupMemory      = 0x100 = 256
+  //   CrossWorkgroupMemory = 0x200 = 512
+  // 'All Memory Barrier' must place barrier at several different levels, so
+  // several flags must be turned on:
+  // 0x10 | 0x40 | 0x80 | 0x100 | 0x200 | 0x400 | 0x800 = 0xFD0 = 4048.
+  // An execution scope of 0 makes createBarrier emit OpMemoryBarrier.
+  // NOTE(review): with groupSync the execution scope reuses memScope, so the
+  // Device*/All* ...WithGroupSync variants synchronize at Device scope (1),
+  // not Workgroup (2) -- confirm this is intended.
+  const uint32_t memSemaMask = isAllBarrier ? 0xFD0 : isDevice ? 0x200 : 0x100;
+  const auto memSema = theBuilder.getConstantUint32(memSemaMask);
+  const auto memScope = isDevice ? theBuilder.getConstantUint32(1)
+                                 : theBuilder.getConstantUint32(2);
+  const auto execScope = groupSync ? memScope : 0;
+  theBuilder.createBarrier(execScope, memScope, memSema);
+  return 0;
+}
+
 uint32_t SPIRVEmitter::processIntrinsicMul(const CallExpr *callExpr) {
   const QualType returnType = callExpr->getType();
   const uint32_t returnTypeId =
@@ -4864,8 +4917,9 @@ uint32_t SPIRVEmitter::processIntrinsicRcp(const CallExpr *callExpr) {
   uint32_t numRows = 0, numCols = 0;
   if (TypeTranslator::isMxNMatrix(argType, &elemType, &numRows, &numCols)) {
     const uint32_t vecOne = getVecValueOne(elemType, numCols);
-    const auto actOnEachVec = [this, vecOne](
-        uint32_t /*index*/, uint32_t vecType, uint32_t curRowId) {
+    const auto actOnEachVec = [this, vecOne](uint32_t /*index*/,
+                                             uint32_t vecType,
+                                             uint32_t curRowId) {
       return theBuilder.createBinaryOp(spv::Op::OpFDiv, vecType, vecOne,
                                        curRowId);
     };
@@ -5127,7 +5181,8 @@ uint32_t SPIRVEmitter::processIntrinsicSaturate(const CallExpr *callExpr) {
     const uint32_t vecZero = getVecValueZero(elemType, numCols);
     const uint32_t vecOne = getVecValueOne(elemType, numCols);
     const auto actOnEachVec = [this, vecZero, vecOne, glslInstSetId](
-        uint32_t /*index*/, uint32_t vecType, uint32_t curRowId) {
+                                  uint32_t /*index*/, uint32_t vecType,
+                                  uint32_t curRowId) {
       return theBuilder.createExtInst(vecType, glslInstSetId,
                                       GLSLstd450::GLSLstd450FClamp,
                                       {curRowId, vecZero, vecOne});
@@ -5153,8 +5208,9 @@ uint32_t SPIRVEmitter::processIntrinsicFloatSign(const CallExpr *callExpr) {
 
   // For matrices, we can perform the instruction on each vector of the matrix.
   if (TypeTranslator::isSpirvAcceptableMatrixType(argType)) {
-    const auto actOnEachVec = [this, glslInstSetId](
-        uint32_t /*index*/, uint32_t vecType, uint32_t curRowId) {
+    const auto actOnEachVec = [this, glslInstSetId](uint32_t /*index*/,
+                                                    uint32_t vecType,
+                                                    uint32_t curRowId) {
       return theBuilder.createExtInst(vecType, glslInstSetId,
                                       GLSLstd450::GLSLstd450FSign, {curRowId});
     };
@@ -5253,8 +5309,9 @@ uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
     // instruction on each vector of the matrix.
     if (actPerRowForMatrices &&
         TypeTranslator::isSpirvAcceptableMatrixType(arg->getType())) {
-      const auto actOnEachVec = [this, opcode](
-          uint32_t /*index*/, uint32_t vecType, uint32_t curRowId) {
+      const auto actOnEachVec = [this, opcode](uint32_t /*index*/,
+                                               uint32_t vecType,
+                                               uint32_t curRowId) {
         return theBuilder.createUnaryOp(opcode, vecType, {curRowId});
       };
       return processEachVectorInMatrix(arg, argId, actOnEachVec);
@@ -5268,8 +5325,9 @@ uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
     // instruction on each vector of the matrix.
     if (actPerRowForMatrices &&
         TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
-      const auto actOnEachVec = [this, opcode, arg1Id](
-          uint32_t index, uint32_t vecType, uint32_t arg0RowId) {
+      const auto actOnEachVec = [this, opcode, arg1Id](uint32_t index,
+                                                       uint32_t vecType,
+                                                       uint32_t arg0RowId) {
         const uint32_t arg1RowId =
             theBuilder.createCompositeExtract(vecType, arg1Id, {index});
         return theBuilder.createBinaryOp(opcode, vecType, arg0RowId, arg1RowId);
@@ -5297,8 +5355,9 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
     // instruction on each vector of the matrix.
     if (actPerRowForMatrices &&
         TypeTranslator::isSpirvAcceptableMatrixType(arg->getType())) {
-      const auto actOnEachVec = [this, glslInstSetId, opcode](
-          uint32_t /*index*/, uint32_t vecType, uint32_t curRowId) {
+      const auto actOnEachVec = [this, glslInstSetId,
+                                 opcode](uint32_t /*index*/, uint32_t vecType,
+                                         uint32_t curRowId) {
         return theBuilder.createExtInst(vecType, glslInstSetId, opcode,
                                         {curRowId});
       };
@@ -5313,8 +5372,9 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
     // instruction on each vector of the matrix.
     if (actPerRowForMatrices &&
         TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
-      const auto actOnEachVec = [this, glslInstSetId, opcode, arg1Id](
-          uint32_t index, uint32_t vecType, uint32_t arg0RowId) {
+      const auto actOnEachVec = [this, glslInstSetId, opcode,
+                                 arg1Id](uint32_t index, uint32_t vecType,
+                                         uint32_t arg0RowId) {
         const uint32_t arg1RowId =
             theBuilder.createCompositeExtract(vecType, arg1Id, {index});
         return theBuilder.createExtInst(vecType, glslInstSetId, opcode,
@@ -6041,9 +6101,9 @@ bool SPIRVEmitter::processHullEntryPointOutputAndPatchConstFunc(
   // Execution Barrier scope = Workgroup (2)
   // Memory Barrier scope = Device (1)
   // Memory Semantics Barrier scope = None (0)
-  theBuilder.createControlBarrier(theBuilder.getConstantUint32(2),
-                                  theBuilder.getConstantUint32(1),
-                                  theBuilder.getConstantUint32(0));
+  theBuilder.createBarrier(theBuilder.getConstantUint32(2),
+                           theBuilder.getConstantUint32(1),
+                           theBuilder.getConstantUint32(0));
 
   // The PCF should be called only once. Therefore, we check the invocationID,
   // and we only allow ID 0 to call the PCF.

+ 7 - 0
tools/clang/lib/SPIRV/SPIRVEmitter.h

@@ -270,6 +270,13 @@ private:
   /// Processes the 'lit' intrinsic function.
   uint32_t processIntrinsicLit(const CallExpr *);
 
+  /// Processes the 'GroupMemoryBarrier', 'GroupMemoryBarrierWithGroupSync',
+  /// 'DeviceMemoryBarrier', 'DeviceMemoryBarrierWithGroupSync',
+  /// 'AllMemoryBarrier', and 'AllMemoryBarrierWithGroupSync' intrinsic
+  /// functions.
+  uint32_t processIntrinsicMemoryBarrier(const CallExpr *, bool isDevice,
+                                         bool groupSync, bool isAllBarrier);
+
   /// Processes the 'modf' intrinsic function.
   uint32_t processIntrinsicModf(const CallExpr *);
 

+ 6 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.allmemorybarrier.hlsl

@@ -0,0 +1,6 @@
+// Run: %dxc -T cs_6_0 -E main
+
+void main() {
+// CHECK: OpMemoryBarrier %uint_1 %uint_4048
+  AllMemoryBarrier();
+}

+ 6 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.allmemorybarrierwithgroupsync.hlsl

@@ -0,0 +1,6 @@
+// Run: %dxc -T cs_6_0 -E main
+
+void main() {
+// CHECK: OpControlBarrier %uint_1 %uint_1 %uint_4048
+  AllMemoryBarrierWithGroupSync();
+}

+ 6 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.devicememorybarrier.hlsl

@@ -0,0 +1,6 @@
+// Run: %dxc -T cs_6_0 -E main
+
+void main() {
+// CHECK: OpMemoryBarrier %uint_1 %uint_512
+  DeviceMemoryBarrier();
+}

+ 6 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.devicememorybarrierwithgroupsync.hlsl

@@ -0,0 +1,6 @@
+// Run: %dxc -T cs_6_0 -E main
+
+void main() {
+// CHECK: OpControlBarrier %uint_1 %uint_1 %uint_512
+  DeviceMemoryBarrierWithGroupSync();
+}

+ 6 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.groupmemorybarrier.hlsl

@@ -0,0 +1,6 @@
+// Run: %dxc -T cs_6_0 -E main
+
+void main() {
+// CHECK: OpMemoryBarrier %uint_2 %uint_256
+  GroupMemoryBarrier();
+}

+ 6 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.groupmemorybarrierwithgroupsync.hlsl

@@ -0,0 +1,6 @@
+// Run: %dxc -T cs_6_0 -E main
+
+void main() {
+// CHECK: OpControlBarrier %uint_2 %uint_2 %uint_256
+  GroupMemoryBarrierWithGroupSync();
+}

+ 18 - 0
tools/clang/unittests/SPIRV/CodeGenSPIRVTest.cpp

@@ -647,6 +647,24 @@ TEST_F(FileTest, IntrinsicsFmod) { runFileTest("intrinsics.fmod.hlsl"); }
 TEST_F(FileTest, IntrinsicsFrac) { runFileTest("intrinsics.frac.hlsl"); }
 TEST_F(FileTest, IntrinsicsFrexp) { runFileTest("intrinsics.frexp.hlsl"); }
 TEST_F(FileTest, IntrinsicsFwidth) { runFileTest("intrinsics.fwidth.hlsl"); }
+TEST_F(FileTest, IntrinsicsDeviceMemoryBarrier) {
+  runFileTest("intrinsics.devicememorybarrier.hlsl");
+}
+TEST_F(FileTest, IntrinsicsAllMemoryBarrier) {
+  runFileTest("intrinsics.allmemorybarrier.hlsl");
+}
+TEST_F(FileTest, IntrinsicsAllMemoryBarrierWithGroupSync) {
+  runFileTest("intrinsics.allmemorybarrierwithgroupsync.hlsl");
+}
+TEST_F(FileTest, IntrinsicsDeviceMemoryBarrierWithGroupSync) {
+  runFileTest("intrinsics.devicememorybarrierwithgroupsync.hlsl");
+}
+TEST_F(FileTest, IntrinsicsGroupMemoryBarrier) {
+  runFileTest("intrinsics.groupmemorybarrier.hlsl");
+}
+TEST_F(FileTest, IntrinsicsGroupMemoryBarrierWithGroupSync) {
+  runFileTest("intrinsics.groupmemorybarrierwithgroupsync.hlsl");
+}
 TEST_F(FileTest, IntrinsicsIsFinite) {
   runFileTest("intrinsics.isfinite.hlsl");
 }