Browse Source

[spirv] Remove BufferBlock decoration (deprecated after SPIRV 1.3). (#2790)

Ehsan 5 years ago
parent
commit
fc2137349d
34 changed files with 1065 additions and 8 deletions
  1. 1 0
      tools/clang/lib/SPIRV/CMakeLists.txt
  2. 12 1
      tools/clang/lib/SPIRV/EmitVisitor.cpp
  3. 10 7
      tools/clang/lib/SPIRV/EmitVisitor.h
  4. 81 0
      tools/clang/lib/SPIRV/RemoveBufferBlockVisitor.cpp
  5. 39 0
      tools/clang/lib/SPIRV/RemoveBufferBlockVisitor.h
  6. 6 0
      tools/clang/lib/SPIRV/SpirvBuilder.cpp
  7. 135 0
      tools/clang/test/CodeGenSPIRV/meshshading.nv.fncall.amplification.vulkan1.2.hlsl
  8. 32 0
      tools/clang/test/CodeGenSPIRV/sm6.quad-read-across-diagonal.vulkan1.2.hlsl
  9. 32 0
      tools/clang/test/CodeGenSPIRV/sm6.quad-read-across-x.vulkan1.2.hlsl
  10. 32 0
      tools/clang/test/CodeGenSPIRV/sm6.quad-read-across-y.vulkan1.2.hlsl
  11. 33 0
      tools/clang/test/CodeGenSPIRV/sm6.quad-read-lane-at.vulkan1.2.hlsl
  12. 27 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-all-equal.vulkan1.2.hlsl
  13. 20 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-all-true.vulkan1.2.hlsl
  14. 20 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-any-true.vulkan1.2.hlsl
  15. 20 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-ballot.vulkan1.2.hlsl
  16. 38 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-bit-and.vulkan1.2.hlsl
  17. 38 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-bit-or.vulkan1.2.hlsl
  18. 38 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-bit-xor.vulkan1.2.hlsl
  19. 23 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-count-bits.vulkan1.2.hlsl
  20. 31 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-max.vulkan1.2.hlsl
  21. 31 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-min.vulkan1.2.hlsl
  22. 31 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-product.vulkan1.2.hlsl
  23. 31 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-active-sum.vulkan1.2.hlsl
  24. 20 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-get-lane-count.vulkan1.2.hlsl
  25. 20 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-get-lane-index.vulkan1.2.hlsl
  26. 13 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-is-first-lane.vulkan1.2.hlsl
  27. 21 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-prefix-count-bits.vulkan1.2.hlsl
  28. 31 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-prefix-product.vulkan1.2.hlsl
  29. 31 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-prefix-sum.vulkan1.2.hlsl
  30. 32 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-read-lane-at.vulkan1.2.hlsl
  31. 32 0
      tools/clang/test/CodeGenSPIRV/sm6.wave-read-lane-first.vulkan1.2.hlsl
  32. 27 0
      tools/clang/test/CodeGenSPIRV/sm6.wave.builtin.no-dup.vulkan1.2.hlsl
  33. 38 0
      tools/clang/test/CodeGenSPIRV/vk.1p2.block-decoration.hlsl
  34. 39 0
      tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp

+ 1 - 0
tools/clang/lib/SPIRV/CMakeLists.txt

@@ -18,6 +18,7 @@ add_clang_library(clangSPIRV
   PreciseVisitor.cpp
   RawBufferMethods.cpp
   RelaxedPrecisionVisitor.cpp
+  RemoveBufferBlockVisitor.cpp
   SpirvBasicBlock.cpp
   SpirvBuilder.cpp
   SpirvContext.cpp

+ 12 - 1
tools/clang/lib/SPIRV/EmitVisitor.cpp

@@ -89,6 +89,13 @@ uint32_t getHeaderVersion(llvm::StringRef env) {
   return 0x00010000u;
 }
 
+// Tells whether the BufferBlock decoration counts as deprecated for the
+// requested target environment. The decision is a plain string comparison of
+// opts.targetEnv against "vulkan1.2": any value that orders at or after that
+// literal yields true, everything ordering before it yields false.
+bool isBufferBlockDecorationDeprecated(
+    const clang::spirv::SpirvCodeGenOptions &opts) {
+  const int ordering = opts.targetEnv.compare("vulkan1.2");
+  return !(ordering < 0);
+}
+
 constexpr uint32_t kGeneratorNumber = 14;
 constexpr uint32_t kToolVersion = 0;
 
@@ -1587,7 +1594,11 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) {
     // Emit Block or BufferBlock decorations if necessary.
     auto interfaceType = structType->getInterfaceType();
     if (interfaceType == StructInterfaceType::StorageBuffer)
-      emitDecoration(id, spv::Decoration::BufferBlock, {});
+      emitDecoration(id,
+                     isBufferBlockDecorationDeprecated(spvOptions)
+                         ? spv::Decoration::Block
+                         : spv::Decoration::BufferBlock,
+                     {});
     else if (interfaceType == StructInterfaceType::UniformBuffer)
       emitDecoration(id, spv::Decoration::Block, {});
 

+ 10 - 7
tools/clang/lib/SPIRV/EmitVisitor.h

@@ -45,15 +45,17 @@ public:
 
 public:
   EmitTypeHandler(ASTContext &astCtx, SpirvContext &spvContext,
+                  const SpirvCodeGenOptions &opts,
                   std::vector<uint32_t> *debugVec,
                   std::vector<uint32_t> *decVec,
                   std::vector<uint32_t> *typesVec,
                   const std::function<uint32_t()> &takeNextIdFn)
-      : astContext(astCtx), context(spvContext), debugVariableBinary(debugVec),
-        annotationsBinary(decVec), typeConstantBinary(typesVec),
-        takeNextIdFunction(takeNextIdFn), emittedConstantInts({}),
-        emittedConstantFloats({}), emittedConstantComposites({}),
-        emittedConstantNulls({}), emittedConstantBools() {
+      : astContext(astCtx), context(spvContext), spvOptions(opts),
+        debugVariableBinary(debugVec), annotationsBinary(decVec),
+        typeConstantBinary(typesVec), takeNextIdFunction(takeNextIdFn),
+        emittedConstantInts({}), emittedConstantFloats({}),
+        emittedConstantComposites({}), emittedConstantNulls({}),
+        emittedConstantBools() {
     assert(decVec);
     assert(typesVec);
   }
@@ -143,6 +145,7 @@ private:
 private:
   ASTContext &astContext;
   SpirvContext &context;
+  const SpirvCodeGenOptions &spvOptions;
   std::vector<uint32_t> curTypeInst;
   std::vector<uint32_t> curDecorationInst;
   std::vector<uint32_t> *debugVariableBinary;
@@ -189,8 +192,8 @@ public:
   EmitVisitor(ASTContext &astCtx, SpirvContext &spvCtx,
               const SpirvCodeGenOptions &opts)
       : Visitor(opts, spvCtx), astContext(astCtx), id(0),
-        typeHandler(astCtx, spvCtx, &debugVariableBinary, &annotationsBinary,
-                    &typeConstantBinary,
+        typeHandler(astCtx, spvCtx, opts, &debugVariableBinary,
+                    &annotationsBinary, &typeConstantBinary,
                     [this]() -> uint32_t { return takeNextId(); }),
         debugMainFileId(0), debugLine(0), debugColumn(0),
         lastOpWasMergeInst(false) {}

+ 81 - 0
tools/clang/lib/SPIRV/RemoveBufferBlockVisitor.cpp

@@ -0,0 +1,81 @@
+//===-- RemoveBufferBlockVisitor.cpp - RemoveBufferBlock Visitor -*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RemoveBufferBlockVisitor.h"
+#include "clang/SPIRV/SpirvContext.h"
+
+namespace {
+
+// Reports whether BufferBlock is treated as deprecated for the target
+// environment named in |opts|. The test is a string comparison of
+// opts.targetEnv with "vulkan1.2"; values ordering at or after it give true.
+bool isBufferBlockDecorationDeprecated(
+    const clang::spirv::SpirvCodeGenOptions &opts) {
+  const int cmp = opts.targetEnv.compare("vulkan1.2");
+  const bool ordersBefore = cmp < 0;
+  return !ordersBefore;
+}
+
+} // end anonymous namespace
+
+namespace clang {
+namespace spirv {
+
+bool RemoveBufferBlockVisitor::visit(SpirvModule *mod, Phase phase) {
+  // This IMR visitor only has work to do when the target environment is
+  // Vulkan 1.2 or later, where the BufferBlock decoration is deprecated and
+  // should be removed from the module. The environment is checked in the
+  // module's Init phase only; when no action is needed there, false is
+  // returned. Every other case returns true.
+  const bool inInitPhase = phase == Visitor::Phase::Init;
+  if (inInitPhase && !isBufferBlockDecorationDeprecated(spvOptions))
+    return false;
+  return true;
+}
+
+bool RemoveBufferBlockVisitor::visitInstruction(SpirvInstruction *inst) {
+  // Instructions without a result type carry no pointer type to rewrite.
+  if (!inst->getResultType())
+    return true;
+
+  // OpAccessChain can obtain pointers to any type. Its result type is
+  // OpTypePointer, and it should get the same storage class as its base.
+  if (auto *accessChain = dyn_cast<SpirvAccessChain>(inst)) {
+    // The result type of OpAccessChain and the result type of its base must be
+    // OpTypePointer. cast<> performs that check itself (it asserts on a type
+    // mismatch), so no separate assert(isa<>) followed by an unchecked
+    // dyn_cast<> is needed.
+    auto *accessChainPtr =
+        cast<SpirvPointerType>(accessChain->getResultType());
+    auto *basePtr =
+        cast<SpirvPointerType>(accessChain->getBase()->getResultType());
+    auto baseStorageClass = basePtr->getStorageClass();
+    if (accessChainPtr->getStorageClass() != baseStorageClass) {
+      // Rebuild the pointer type with the base's storage class, keeping the
+      // pointee type unchanged.
+      auto *newAccessChainType = context.getPointerType(
+          accessChainPtr->getPointeeType(), baseStorageClass);
+      inst->setStorageClass(baseStorageClass);
+      inst->setResultType(newAccessChainType);
+    }
+  }
+
+  // For all instructions, if the result type is a pointer pointing to a struct
+  // with StorageBuffer interface, the storage class must be updated.
+  // The result type is re-read here because the access-chain handling above
+  // may have just replaced it.
+  if (auto *ptrResultType = dyn_cast<SpirvPointerType>(inst->getResultType())) {
+    if (auto *structPointeeType =
+            dyn_cast<StructType>(ptrResultType->getPointeeType())) {
+      // Update the instruction's storage class if necessary
+      if (structPointeeType->getInterfaceType() ==
+              StructInterfaceType::StorageBuffer &&
+          ptrResultType->getStorageClass() !=
+              spv::StorageClass::StorageBuffer) {
+        inst->setStorageClass(spv::StorageClass::StorageBuffer);
+        inst->setResultType(context.getPointerType(
+            ptrResultType->getPointeeType(), spv::StorageClass::StorageBuffer));
+      }
+    }
+  }
+
+  // Always continue the traversal.
+  return true;
+}
+
+} // end namespace spirv
+} // end namespace clang

+ 39 - 0
tools/clang/lib/SPIRV/RemoveBufferBlockVisitor.h

@@ -0,0 +1,39 @@
+//===--- RemoveBufferBlockVisitor.h - RemoveBufferBlock Visitor --*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_SPIRV_REMOVEBUFFERBLOCKVISITOR_H
+#define LLVM_CLANG_LIB_SPIRV_REMOVEBUFFERBLOCKVISITOR_H
+
+#include "clang/SPIRV/SpirvVisitor.h"
+
+namespace clang {
+namespace spirv {
+
+class SpirvContext;
+
+/// Visitor run over the SPIR-V module before emission. Per its
+/// implementation, it only acts when the BufferBlock decoration is deprecated
+/// for the target environment, in which case it rewrites the storage class and
+/// result type of affected instructions.
+class RemoveBufferBlockVisitor : public Visitor {
+public:
+  RemoveBufferBlockVisitor(SpirvContext &spvCtx,
+                           const SpirvCodeGenOptions &opts)
+      : Visitor(opts, spvCtx) {}
+
+  /// Module-level visit; decides whether this visitor has any work to do for
+  /// the current target environment.
+  bool visit(SpirvModule *, Phase) override;
+
+  /// The "sink" visit function for all instructions.
+  ///
+  /// By default, all other visit functions redirect to this one, so
+  /// overriding it is enough to handle every instruction regardless of its
+  /// concrete type.
+  bool visitInstruction(SpirvInstruction *instr) override;
+};
+
+} // end namespace spirv
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_SPIRV_REMOVEBUFFERBLOCKVISITOR_H

+ 6 - 0
tools/clang/lib/SPIRV/SpirvBuilder.cpp

@@ -14,6 +14,7 @@
 #include "LowerTypeVisitor.h"
 #include "PreciseVisitor.h"
 #include "RelaxedPrecisionVisitor.h"
+#include "RemoveBufferBlockVisitor.h"
 #include "clang/SPIRV/AstTypeProbe.h"
 
 namespace clang {
@@ -1034,6 +1035,7 @@ std::vector<uint32_t> SpirvBuilder::takeModule() {
   CapabilityVisitor capabilityVisitor(astContext, context, spirvOptions, *this);
   RelaxedPrecisionVisitor relaxedPrecisionVisitor(context, spirvOptions);
   PreciseVisitor preciseVisitor(context, spirvOptions);
+  RemoveBufferBlockVisitor removeBufferBlockVisitor(context, spirvOptions);
   EmitVisitor emitVisitor(astContext, context, spirvOptions);
 
   mod->invokeVisitor(&literalTypeVisitor, true);
@@ -1050,6 +1052,10 @@ std::vector<uint32_t> SpirvBuilder::takeModule() {
   // Propagate NoContraction decorations
   mod->invokeVisitor(&preciseVisitor, true);
 
+  // Remove BufferBlock decoration if necessary (this decoration is deprecated
+  // after SPIR-V 1.3).
+  mod->invokeVisitor(&removeBufferBlockVisitor);
+
   // Emit SPIR-V
   mod->invokeVisitor(&emitVisitor);
 

+ 135 - 0
tools/clang/test/CodeGenSPIRV/meshshading.nv.fncall.amplification.vulkan1.2.hlsl

@@ -0,0 +1,135 @@
+// Run: %dxc -T as_6_5 -E main -fspv-target-env=vulkan1.2
+
+// CHECK:  OpCapability MeshShadingNV
+// CHECK:  OpExtension "SPV_NV_mesh_shader"
+// CHECK:  OpEntryPoint TaskNV %main "main"
+
+struct SubMesh {
+    uint vertexCount;
+    uint vertexOffset;
+    uint primitiveCount;
+    uint indexOffset;
+    float4 boundingBox[8];
+};
+
+struct Mesh {
+    uint firstSubmesh;
+    uint submeshCount;
+    uint dummy[2];
+};
+
+struct UserVertex {
+    float3 position;
+    float2 texcoord;
+    float3 color;
+};
+
+[[vk::binding(0, 0)]]
+RWStructuredBuffer<UserVertex> userVertices;
+
+[[vk::binding(1, 0)]]
+RWStructuredBuffer<uint> userIndices;
+
+[[vk::binding(2, 0)]]
+RWStructuredBuffer<Mesh> meshes;
+
+[[vk::binding(3, 0)]]
+RWStructuredBuffer<SubMesh> submeshes;
+
+[[vk::binding(4, 0)]]
+cbuffer UBO {
+    row_major float4x4 mvp;
+}
+
+groupshared uint passedSubmeshes;
+struct SubMeshes {
+    uint submeshID[256] : SUBMESH;
+};
+groupshared SubMeshes sharedSubMeshes;
+
+// CHECK:  %_arr_v4float_uint_8_0 = OpTypeArray %v4float %uint_8
+// CHECK:  %SubMesh_0 = OpTypeStruct %uint %uint %uint %uint %_arr_v4float_uint_8_0
+// CHECK:  %_ptr_Function_SubMesh_0 = OpTypePointer Function %SubMesh_0
+// CHECK:  [[funcType:%\d+]] = OpTypeFunction %bool %_ptr_Function_SubMesh_0
+
+bool TestSubmesh(SubMesh submesh) {
+    uint clip = 0x0U;
+
+    for (uint bbv = 0U ; bbv < 8U; bbv++) {
+        float4 pos= mul(mvp, submesh.boundingBox[bbv]);
+        if (pos.x <= pos.w) clip |= 0x1U;
+        if (pos.y <= 0.3333 * pos.w) clip |= 0x2U;
+        if (pos.z <= pos.w) clip |= 0x4U;
+        if (pos.x >= -pos.w) clip |= 0x8U;
+        if (pos.y >= -pos.w) clip |= 0x10U;
+        if (pos.z >= -pos.w) clip |= 0x20U;
+    }
+    return (clip == 0x3FU);
+}
+
+[numthreads(32, 1, 1)]
+void main(
+            in uint tid : SV_GroupThreadID,
+            in uint mid : SV_GroupID
+         )
+{
+    uint firstSubmesh = meshes[mid].firstSubmesh;
+    uint submeshCount = meshes[mid].submeshCount;
+    passedSubmeshes = 0U;
+    GroupMemoryBarrier();
+    for (uint i = 0U; i < submeshCount; i += 32U) {
+        uint smid = firstSubmesh + i + tid;
+        if (smid >= firstSubmesh + submeshCount) continue;
+
+// CHECK:  %submesh = OpVariable %_ptr_Function_SubMesh_0 Function
+// CHECK:  %passed = OpVariable %_ptr_Function_bool Function
+// CHECK:  %param_var_submesh = OpVariable %_ptr_Function_SubMesh_0 Function
+        SubMesh submesh = submeshes[smid];
+        bool passed = true;
+
+// CHECK:  [[submeshValue:%\d+]] = OpLoad %SubMesh_0 %submesh
+// CHECK:  OpStore %param_var_submesh [[submeshValue]]
+// CHECK:  [[rv:%\d+]] = OpFunctionCall %bool %TestSubmesh %param_var_submesh
+// CHECK:  [[cond:%\d+]] = OpLogicalNot %bool [[rv]]
+// CHECK:  OpSelectionMerge %if_merge_0 None
+// CHECK:  OpBranchConditional [[cond]] %if_true_0 %if_merge_0
+// CHECK:  %if_true_0 = OpLabel
+// CHECK:  OpStore %passed %false
+// CHECK:  OpBranch %if_merge_0
+// CHECK:  %if_merge_0 = OpLabel
+        if (!TestSubmesh(submesh)) passed = false;
+
+        if (passed) {
+            uint ballot = WaveActiveBallot(passed).x;
+            uint laneMaskLT = (1 << WaveGetLaneIndex()) - 1;
+            uint lowerThreads = ballot & laneMaskLT;
+            uint slot = passedSubmeshes + WavePrefixCountBits(passed);
+            sharedSubMeshes.submeshID[slot] = smid;
+            if (lowerThreads == 0U) {
+                passedSubmeshes += WaveActiveCountBits(passed);
+            }
+        }
+        GroupMemoryBarrier();
+    }
+    DispatchMesh(passedSubmeshes, 1, 1, sharedSubMeshes);
+}
+
+/* bool TestSubmesh(SubMesh submesh) { ... } */
+
+// CHECK:  %TestSubmesh = OpFunction %bool None [[funcType]]
+// CHECK:  %submesh_0 = OpFunctionParameter %_ptr_Function_SubMesh_0
+
+// CHECK:  %bb_entry_0 = OpLabel
+
+// CHECK:  %clip = OpVariable %_ptr_Function_uint Function
+// CHECK:  %bbv = OpVariable %_ptr_Function_uint Function
+// CHECK:  %pos = OpVariable %_ptr_Function_v4float Function
+
+// CHECK:  %for_check_0 = OpLabel
+// CHECK:  %for_body_0 = OpLabel
+// CHECK:  %for_merge_0 = OpLabel
+
+// CHECK:  [[clipValue:%\d+]] = OpLoad %uint %clip
+// CHECK:  [[retValue:%\d+]] = OpIEqual %bool [[clipValue]] %uint_63
+// CHECK:  OpReturnValue [[retValue]]
+// CHECK:  OpFunctionEnd

+ 32 - 0
tools/clang/test/CodeGenSPIRV/sm6.quad-read-across-diagonal.vulkan1.2.hlsl

@@ -0,0 +1,32 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+     int4 val1;
+    uint3 val2;
+    float val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformQuad
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+
+     int4 val1 = values[x].val1;
+    uint3 val2 = values[x].val2;
+    float val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4int %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadSwap %v4int %uint_3 [[val1]] %uint_2
+    values[x].val1 = QuadReadAcrossDiagonal(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v3uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadSwap %v3uint %uint_3 [[val2]] %uint_2
+    values[x].val2 = QuadReadAcrossDiagonal(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %float %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadSwap %float %uint_3 [[val3]] %uint_2
+    values[x].val3 = QuadReadAcrossDiagonal(val3);
+}

+ 32 - 0
tools/clang/test/CodeGenSPIRV/sm6.quad-read-across-x.vulkan1.2.hlsl

@@ -0,0 +1,32 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+     int4 val1;
+    uint3 val2;
+    float val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformQuad
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+
+     int4 val1 = values[x].val1;
+    uint3 val2 = values[x].val2;
+    float val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4int %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadSwap %v4int %uint_3 [[val1]] %uint_0
+    values[x].val1 = QuadReadAcrossX(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v3uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadSwap %v3uint %uint_3 [[val2]] %uint_0
+    values[x].val2 = QuadReadAcrossX(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %float %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadSwap %float %uint_3 [[val3]] %uint_0
+    values[x].val3 = QuadReadAcrossX(val3);
+}

+ 32 - 0
tools/clang/test/CodeGenSPIRV/sm6.quad-read-across-y.vulkan1.2.hlsl

@@ -0,0 +1,32 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+     int4 val1;
+    uint3 val2;
+    float val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformQuad
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+
+     int4 val1 = values[x].val1;
+    uint3 val2 = values[x].val2;
+    float val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4int %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadSwap %v4int %uint_3 [[val1]] %uint_1
+    values[x].val1 = QuadReadAcrossY(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v3uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadSwap %v3uint %uint_3 [[val2]] %uint_1
+    values[x].val2 = QuadReadAcrossY(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %float %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadSwap %float %uint_3 [[val3]] %uint_1
+    values[x].val3 = QuadReadAcrossY(val3);
+}

+ 33 - 0
tools/clang/test/CodeGenSPIRV/sm6.quad-read-lane-at.vulkan1.2.hlsl

@@ -0,0 +1,33 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    float4 val1;
+     uint3 val2;
+       int val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformQuad
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+
+    float4 val1 = values[x].val1;
+     uint3 val2 = values[x].val2;
+       int val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4float %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadBroadcast %v4float %uint_3 [[val1]] %uint_0
+    values[x].val1 = QuadReadLaneAt(val1, 0);
+// CHECK:      [[val2:%\d+]] = OpLoad %v3uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadBroadcast %v3uint %uint_3 [[val2]] %uint_1
+    values[x].val2 = QuadReadLaneAt(val2, 1);
+// CHECK:      [[val3:%\d+]] = OpLoad %int %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformQuadBroadcast %int %uint_3 [[val3]] %uint_2
+    values[x].val3 = QuadReadLaneAt(val3, 2);
+}
+

+ 27 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-all-equal.vulkan1.2.hlsl

@@ -0,0 +1,27 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    float4 val1;
+    uint val2;
+    bool res;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformVote
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+// CHECK:         [[ptr:%\d+]] = OpAccessChain %_ptr_StorageBuffer_v4float %values %int_0 {{%\d+}} %int_0
+// CHECK-NEXT: [[f32val:%\d+]] = OpLoad %v4float [[ptr]]
+// TODO: The front end will return bool4 for the first call, which actually should be bool.
+// XXXXX-NEXT:        {{%\d+}} = OpGroupNonUniformAllEqual %bool %uint_3 [[f32val]]
+
+// CHECK:         [[ptr:%\d+]] = OpAccessChain %_ptr_StorageBuffer_uint %values %int_0 {{%\d+}} %int_1
+// CHECK-NEXT: [[u32val:%\d+]] = OpLoad %uint [[ptr]]
+// CHECK-NEXT:        {{%\d+}} = OpGroupNonUniformAllEqual %bool %uint_3 [[u32val]]
+    values[x].res = WaveActiveAllEqual(values[x].val1) && WaveActiveAllEqual(values[x].val2);
+}

+ 20 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-all-true.vulkan1.2.hlsl

@@ -0,0 +1,20 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    uint val;
+    bool res;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformVote
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+// CHECK:      [[cmp:%\d+]] = OpIEqual %bool {{%\d+}} %uint_1
+// CHECK-NEXT:     {{%\d+}} = OpGroupNonUniformAll %bool %uint_3 [[cmp]]
+    values[x].res = WaveActiveAllTrue(values[x].val == 1);
+}

+ 20 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-any-true.vulkan1.2.hlsl

@@ -0,0 +1,20 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    uint val;
+    bool res;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformVote
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+// CHECK:      [[cmp:%\d+]] = OpIEqual %bool {{%\d+}} %uint_0
+// CHECK-NEXT:     {{%\d+}} = OpGroupNonUniformAny %bool %uint_3 [[cmp]]
+    values[x].res = WaveActiveAnyTrue(values[x].val == 0);
+}

+ 20 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-ballot.vulkan1.2.hlsl

@@ -0,0 +1,20 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    uint val;
+    uint4 res;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformBallot
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+// CHECK:      [[cmp:%\d+]] = OpIEqual %bool {{%\d+}} %uint_2
+// CHECK-NEXT:     {{%\d+}} = OpGroupNonUniformBallot %v4uint %uint_3 [[cmp]]
+    values[x].res = WaveActiveBallot(values[x].val == 2);
+}

+ 38 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-bit-and.vulkan1.2.hlsl

@@ -0,0 +1,38 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// Note: WaveActiveBitAnd() only accepts unsigned integer scalars/vectors.
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    uint4 val1;
+    uint3 val2;
+    uint2 val3;
+     uint val4;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformArithmetic
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+    uint4 val1 = values[x].val1;
+    uint3 val2 = values[x].val2;
+    uint2 val3 = values[x].val3;
+     uint val4 = values[x].val4;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4uint %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseAnd %v4uint %uint_3 Reduce [[val1]]
+    values[x].val1 = WaveActiveBitAnd(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v3uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseAnd %v3uint %uint_3 Reduce [[val2]]
+    values[x].val2 = WaveActiveBitAnd(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %v2uint %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseAnd %v2uint %uint_3 Reduce [[val3]]
+    values[x].val3 = WaveActiveBitAnd(val3);
+// CHECK:      [[val4:%\d+]] = OpLoad %uint %val4
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseAnd %uint %uint_3 Reduce [[val4]]
+    values[x].val4 = WaveActiveBitAnd(val4);
+}

+ 38 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-bit-or.vulkan1.2.hlsl

@@ -0,0 +1,38 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// Note: WaveActiveBitOr() only accepts unsigned integer scalars/vectors.
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    uint4 val1;
+    uint3 val2;
+    uint2 val3;
+     uint val4;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformArithmetic
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+    uint4 val1 = values[x].val1;
+    uint3 val2 = values[x].val2;
+    uint2 val3 = values[x].val3;
+     uint val4 = values[x].val4;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4uint %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseOr %v4uint %uint_3 Reduce [[val1]]
+    values[x].val1 = WaveActiveBitOr(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v3uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseOr %v3uint %uint_3 Reduce [[val2]]
+    values[x].val2 = WaveActiveBitOr(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %v2uint %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseOr %v2uint %uint_3 Reduce [[val3]]
+    values[x].val3 = WaveActiveBitOr(val3);
+// CHECK:      [[val4:%\d+]] = OpLoad %uint %val4
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseOr %uint %uint_3 Reduce [[val4]]
+    values[x].val4 = WaveActiveBitOr(val4);
+}

+ 38 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-bit-xor.vulkan1.2.hlsl

@@ -0,0 +1,38 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// Note: WaveActiveBitXor() only accepts unsigned integer scalars/vectors.
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    uint4 val1;
+    uint3 val2;
+    uint2 val3;
+     uint val4;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformArithmetic
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+    uint4 val1 = values[x].val1;
+    uint3 val2 = values[x].val2;
+    uint2 val3 = values[x].val3;
+     uint val4 = values[x].val4;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4uint %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseXor %v4uint %uint_3 Reduce [[val1]]
+    values[x].val1 = WaveActiveBitXor(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v3uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseXor %v3uint %uint_3 Reduce [[val2]]
+    values[x].val2 = WaveActiveBitXor(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %v2uint %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseXor %v2uint %uint_3 Reduce [[val3]]
+    values[x].val3 = WaveActiveBitXor(val3);
+// CHECK:      [[val4:%\d+]] = OpLoad %uint %val4
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBitwiseXor %uint %uint_3 Reduce [[val4]]
+    values[x].val4 = WaveActiveBitXor(val4);
+}

+ 23 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-count-bits.vulkan1.2.hlsl

@@ -0,0 +1,23 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+     uint val;
+};
+
+RWStructuredBuffer<S> values;
+RWStructuredBuffer<S> results;
+
+// CHECK: OpCapability GroupNonUniformBallot
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+
+// CHECK:         [[cmp:%\d+]] = OpIEqual %bool {{%\d+}} %uint_0
+// CHECK-NEXT: [[ballot:%\d+]] = OpGroupNonUniformBallot %v4uint %uint_3 [[cmp]]
+// CHECK:             {{%\d+}} = OpGroupNonUniformBallotBitCount %uint %uint_3 Reduce [[ballot]]
+    results[x].val = WaveActiveCountBits(values[x].val == 0);
+}
+

+ 31 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-max.vulkan1.2.hlsl

@@ -0,0 +1,31 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+     uint4 val1;
+    float2 val2;
+       int val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformArithmetic
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+     uint4 val1 = values[x].val1;
+    float2 val2 = values[x].val2;
+       int val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4uint %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformUMax %v4uint %uint_3 Reduce [[val1]]
+    values[x].val1 = WaveActiveMax(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v2float %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformFMax %v2float %uint_3 Reduce [[val2]]
+    values[x].val2 = WaveActiveMax(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %int %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformSMax %int %uint_3 Reduce [[val3]]
+    values[x].val3 = WaveActiveMax(val3);
+}

+ 31 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-min.vulkan1.2.hlsl

@@ -0,0 +1,31 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+     uint4 val1;
+    float2 val2;
+       int val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformArithmetic
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+     uint4 val1 = values[x].val1;
+    float2 val2 = values[x].val2;
+       int val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4uint %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformUMin %v4uint %uint_3 Reduce [[val1]]
+    values[x].val1 = WaveActiveMin(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v2float %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformFMin %v2float %uint_3 Reduce [[val2]]
+    values[x].val2 = WaveActiveMin(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %int %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformSMin %int %uint_3 Reduce [[val3]]
+    values[x].val3 = WaveActiveMin(val3);
+}

+ 31 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-product.vulkan1.2.hlsl

@@ -0,0 +1,31 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    float4 val1;
+     uint2 val2;
+       int val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformArithmetic
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+    float4 val1 = values[x].val1;
+     uint2 val2 = values[x].val2;
+       int val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4float %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformFMul %v4float %uint_3 Reduce [[val1]]
+    values[x].val1 = WaveActiveProduct(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v2uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformIMul %v2uint %uint_3 Reduce [[val2]]
+    values[x].val2 = WaveActiveProduct(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %int %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformIMul %int %uint_3 Reduce [[val3]]
+    values[x].val3 = WaveActiveProduct(val3);
+}

+ 31 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-active-sum.vulkan1.2.hlsl

@@ -0,0 +1,31 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+     int4 val1;
+    uint2 val2;
+    float val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformArithmetic
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+     int4 val1 = values[x].val1;
+    uint2 val2 = values[x].val2;
+    float val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4int %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformIAdd %v4int %uint_3 Reduce [[val1]]
+    values[x].val1 = WaveActiveSum(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v2uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformIAdd %v2uint %uint_3 Reduce [[val2]]
+    values[x].val2 = WaveActiveSum(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %float %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformFAdd %float %uint_3 Reduce [[val3]]
+    values[x].val3 = WaveActiveSum(val3);
+}

+ 20 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-get-lane-count.vulkan1.2.hlsl

@@ -0,0 +1,20 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+RWStructuredBuffer<uint> values;
+
+// CHECK: OpCapability GroupNonUniform
+
+// CHECK: OpEntryPoint GLCompute
+// CHECK-SAME: %SubgroupSize
+
+// CHECK: OpDecorate %SubgroupSize BuiltIn SubgroupSize
+
+// CHECK: %SubgroupSize = OpVariable %_ptr_Input_uint Input
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+// CHECK: OpLoad %uint %SubgroupSize
+    values[id.x] = WaveGetLaneCount();
+}

+ 20 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-get-lane-index.vulkan1.2.hlsl

@@ -0,0 +1,20 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+RWStructuredBuffer<uint> values;
+
+// CHECK: OpCapability GroupNonUniform
+
+// CHECK: OpEntryPoint GLCompute
+// CHECK-SAME: %SubgroupLocalInvocationId
+
+// CHECK: OpDecorate %SubgroupLocalInvocationId BuiltIn SubgroupLocalInvocationId
+
+// CHECK: %SubgroupLocalInvocationId = OpVariable %_ptr_Input_uint Input
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+// CHECK: OpLoad %uint %SubgroupLocalInvocationId
+    values[id.x] = WaveGetLaneIndex();
+}

+ 13 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-is-first-lane.vulkan1.2.hlsl

@@ -0,0 +1,13 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+RWStructuredBuffer<uint> values;
+
+// CHECK: OpCapability GroupNonUniform
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+// CHECK: {{%\d+}} = OpGroupNonUniformElect %bool %uint_3
+    values[id.x] = WaveIsFirstLane();
+}

+ 21 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-prefix-count-bits.vulkan1.2.hlsl

@@ -0,0 +1,21 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+     uint val;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformBallot
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+
+// CHECK:         [[cmp:%\d+]] = OpIEqual %bool {{%\d+}} %uint_0
+// CHECK-NEXT: [[ballot:%\d+]] = OpGroupNonUniformBallot %v4uint %uint_3 [[cmp]]
+// CHECK:             {{%\d+}} = OpGroupNonUniformBallotBitCount %uint %uint_3 ExclusiveScan [[ballot]]
+    values[x].val = WavePrefixCountBits(values[x].val == 0);
+}

+ 31 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-prefix-product.vulkan1.2.hlsl

@@ -0,0 +1,31 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    float4 val1;
+     uint2 val2;
+       int val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformArithmetic
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+    float4 val1 = values[x].val1;
+     uint2 val2 = values[x].val2;
+       int val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4float %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformFMul %v4float %uint_3 ExclusiveScan [[val1]]
+    values[x].val1 = WavePrefixProduct(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v2uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformIMul %v2uint %uint_3 ExclusiveScan [[val2]]
+    values[x].val2 = WavePrefixProduct(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %int %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformIMul %int %uint_3 ExclusiveScan [[val3]]
+    values[x].val3 = WavePrefixProduct(val3);
+}

+ 31 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-prefix-sum.vulkan1.2.hlsl

@@ -0,0 +1,31 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+     int4 val1;
+    uint2 val2;
+    float val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformArithmetic
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+     int4 val1 = values[x].val1;
+    uint2 val2 = values[x].val2;
+    float val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4int %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformIAdd %v4int %uint_3 ExclusiveScan [[val1]]
+    values[x].val1 = WavePrefixSum(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v2uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformIAdd %v2uint %uint_3 ExclusiveScan [[val2]]
+    values[x].val2 = WavePrefixSum(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %float %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformFAdd %float %uint_3 ExclusiveScan [[val3]]
+    values[x].val3 = WavePrefixSum(val3);
+}

+ 32 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-read-lane-at.vulkan1.2.hlsl

@@ -0,0 +1,32 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    float4 val1;
+     uint3 val2;
+       int val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformBallot
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+
+    float4 val1 = values[x].val1;
+     uint3 val2 = values[x].val2;
+       int val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4float %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBroadcast %v4float %uint_3 [[val1]] %uint_15
+    values[x].val1 = WaveReadLaneAt(val1, 15);
+// CHECK:      [[val2:%\d+]] = OpLoad %v3uint %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBroadcast %v3uint %uint_3 [[val2]] %uint_42
+    values[x].val2 = WaveReadLaneAt(val2, 42);
+// CHECK:      [[val3:%\d+]] = OpLoad %int %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBroadcast %int %uint_3 [[val3]] %uint_15
+    values[x].val3 = WaveReadLaneAt(val3, 15);
+}

+ 32 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave-read-lane-first.vulkan1.2.hlsl

@@ -0,0 +1,32 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// CHECK: ; Version: 1.5
+
+struct S {
+    uint4 val1;
+     int2 val2;
+    float val3;
+};
+
+RWStructuredBuffer<S> values;
+
+// CHECK: OpCapability GroupNonUniformBallot
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+    uint x = id.x;
+
+    uint4 val1 = values[x].val1;
+     int2 val2 = values[x].val2;
+    float val3 = values[x].val3;
+
+// CHECK:      [[val1:%\d+]] = OpLoad %v4uint %val1
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBroadcastFirst %v4uint %uint_3 [[val1]]
+    values[x].val1 = WaveReadLaneFirst(val1);
+// CHECK:      [[val2:%\d+]] = OpLoad %v2int %val2
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBroadcastFirst %v2int %uint_3 [[val2]]
+    values[x].val2 = WaveReadLaneFirst(val2);
+// CHECK:      [[val3:%\d+]] = OpLoad %float %val3
+// CHECK-NEXT:      {{%\d+}} = OpGroupNonUniformBroadcastFirst %float %uint_3 [[val3]]
+    values[x].val3 = WaveReadLaneFirst(val3);
+}

+ 27 - 0
tools/clang/test/CodeGenSPIRV/sm6.wave.builtin.no-dup.vulkan1.2.hlsl

@@ -0,0 +1,27 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// Some wave ops translate into SPIR-V builtin variables.
+// Test that we are not generating duplicated builtins for multiple calls
+// of the same wave ops.
+RWStructuredBuffer<uint> values;
+
+// CHECK: OpEntryPoint GLCompute
+// CHECK-SAME: %SubgroupSize %SubgroupLocalInvocationId
+
+// CHECK: OpDecorate %SubgroupSize BuiltIn SubgroupSize
+// CHECK-NOT: OpDecorate {{%\w+}} BuiltIn SubgroupSize
+
+// CHECK: OpDecorate %SubgroupLocalInvocationId BuiltIn SubgroupLocalInvocationId
+// CHECK-NOT: OpDecorate {{%\w+}} BuiltIn SubgroupLocalInvocationId
+
+// CHECK: %SubgroupSize = OpVariable %_ptr_Input_uint Input
+// CHECK-NEXT: %SubgroupLocalInvocationId = OpVariable %_ptr_Input_uint Input
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+// CHECK: OpLoad %uint %SubgroupSize
+// CHECK: OpLoad %uint %SubgroupSize
+// CHECK: OpLoad %uint %SubgroupLocalInvocationId
+// CHECK: OpLoad %uint %SubgroupLocalInvocationId
+    values[id.x] = WaveGetLaneCount() + WaveGetLaneCount() + WaveGetLaneIndex() + WaveGetLaneIndex();
+}

+ 38 - 0
tools/clang/test/CodeGenSPIRV/vk.1p2.block-decoration.hlsl

@@ -0,0 +1,38 @@
+// Run: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.2
+
+// The BufferBlock decoration cannot be used with SPIR-V 1.4 or above.
+// Instead, we must use the Block decoration with the StorageBuffer storage class.
+
+// CHECK: ; Version: 1.5
+
+// CHECK: OpDecorate %type_ByteAddressBuffer Block
+// CHECK: OpDecorate %type_RWByteAddressBuffer Block
+// CHECK: OpDecorate %type_TextureBuffer_S Block
+// CHECK: OpDecorate %type_StructuredBuffer_v3uint Block
+
+// CHECK: %_ptr_StorageBuffer_type_ByteAddressBuffer = OpTypePointer StorageBuffer %type_ByteAddressBuffer
+// CHECK: %_ptr_StorageBuffer_type_RWByteAddressBuffer = OpTypePointer StorageBuffer %type_RWByteAddressBuffer
+// CHECK: %_ptr_StorageBuffer_type_TextureBuffer_S = OpTypePointer StorageBuffer %type_TextureBuffer_S
+// CHECK: %_ptr_StorageBuffer_type_StructuredBuffer_v3uint = OpTypePointer StorageBuffer %type_StructuredBuffer_v3uint
+// CHECK: %rwsb = OpVariable %_ptr_StorageBuffer_type_RWStructuredBuffer_S StorageBuffer
+// CHECK: %counter_var_rwsb = OpVariable %_ptr_StorageBuffer_type_ACSBuffer_counter StorageBuffer
+struct S {
+  float4 f[5];
+};
+
+ByteAddressBuffer bab;
+RWByteAddressBuffer rwbab;
+TextureBuffer<S> tb;
+StructuredBuffer<uint3> sb;
+RWStructuredBuffer<S> rwsb;
+
+[numthreads(1, 1, 1)]
+void main() {
+// CHECK:   [[vec:%\d+]] = OpAccessChain %_ptr_StorageBuffer_v4float %rwsb %int_0 %uint_2 %int_0 %int_1
+// CHECK:       {{%\d+}} = OpAccessChain %_ptr_StorageBuffer_float [[vec]] %int_0
+  float a = rwsb[2].f[1].x;
+
+// CHECK: [[counterPtr:%\d+]] = OpAccessChain %_ptr_StorageBuffer_int %counter_var_rwsb %uint_0
+// CHECK:            {{%\d+}} = OpAtomicIAdd %int [[counterPtr]] %uint_1 %uint_0 %int_1
+  rwsb.IncrementCounter();
+}

+ 39 - 0
tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp

@@ -2236,4 +2236,43 @@ TEST_F(FileTest, Vk1p2EntryPoint) {
   runFileTest("vk.1p2.entry-point.hlsl");
 }
 
+// Test deprecation of BufferBlock decoration after SPIR-V 1.3.
+TEST_F(FileTest, Vk1p2BlockDecoration) {
+  useVulkan1p2();
+  runFileTest("vk.1p2.block-decoration.hlsl");
+}
+
+// Test shaders that require Vulkan1.1 support with
+// -fspv-target-env=vulkan1.2 option to make sure that enabling
+// Vulkan1.2 also enables Vulkan1.1.
+TEST_F(FileTest, CompatibilityWithVk1p1) {
+  useVulkan1p2();
+  runFileTest("meshshading.nv.fncall.amplification.vulkan1.2.hlsl");
+  runFileTest("sm6.quad-read-across-diagonal.vulkan1.2.hlsl");
+  runFileTest("sm6.quad-read-across-x.vulkan1.2.hlsl");
+  runFileTest("sm6.quad-read-across-y.vulkan1.2.hlsl");
+  runFileTest("sm6.quad-read-lane-at.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-all-equal.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-all-true.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-any-true.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-ballot.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-bit-and.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-bit-or.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-bit-xor.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-count-bits.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-max.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-min.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-product.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-active-sum.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-get-lane-count.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-get-lane-index.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-is-first-lane.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-prefix-count-bits.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-prefix-product.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-prefix-sum.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-read-lane-at.vulkan1.2.hlsl");
+  runFileTest("sm6.wave-read-lane-first.vulkan1.2.hlsl");
+  runFileTest("sm6.wave.builtin.no-dup.vulkan1.2.hlsl");
+}
+
 } // namespace