Bladeren bron

Fix a bug in buffer store when storing matrix of size greater than four (#3422)

Vishal Sharma 4 jaren geleden
bovenliggende
commit
991205e511

+ 99 - 61
lib/HLSL/HLOperationLower.cpp

@@ -3851,6 +3851,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
   storeArgs.emplace_back(opArg);  // opcode
   storeArgs.emplace_back(handle); // resource handle
 
+  unsigned offset0Idx = 0;
   if (RK == DxilResource::Kind::RawBuffer ||
       RK == DxilResource::Kind::TypedBuffer) {
     // Offset 0
@@ -3861,6 +3862,9 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
       storeArgs.emplace_back(offset); // offset
     }
 
+    // Remember the index of offset0 within storeArgs for later use
+    offset0Idx = storeArgs.size() - 1;
+
     // Offset 1
     storeArgs.emplace_back(undefI);
   } else {
@@ -3873,6 +3877,9 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
     else
       storeArgs.emplace_back(offset);
 
+    // Remember the index of offset0 within storeArgs for later use
+    offset0Idx = storeArgs.size() - 1;
+
     for (unsigned i = 1; i < 3; i++) {
       if (i < coordSize)
         storeArgs.emplace_back(Builder.CreateExtractElement(offset, i));
@@ -3882,76 +3889,107 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
     // TODO: support mip for texture ST
   }
 
-  // values
-  uint8_t mask = 0;
-  if (Ty->isVectorTy()) {
-    unsigned vecSize = Ty->getVectorNumElements();
-    Value *emptyVal = undefVal;
-    if (isTyped) {
-      mask = DXIL::kCompMask_All;
-      emptyVal = Builder.CreateExtractElement(val, (uint64_t)0);
+  constexpr unsigned MaxStoreElemCount = 4;
+  const unsigned CompCount = Ty->isVectorTy() ? Ty->getVectorNumElements() : 1;
+  const unsigned StoreInstCount = (CompCount / MaxStoreElemCount) + (CompCount % MaxStoreElemCount != 0);
+  SmallVector<decltype(storeArgs), 4> storeArgsList;
+
+  // The maximum number of elements to store should be 16 (for a 4x4 matrix)
+  DXASSERT_NOMSG(StoreInstCount >= 1 && StoreInstCount <= 4);
+  
+  // If the number of elements to store exceeds the maximum number of elements
+  // that can be stored in a single store call, make sure to generate enough
+  // store calls to store all elements.
+  for (unsigned j = 0; j < StoreInstCount; j++) {
+    decltype(storeArgs) newStoreArgs;
+    for (Value* storeArg : storeArgs)
+      newStoreArgs.emplace_back(storeArg);
+    storeArgsList.emplace_back(newStoreArgs);
+  }
+
+  for (unsigned j = 0; j < storeArgsList.size(); j++) {
+
+    // For the second and subsequent store calls, set offset0 to the first call's offset0 plus j
+    if (j > 0) {
+      Value* newOffset = ConstantInt::get(Builder.getInt32Ty(), j);
+      newOffset = Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffset);
+      storeArgsList[j][offset0Idx] = newOffset;
     }
 
-    for (unsigned i = 0; i < 4; i++) {
-      if (i < vecSize) {
-        storeArgs.emplace_back(Builder.CreateExtractElement(val, i));
-        mask |= (1<<i);
-      } else {
-        storeArgs.emplace_back(emptyVal);
+    // values
+    uint8_t mask = 0;
+    if (Ty->isVectorTy()) {
+      unsigned vecSize = std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - (j * MaxStoreElemCount);
+      Value* emptyVal = undefVal;
+      if (isTyped) {
+        mask = DXIL::kCompMask_All;
+        emptyVal = Builder.CreateExtractElement(val, (uint64_t)0);
       }
-    }
 
-  } else {
-    if (isTyped) {
-      mask = DXIL::kCompMask_All;
-      storeArgs.emplace_back(val);
-      storeArgs.emplace_back(val);
-      storeArgs.emplace_back(val);
-      storeArgs.emplace_back(val);
-    } else {
-      storeArgs.emplace_back(val);
-      storeArgs.emplace_back(undefVal);
-      storeArgs.emplace_back(undefVal);
-      storeArgs.emplace_back(undefVal);
-      mask = DXIL::kCompMask_X;
-    }
-  }
+      for (unsigned i = 0; i < MaxStoreElemCount; i++) {
+        if (i < vecSize) {
+          storeArgsList[j].emplace_back(Builder.CreateExtractElement(val, (j * MaxStoreElemCount) + i));
+          mask |= (1 << i);
+        }
+        else {
+          storeArgsList[j].emplace_back(emptyVal);
+        }
+      }
 
-  if (is64 && isTyped) {
-    unsigned size = 1;
-    if (Ty->isVectorTy()) {
-      size = Ty->getVectorNumElements();
-    }
-    DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords");
-    unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore
-                             ? DXIL::OperandIndex::kTextureStoreVal0OpIdx
-                             : DXIL::OperandIndex::kBufferStoreVal0OpIdx;
-    Value *V0 = storeArgs[val0OpIdx];
-    Value *V1 = storeArgs[val0OpIdx+1];
-
-    Value *vals32[4];
-    EltTy = Ty->getScalarType();
-    Split64bitValForStore(EltTy, {V0, V1}, size, vals32, OP, Builder);
-    // Fill the uninit vals.
-    if (size == 1) {
-      vals32[2] = vals32[0];
-      vals32[3] = vals32[1];
     }
-    // Change valOp to 32 version.
-    for (unsigned i = 0; i < 4; i++) {
-      storeArgs[val0OpIdx + i] = vals32[i];
+    else {
+      if (isTyped) {
+        mask = DXIL::kCompMask_All;
+        storeArgsList[j].emplace_back(val);
+        storeArgsList[j].emplace_back(val);
+        storeArgsList[j].emplace_back(val);
+        storeArgsList[j].emplace_back(val);
+      }
+      else {
+        storeArgsList[j].emplace_back(val);
+        storeArgsList[j].emplace_back(undefVal);
+        storeArgsList[j].emplace_back(undefVal);
+        storeArgsList[j].emplace_back(undefVal);
+        mask = DXIL::kCompMask_X;
+      }
     }
-    // change mask for double
-    if (opcode == DXIL::OpCode::RawBufferStore) {
-      mask = size == 1 ?
-        DXIL::kCompMask_X | DXIL::kCompMask_Y : DXIL::kCompMask_All;
+
+    if (is64 && isTyped) {
+      unsigned size = 1;
+      if (Ty->isVectorTy()) {
+        size = std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - (j * MaxStoreElemCount);
+      }
+      DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords");
+      unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore
+        ? DXIL::OperandIndex::kTextureStoreVal0OpIdx
+        : DXIL::OperandIndex::kBufferStoreVal0OpIdx;
+      Value* V0 = storeArgsList[j][val0OpIdx];
+      Value* V1 = storeArgsList[j][val0OpIdx + 1];
+
+      Value* vals32[4];
+      EltTy = Ty->getScalarType();
+      Split64bitValForStore(EltTy, { V0, V1 }, size, vals32, OP, Builder);
+      // Fill the uninit vals.
+      if (size == 1) {
+        vals32[2] = vals32[0];
+        vals32[3] = vals32[1];
+      }
+      // Change valOp to 32 version.
+      for (unsigned i = 0; i < 4; i++) {
+        storeArgsList[j][val0OpIdx + i] = vals32[i];
+      }
+      // change mask for double
+      if (opcode == DXIL::OpCode::RawBufferStore) {
+        mask = size == 1 ?
+          DXIL::kCompMask_X | DXIL::kCompMask_Y : DXIL::kCompMask_All;
+      }
     }
-  }
 
-  storeArgs.emplace_back(OP->GetU8Const(mask)); // mask
-  if (opcode == DXIL::OpCode::RawBufferStore)
-    storeArgs.emplace_back(Alignment); // alignment only for raw buffer
-  Builder.CreateCall(F, storeArgs);
+    storeArgsList[j].emplace_back(OP->GetU8Const(mask)); // mask
+    if (opcode == DXIL::OpCode::RawBufferStore)
+      storeArgsList[j].emplace_back(Alignment); // alignment only for raw buffer
+    Builder.CreateCall(F, storeArgsList[j]);
+  }
 }
 
 Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,

+ 82 - 0
tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/rwbab_incomplete_mat_store_const_init_zpc.hlsl

@@ -0,0 +1,82 @@
+// RUN: %dxc -E main -T vs_6_5 -Zpc -DTEST1=1 %s | FileCheck %s -check-prefix=CHK_TEST1
+// RUN: %dxc -E main -T vs_6_5 -Zpc -DTEST2=1 %s | FileCheck %s -check-prefix=CHK_TEST2
+// RUN: %dxc -E main -T vs_6_5 -Zpc -DTEST3=1 %s | FileCheck %s -check-prefix=CHK_TEST3
+// RUN: %dxc -E main -T vs_6_5 -Zpc -DTEST4=1 %s | FileCheck %s -check-prefix=CHK_TEST4 | XFail Github #3423
+// RUN: %dxc -E main -T vs_6_5 -Zpc -DTEST5=1 %s | FileCheck %s -check-prefix=CHK_TEST5
+// RUN: %dxc -E main -T vs_6_5 -Zpc -DTEST6=1 %s | FileCheck %s -check-prefix=CHK_TEST6
+// RUN: %dxc -E main -T vs_6_5 -Zpc -DTEST7=1 %s | FileCheck %s -check-prefix=CHK_TEST7
+// RUN: %dxc -E main -T vs_6_5 -Zpc -DTEST8=1 %s | FileCheck %s -check-prefix=CHK_TEST8
+// RUN: %dxc -E main -T vs_6_5 -Zpc -DTEST9=1 %s | FileCheck %s -check-prefix=CHK_TEST9
+// RUN: %dxc -E main -T vs_6_5 -Zpc %s | FileCheck %s -check-prefix=CHK_TEST10
+
+// Regression test for github bug #3225
+
+RWByteAddressBuffer buffer;
+
+void main()
+{
+#ifdef TEST1
+  // CHK_TEST1: dx.op.rawBufferStore.f32
+  // CHK_TEST1: i32 0, i32 undef, float 1.000000e+00
+  float1x1 t = {1};
+#elif TEST2
+  // CHK_TEST2: dx.op.rawBufferStore.f32
+  // CHK_TEST2: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00
+  float1x2 t = {1,2};
+#elif TEST3  
+  // CHK_TEST3: dx.op.rawBufferStore.f32
+  // CHK_TEST3: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00
+  float2x1 t = {1,2};
+#elif TEST4  
+  // CHK_TEST4: dx.op.rawBufferStore.f32
+  // CHK_TEST4: i32 0, i32 undef, float 1.000000e+00, float 3.000000e+00, float 2.000000e+00, float 4.000000e+00
+  float2x2 t = {1,2,3,4};
+#elif TEST5  
+  // CHK_TEST5: dx.op.rawBufferStore.f32
+  // CHK_TEST5: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST5: dx.op.rawBufferStore.f32
+  // CHK_TEST5: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00
+  float2x3 t = {1,2,3,4,5,6};
+#elif TEST6
+  // CHK_TEST6: dx.op.rawBufferStore.f32
+  // CHK_TEST6: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST6: dx.op.rawBufferStore.f32
+  // CHK_TEST6: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00
+  float3x2 t = {1,2,3,4,5,6};
+#elif TEST7  
+  // CHK_TEST7: dx.op.rawBufferStore.f32
+  // CHK_TEST7: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST7: dx.op.rawBufferStore.f32
+  // CHK_TEST7: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST7: dx.op.rawBufferStore.f32
+  // CHK_TEST7: i32 2, i32 undef, float 9.000000e+00
+  float3x3 t = {1,2,3,4,5,6,7,8,9};
+#elif TEST8  
+  // CHK_TEST8: dx.op.rawBufferStore.f32
+  // CHK_TEST8: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST8: dx.op.rawBufferStore.f32
+  // CHK_TEST8: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST8: dx.op.rawBufferStore.f32
+  // CHK_TEST8: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  float3x4 t = {1,2,3,4,5,6,7,8,9,10,11,12};
+#elif TEST9  
+  // CHK_TEST9: dx.op.rawBufferStore.f32
+  // CHK_TEST9: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST9: dx.op.rawBufferStore.f32
+  // CHK_TEST9: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST9: dx.op.rawBufferStore.f32
+  // CHK_TEST9: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  float4x3 t = {1,2,3,4,5,6,7,8,9,10,11,12};
+#else
+  // CHK_TEST10: dx.op.rawBufferStore.f32
+  // CHK_TEST10: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST10: dx.op.rawBufferStore.f32
+  // CHK_TEST10: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST10: dx.op.rawBufferStore.f32
+  // CHK_TEST10: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  // CHK_TEST10: dx.op.rawBufferStore.f32
+  // CHK_TEST10: i32 3, i32 undef, float 1.300000e+01, float 1.400000e+01, float 1.500000e+01, float 1.600000e+01
+	float4x4 t = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+#endif
+	buffer.Store(0, t);
+}

+ 82 - 0
tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/rwbab_incomplete_mat_store_const_init_zpr.hlsl

@@ -0,0 +1,82 @@
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTEST1=1 %s | FileCheck %s -check-prefix=CHK_TEST1
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTEST2=1 %s | FileCheck %s -check-prefix=CHK_TEST2
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTEST3=1 %s | FileCheck %s -check-prefix=CHK_TEST3
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTEST4=1 %s | FileCheck %s -check-prefix=CHK_TEST4
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTEST5=1 %s | FileCheck %s -check-prefix=CHK_TEST5
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTEST6=1 %s | FileCheck %s -check-prefix=CHK_TEST6
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTEST7=1 %s | FileCheck %s -check-prefix=CHK_TEST7
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTEST8=1 %s | FileCheck %s -check-prefix=CHK_TEST8
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTEST9=1 %s | FileCheck %s -check-prefix=CHK_TEST9
+// RUN: %dxc -E main -T vs_6_5 -Zpr %s | FileCheck %s -check-prefix=CHK_TEST10
+
+// Regression test for github bug #3225
+
+RWByteAddressBuffer buffer;
+
+void main()
+{
+#ifdef TEST1
+  // CHK_TEST1: dx.op.rawBufferStore.f32
+  // CHK_TEST1: i32 0, i32 undef, float 1.000000e+00
+  float1x1 t = {1};
+#elif TEST2
+  // CHK_TEST2: dx.op.rawBufferStore.f32
+  // CHK_TEST2: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00
+  float1x2 t = {1,2};
+#elif TEST3  
+  // CHK_TEST3: dx.op.rawBufferStore.f32
+  // CHK_TEST3: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00
+  float2x1 t = {1,2};
+#elif TEST4  
+  // CHK_TEST4: dx.op.rawBufferStore.f32
+  // CHK_TEST4: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  float2x2 t = {1,2,3,4};
+#elif TEST5  
+  // CHK_TEST5: dx.op.rawBufferStore.f32
+  // CHK_TEST5: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST5: dx.op.rawBufferStore.f32
+  // CHK_TEST5: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00
+  float2x3 t = {1,2,3,4,5,6};
+#elif TEST6
+  // CHK_TEST6: dx.op.rawBufferStore.f32
+  // CHK_TEST6: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST6: dx.op.rawBufferStore.f32
+  // CHK_TEST6: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00
+  float3x2 t = {1,2,3,4,5,6};
+#elif TEST7  
+  // CHK_TEST7: dx.op.rawBufferStore.f32
+  // CHK_TEST7: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST7: dx.op.rawBufferStore.f32
+  // CHK_TEST7: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST7: dx.op.rawBufferStore.f32
+  // CHK_TEST7: i32 2, i32 undef, float 9.000000e+00
+  float3x3 t = {1,2,3,4,5,6,7,8,9};
+#elif TEST8  
+  // CHK_TEST8: dx.op.rawBufferStore.f32
+  // CHK_TEST8: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST8: dx.op.rawBufferStore.f32
+  // CHK_TEST8: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST8: dx.op.rawBufferStore.f32
+  // CHK_TEST8: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  float3x4 t = {1,2,3,4,5,6,7,8,9,10,11,12};
+#elif TEST9  
+  // CHK_TEST9: dx.op.rawBufferStore.f32
+  // CHK_TEST9: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST9: dx.op.rawBufferStore.f32
+  // CHK_TEST9: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST9: dx.op.rawBufferStore.f32
+  // CHK_TEST9: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  float4x3 t = {1,2,3,4,5,6,7,8,9,10,11,12};
+#else
+  // CHK_TEST10: dx.op.rawBufferStore.f32
+  // CHK_TEST10: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
+  // CHK_TEST10: dx.op.rawBufferStore.f32
+  // CHK_TEST10: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST10: dx.op.rawBufferStore.f32
+  // CHK_TEST10: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  // CHK_TEST10: dx.op.rawBufferStore.f32
+  // CHK_TEST10: i32 3, i32 undef, float 1.300000e+01, float 1.400000e+01, float 1.500000e+01, float 1.600000e+01
+	float4x4 t = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+#endif
+	buffer.Store(0, t);
+}

+ 62 - 0
tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/rwbab_incomplete_mat_store_zpr.hlsl

@@ -0,0 +1,62 @@
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTY=float1x1 %s | FileCheck %s -check-prefix=CHK_TEST1
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTY=float1x2 %s | FileCheck %s -check-prefix=CHK_TEST2
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTY=float2x1 %s | FileCheck %s -check-prefix=CHK_TEST3
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTY=float2x2 %s | FileCheck %s -check-prefix=CHK_TEST4
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTY=float2x3 %s | FileCheck %s -check-prefix=CHK_TEST5
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTY=float3x2 %s | FileCheck %s -check-prefix=CHK_TEST6
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTY=float3x3 %s | FileCheck %s -check-prefix=CHK_TEST7
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTY=float3x4 %s | FileCheck %s -check-prefix=CHK_TEST8
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTY=float4x3 %s | FileCheck %s -check-prefix=CHK_TEST9
+// RUN: %dxc -E main -T vs_6_5 -Zpr -DTY=float4x4 %s | FileCheck %s -check-prefix=CHK_TEST10
+
+// Regression test for github bug #3225
+
+RWByteAddressBuffer buffer;
+
+void main(uint i : IN0, TY t : IN1)
+{
+  // CHK_TEST1: dx.op.rawBufferStore.f32
+
+  // CHK_TEST2: dx.op.rawBufferStore.f32
+
+  // CHK_TEST3: dx.op.rawBufferStore.f32
+
+  // CHK_TEST4: dx.op.rawBufferStore.f32
+
+  // CHK_TEST5: dx.op.rawBufferStore.f32
+  // CHK_TEST5: add i32 %{{.*}}, 1
+  // CHK_TEST5: dx.op.rawBufferStore.f32
+  
+  // CHK_TEST6: dx.op.rawBufferStore.f32
+  // CHK_TEST6: add i32 %{{.*}}, 1
+  // CHK_TEST6: dx.op.rawBufferStore.f32
+  
+  // CHK_TEST7: dx.op.rawBufferStore.f32
+  // CHK_TEST7: add i32 %{{.*}}, 1
+  // CHK_TEST7: dx.op.rawBufferStore.f32
+  // CHK_TEST7: add i32 %{{.*}}, 2
+  // CHK_TEST7: dx.op.rawBufferStore.f32
+  
+  // CHK_TEST8: dx.op.rawBufferStore.f32
+  // CHK_TEST8: add i32 %{{.*}}, 1
+  // CHK_TEST8: dx.op.rawBufferStore.f32
+  // CHK_TEST8: add i32 %{{.*}}, 2
+  // CHK_TEST8: dx.op.rawBufferStore.f32
+  
+  // CHK_TEST9: dx.op.rawBufferStore.f32
+  // CHK_TEST9: add i32 %{{.*}}, 1
+  // CHK_TEST9: dx.op.rawBufferStore.f32
+  // CHK_TEST9: add i32 %{{.*}}, 2
+  // CHK_TEST9: dx.op.rawBufferStore.f32
+  
+  
+  // CHK_TEST10: dx.op.rawBufferStore.f32
+  // CHK_TEST10: add i32 %{{.*}}, 1
+  // CHK_TEST10: dx.op.rawBufferStore.f32
+  // CHK_TEST10: add i32 %{{.*}}, 2
+  // CHK_TEST10: dx.op.rawBufferStore.f32
+  // CHK_TEST10: add i32 %{{.*}}, 3
+  // CHK_TEST10: dx.op.rawBufferStore.f32  
+
+	buffer.Store(i, t);
+}