浏览代码

Fix byte-offset calculation when writing to RWByteAddressBuffer with elements greater than four (#3485)

Vishal Sharma 4 年之前
父节点
当前提交
43267d1b06

+ 13 - 3
lib/HLSL/HLOperationLower.cpp

@@ -3806,6 +3806,10 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
                     Value *offset, IRBuilder<> &Builder, hlsl::OP *OP) {
   Type *Ty = val->getType();
 
+  // This function is no longer used for lowering stores to a
+  // structured buffer.
+  DXASSERT_NOMSG(RK != DxilResource::Kind::StructuredBuffer);
+
   OP::OpCode opcode = OP::OpCode::NumOpCodes;
   switch (RK) {
   case DxilResource::Kind::RawBuffer:
@@ -3921,9 +3925,15 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
 
     // For the second and subsequent store calls, advance offset0 by the byte size of the preceding stores
     if (j > 0) {
-      Value* newOffset = ConstantInt::get(Builder.getInt32Ty(), j);
-      newOffset = Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffset);
-      storeArgsList[j][offset0Idx] = newOffset;
+      // Stores of more than four components are not allowed for
+      // TypedBuffer and Textures, so the more-than-four-elements
+      // case should only be reached here for RawBuffer.
+      DXASSERT_NOMSG(RK == DxilResource::Kind::RawBuffer);
+      unsigned EltSize = OP->GetAllocSizeForType(EltTy);
+      unsigned newOffset = EltSize * MaxStoreElemCount * j;
+      Value* newOffsetVal = ConstantInt::get(Builder.getInt32Ty(), newOffset);
+      newOffsetVal = Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffsetVal);
+      storeArgsList[j][offset0Idx] = newOffsetVal;
     }
 
     // values

+ 11 - 11
tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/rwbab_incomplete_mat_store_const_init_zpc.hlsl

@@ -35,47 +35,47 @@ void main()
   // CHK_TEST5: dx.op.rawBufferStore.f32
   // CHK_TEST5: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST5: dx.op.rawBufferStore.f32
-  // CHK_TEST5: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00
+  // CHK_TEST5: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00
   float2x3 t = {1,2,3,4,5,6};
 #elif TEST6
   // CHK_TEST6: dx.op.rawBufferStore.f32
   // CHK_TEST6: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST6: dx.op.rawBufferStore.f32
-  // CHK_TEST6: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00
+  // CHK_TEST6: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00
   float3x2 t = {1,2,3,4,5,6};
 #elif TEST7  
   // CHK_TEST7: dx.op.rawBufferStore.f32
   // CHK_TEST7: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST7: dx.op.rawBufferStore.f32
-  // CHK_TEST7: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST7: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
   // CHK_TEST7: dx.op.rawBufferStore.f32
-  // CHK_TEST7: i32 2, i32 undef, float 9.000000e+00
+  // CHK_TEST7: i32 32, i32 undef, float 9.000000e+00
   float3x3 t = {1,2,3,4,5,6,7,8,9};
 #elif TEST8  
   // CHK_TEST8: dx.op.rawBufferStore.f32
   // CHK_TEST8: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST8: dx.op.rawBufferStore.f32
-  // CHK_TEST8: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST8: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
   // CHK_TEST8: dx.op.rawBufferStore.f32
-  // CHK_TEST8: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  // CHK_TEST8: i32 32, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
   float3x4 t = {1,2,3,4,5,6,7,8,9,10,11,12};
 #elif TEST9  
   // CHK_TEST9: dx.op.rawBufferStore.f32
   // CHK_TEST9: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST9: dx.op.rawBufferStore.f32
-  // CHK_TEST9: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST9: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
   // CHK_TEST9: dx.op.rawBufferStore.f32
-  // CHK_TEST9: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  // CHK_TEST9: i32 32, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
   float4x3 t = {1,2,3,4,5,6,7,8,9,10,11,12};
 #else
   // CHK_TEST10: dx.op.rawBufferStore.f32
   // CHK_TEST10: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST10: dx.op.rawBufferStore.f32
-  // CHK_TEST10: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST10: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
   // CHK_TEST10: dx.op.rawBufferStore.f32
-  // CHK_TEST10: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  // CHK_TEST10: i32 32, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
   // CHK_TEST10: dx.op.rawBufferStore.f32
-  // CHK_TEST10: i32 3, i32 undef, float 1.300000e+01, float 1.400000e+01, float 1.500000e+01, float 1.600000e+01
+  // CHK_TEST10: i32 48, i32 undef, float 1.300000e+01, float 1.400000e+01, float 1.500000e+01, float 1.600000e+01
 	float4x4 t = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
 #endif
 	buffer.Store(0, t);

+ 11 - 11
tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/rwbab_incomplete_mat_store_const_init_zpr.hlsl

@@ -35,47 +35,47 @@ void main()
   // CHK_TEST5: dx.op.rawBufferStore.f32
   // CHK_TEST5: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST5: dx.op.rawBufferStore.f32
-  // CHK_TEST5: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00
+  // CHK_TEST5: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00
   float2x3 t = {1,2,3,4,5,6};
 #elif TEST6
   // CHK_TEST6: dx.op.rawBufferStore.f32
   // CHK_TEST6: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST6: dx.op.rawBufferStore.f32
-  // CHK_TEST6: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00
+  // CHK_TEST6: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00
   float3x2 t = {1,2,3,4,5,6};
 #elif TEST7  
   // CHK_TEST7: dx.op.rawBufferStore.f32
   // CHK_TEST7: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST7: dx.op.rawBufferStore.f32
-  // CHK_TEST7: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST7: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
   // CHK_TEST7: dx.op.rawBufferStore.f32
-  // CHK_TEST7: i32 2, i32 undef, float 9.000000e+00
+  // CHK_TEST7: i32 32, i32 undef, float 9.000000e+00
   float3x3 t = {1,2,3,4,5,6,7,8,9};
 #elif TEST8  
   // CHK_TEST8: dx.op.rawBufferStore.f32
   // CHK_TEST8: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST8: dx.op.rawBufferStore.f32
-  // CHK_TEST8: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST8: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
   // CHK_TEST8: dx.op.rawBufferStore.f32
-  // CHK_TEST8: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  // CHK_TEST8: i32 32, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
   float3x4 t = {1,2,3,4,5,6,7,8,9,10,11,12};
 #elif TEST9  
   // CHK_TEST9: dx.op.rawBufferStore.f32
   // CHK_TEST9: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST9: dx.op.rawBufferStore.f32
-  // CHK_TEST9: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST9: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
   // CHK_TEST9: dx.op.rawBufferStore.f32
-  // CHK_TEST9: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  // CHK_TEST9: i32 32, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
   float4x3 t = {1,2,3,4,5,6,7,8,9,10,11,12};
 #else
   // CHK_TEST10: dx.op.rawBufferStore.f32
   // CHK_TEST10: i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00
   // CHK_TEST10: dx.op.rawBufferStore.f32
-  // CHK_TEST10: i32 1, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
+  // CHK_TEST10: i32 16, i32 undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00
   // CHK_TEST10: dx.op.rawBufferStore.f32
-  // CHK_TEST10: i32 2, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
+  // CHK_TEST10: i32 32, i32 undef, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01
   // CHK_TEST10: dx.op.rawBufferStore.f32
-  // CHK_TEST10: i32 3, i32 undef, float 1.300000e+01, float 1.400000e+01, float 1.500000e+01, float 1.600000e+01
+  // CHK_TEST10: i32 48, i32 undef, float 1.300000e+01, float 1.400000e+01, float 1.500000e+01, float 1.600000e+01
 	float4x4 t = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
 #endif
 	buffer.Store(0, t);

+ 11 - 11
tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/rwbab_incomplete_mat_store_zpr.hlsl

@@ -24,38 +24,38 @@ void main(uint i : IN0, TY t : IN1)
   // CHK_TEST4: dx.op.rawBufferStore.f32
 
   // CHK_TEST5: dx.op.rawBufferStore.f32
-  // CHK_TEST5: add i32 %{{.*}}, 1
+  // CHK_TEST5: add i32 %{{.*}}, 16
   // CHK_TEST5: dx.op.rawBufferStore.f32
   
   // CHK_TEST6: dx.op.rawBufferStore.f32
-  // CHK_TEST6: add i32 %{{.*}}, 1
+  // CHK_TEST6: add i32 %{{.*}}, 16
   // CHK_TEST6: dx.op.rawBufferStore.f32
   
   // CHK_TEST7: dx.op.rawBufferStore.f32
-  // CHK_TEST7: add i32 %{{.*}}, 1
+  // CHK_TEST7: add i32 %{{.*}}, 16
   // CHK_TEST7: dx.op.rawBufferStore.f32
-  // CHK_TEST7: add i32 %{{.*}}, 2
+  // CHK_TEST7: add i32 %{{.*}}, 32
   // CHK_TEST7: dx.op.rawBufferStore.f32
   
   // CHK_TEST8: dx.op.rawBufferStore.f32
-  // CHK_TEST8: add i32 %{{.*}}, 1
+  // CHK_TEST8: add i32 %{{.*}}, 16
   // CHK_TEST8: dx.op.rawBufferStore.f32
-  // CHK_TEST8: add i32 %{{.*}}, 2
+  // CHK_TEST8: add i32 %{{.*}}, 32
   // CHK_TEST8: dx.op.rawBufferStore.f32
   
   // CHK_TEST9: dx.op.rawBufferStore.f32
-  // CHK_TEST9: add i32 %{{.*}}, 1
+  // CHK_TEST9: add i32 %{{.*}}, 16
   // CHK_TEST9: dx.op.rawBufferStore.f32
-  // CHK_TEST9: add i32 %{{.*}}, 2
+  // CHK_TEST9: add i32 %{{.*}}, 32
   // CHK_TEST9: dx.op.rawBufferStore.f32
   
   
   // CHK_TEST10: dx.op.rawBufferStore.f32
-  // CHK_TEST10: add i32 %{{.*}}, 1
+  // CHK_TEST10: add i32 %{{.*}}, 16
   // CHK_TEST10: dx.op.rawBufferStore.f32
-  // CHK_TEST10: add i32 %{{.*}}, 2
+  // CHK_TEST10: add i32 %{{.*}}, 32
   // CHK_TEST10: dx.op.rawBufferStore.f32
-  // CHK_TEST10: add i32 %{{.*}}, 3
+  // CHK_TEST10: add i32 %{{.*}}, 48
   // CHK_TEST10: dx.op.rawBufferStore.f32  
 
 	buffer.Store(i, t);