Explorar o código

Load remainder buffer elements in the end (#3462)

Vishal Sharma %!s(int64=4) %!d(string=hai) anos
pai
achega
93a20f7967

+ 9 - 10
lib/HLSL/HLOperationLower.cpp

@@ -6883,16 +6883,7 @@ static Value* TranslateStructBufVecLd(Type* VecEltTy, unsigned ElemCount,
 
   std::vector<Value*> elts(ElemCount);
   unsigned rest = (ElemCount % 4);
-  if (rest) {
-    Value* ResultElts[4];
-    Value *bufLd = GenerateStructBufLd(handle, bufIdx, offset, status, VecEltTy, ResultElts, OP, Builder, rest, alignment);
-    bufLds.emplace_back(bufLd);
-    for (unsigned i = 0; i < rest; i++)
-      elts[i] = ResultElts[i];
-    offset = Builder.CreateAdd(offset, OP->GetU32Const(EltSize * rest));
-  }
-
-  for (unsigned i = rest; i < ElemCount; i += 4) {
+  for (unsigned i = 0; i < ElemCount-rest; i += 4) {
     Value* ResultElts[4];
     Value* bufLd = GenerateStructBufLd(handle, bufIdx, offset, status, VecEltTy, ResultElts, OP, Builder, 4, alignment);
     bufLds.emplace_back(bufLd);
@@ -6905,6 +6896,14 @@ static Value* TranslateStructBufVecLd(Type* VecEltTy, unsigned ElemCount,
     offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
   }
 
+  if (rest) {
+    Value* ResultElts[4];
+    Value* bufLd = GenerateStructBufLd(handle, bufIdx, offset, status, VecEltTy, ResultElts, OP, Builder, rest, alignment);
+    bufLds.emplace_back(bufLd);
+    for (unsigned i = 0; i < rest; i++)
+      elts[ElemCount - rest + i] = ResultElts[i];
+  }
+
   // If the expected return type is scalar then skip building a vector
   if (isScalarTy) {
     return elts[0];

+ 7 - 7
tools/clang/test/HLSLFileCheck/hlsl/objects/StructuredBuffer/struct_buf_mat_load_method.hlsl

@@ -24,15 +24,15 @@
 
 // CHK_MAT2x2: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
 
-// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
-// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
+// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 2)
+// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 8, i8 3, i32 2)
 
-// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
-// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
+// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 2)
+// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 8, i8 3, i32 2)
 
-// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
-// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 4)
-// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 20, i8 15, i32 4)
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 32, i8 1, i32 4)
 
 // CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
 // CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)

+ 7 - 7
tools/clang/test/HLSLFileCheck/hlsl/objects/StructuredBuffer/struct_buf_mat_subscript_method.hlsl

@@ -24,15 +24,15 @@
 
 // CHK_MAT2x2: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
 
-// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
-// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
+// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 2)
+// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 8, i8 3, i32 2)
 
-// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
-// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
+// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 2)
+// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 8, i8 3, i32 2)
 
-// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
-// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 4)
-// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 20, i8 15, i32 4)
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 32, i8 1, i32 4)
 
 // CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
 // CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)