Quellcode durchsuchen

Fix issues with structured buffer load lowering (#3454)

Vishal Sharma vor 4 Jahren
Ursprung
Commit
99f28fc529

+ 53 - 24
lib/HLSL/HLOperationLower.cpp

@@ -3578,6 +3578,10 @@ Value *GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
   MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
   IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment);
 
+static Value* TranslateStructBufVecLd(Type* VecEltTy, unsigned VecElemCount,
+  IRBuilder<>& Builder, Value* handle, hlsl::OP* OP, Value* status,
+  Value* bufIdx, Value* baseOffset, const DataLayout& DL, std::vector<Value*>& bufLds, bool isScalarTy = false);
+
 void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
                    IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) {
 
@@ -3607,12 +3611,22 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
   }
 
   if (DXIL::IsStructuredBuffer(RK)) {
-    // Basic type case for StructuredBuffer::Load()
-    Value *ResultElts[4];
-    Value *StructBufLoad = GenerateStructBufLd(helper.handle, helper.addr, OP->GetU32Const(0),
-      helper.status, EltTy, ResultElts, OP, Builder, numComponents, Alignment);
-    dxilutil::MigrateDebugValue(helper.retVal, StructBufLoad);
-    Value *retValNew = ScalarizeElements(Ty, ResultElts, Builder);
+    std::vector<Value*> bufLds;
+    const bool isBool = EltTy->isIntegerTy(1);
+
+    // Bools are represented as i32 in memory.
+    Type* MemReprTy = isBool ? Builder.getInt32Ty() : EltTy;
+    bool isScalarTy = !Ty->isVectorTy();
+    Value* retValNew = TranslateStructBufVecLd(MemReprTy, numComponents, Builder, helper.handle, OP, helper.status,
+      helper.addr, OP->GetU32Const(0), DL, bufLds, isScalarTy);
+    DXASSERT_NOMSG(!bufLds.empty());
+    dxilutil::MigrateDebugValue(helper.retVal, bufLds.front());
+
+    if (isBool) {
+      // Convert result back to register representation.
+      retValNew = Builder.CreateICmpNE(retValNew, Constant::getNullValue(retValNew->getType()));
+    }
+
     helper.retVal->replaceAllUsesWith(retValNew);
     helper.retVal = retValNew;
     return;
@@ -6856,34 +6870,32 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
   Builder.CreateCall(dxilF, Args);
 }
 
-Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
-                               Value *handle, hlsl::OP *OP, Value *status,
-                               Value *bufIdx, Value *baseOffset,
-                               const DataLayout &DL) {
-  HLMatrixType MatTy = HLMatrixType::cast(matType);
-  Type *EltTy = MatTy.getElementTypeForMem();
-  unsigned  EltSize = DL.getTypeAllocSize(EltTy);
+
+static Value* TranslateStructBufVecLd(Type* VecEltTy, unsigned ElemCount,
+  IRBuilder<>& Builder, Value* handle, hlsl::OP* OP, Value* status,
+  Value* bufIdx, Value* baseOffset, const DataLayout& DL, std::vector<Value*> &bufLds, bool isScalarTy) {
+  unsigned  EltSize = DL.getTypeAllocSize(VecEltTy);
   Constant* alignment = OP->GetI32Const(EltSize);
 
-  Value *offset = baseOffset;
+  Value* offset = baseOffset;
   if (baseOffset == nullptr)
     offset = OP->GetU32Const(0);
 
-  unsigned matSize = MatTy.getNumElements();
-  std::vector<Value *> elts(matSize);
-
-  unsigned rest = (matSize % 4);
+  std::vector<Value*> elts(ElemCount);
+  unsigned rest = (ElemCount % 4);
   if (rest) {
-    Value *ResultElts[4];
-    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 3, alignment);
+    Value* ResultElts[4];
+    Value *bufLd = GenerateStructBufLd(handle, bufIdx, offset, status, VecEltTy, ResultElts, OP, Builder, rest, alignment);
+    bufLds.emplace_back(bufLd);
     for (unsigned i = 0; i < rest; i++)
       elts[i] = ResultElts[i];
     offset = Builder.CreateAdd(offset, OP->GetU32Const(EltSize * rest));
   }
 
-  for (unsigned i = rest; i < matSize; i += 4) {
-    Value *ResultElts[4];
-    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 4, alignment);
+  for (unsigned i = rest; i < ElemCount; i += 4) {
+    Value* ResultElts[4];
+    Value* bufLd = GenerateStructBufLd(handle, bufIdx, offset, status, VecEltTy, ResultElts, OP, Builder, 4, alignment);
+    bufLds.emplace_back(bufLd);
     elts[i] = ResultElts[0];
     elts[i + 1] = ResultElts[1];
     elts[i + 2] = ResultElts[2];
@@ -6893,7 +6905,24 @@ Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
     offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
   }
 
-  Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
+  // If the expected return type is scalar then skip building a vector
+  if (isScalarTy) {
+    return elts[0];
+  }
+
+  Value* Vec = HLMatrixLower::BuildVector(VecEltTy, elts, Builder);
+  return Vec;
+}
+
+Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
+                               Value *handle, hlsl::OP *OP, Value *status,
+                               Value *bufIdx, Value *baseOffset,
+                               const DataLayout &DL) { // Loads all elements of a matrix from a structured buffer via TranslateStructBufVecLd and returns the converted result.
+  HLMatrixType MatTy = HLMatrixType::cast(matType);
+  Type *EltTy = MatTy.getElementTypeForMem(); // element type used for the buffer loads (memory form, per the accessor name)
+  unsigned matSize = MatTy.getNumElements(); // the matrix is loaded as a flat run of this many elements
+  std::vector<Value*> bufLds; // receives the generated load calls; not read again in this function
+  Value* Vec = TranslateStructBufVecLd(EltTy, matSize, Builder, handle, OP, status, bufIdx, baseOffset, DL, bufLds); // isScalarTy keeps its default (false), so a vector is always built
   Vec = MatTy.emitLoweredMemToReg(Vec, Builder); // presumably converts the memory-form vector to its register form - confirm against HLMatrixType
   return Vec;
 }

+ 54 - 0
tools/clang/test/HLSLFileCheck/hlsl/objects/StructuredBuffer/struct_buf_mat_load_method.hlsl

@@ -0,0 +1,54 @@
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=float %s | FileCheck %s -check-prefix=CHK_SCALAR
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=float1 %s | FileCheck %s -check-prefix=CHK_VEC1
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=float1x1 %s | FileCheck %s -check-prefix=CHK_MAT1x1
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=bool1x2 %s | FileCheck %s -check-prefix=CHK_MAT1x2
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=int2x1 %s | FileCheck %s -check-prefix=CHK_MAT2x1
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=uint2x2 %s | FileCheck %s -check-prefix=CHK_MAT2x2
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=float16_t2x3 %s | FileCheck %s -check-prefix=CHK_MAT2x3
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=uint16_t3x2 %s | FileCheck %s -check-prefix=CHK_MAT3x2
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=float3x3 %s | FileCheck %s -check-prefix=CHK_MAT3x3
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=int3x4 %s | FileCheck %s -check-prefix=CHK_MAT3x4
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=bool4x3 %s | FileCheck %s -check-prefix=CHK_MAT4x3
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=uint4x4 %s | FileCheck %s -check-prefix=CHK_MAT4x4
+
+
+// CHK_SCALAR: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
+
+// CHK_VEC1: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
+
+// CHK_MAT1x1: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
+
+// CHK_MAT1x2: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 4)
+
+// CHK_MAT2x1: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 4)
+
+// CHK_MAT2x2: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
+
+// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
+// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
+
+// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
+// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
+
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 4)
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 20, i8 15, i32 4)
+
+// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
+// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)
+// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 32, i8 15, i32 4)
+
+// CHK_MAT4x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
+// CHK_MAT4x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)
+// CHK_MAT4x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 32, i8 15, i32 4)
+
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 32, i8 15, i32 4)
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 48, i8 15, i32 4)
+
+StructuredBuffer<TY> stbuf; // TY is supplied by each RUN line through -DTY=<type>
+
+TY main (int i : IN0) : OUT {
+  return stbuf.Load(i); // reads element i through the Load() method; the CHK_* lines match the resulting rawBufferLoad calls
+}

+ 54 - 0
tools/clang/test/HLSLFileCheck/hlsl/objects/StructuredBuffer/struct_buf_mat_subscript_method.hlsl

@@ -0,0 +1,54 @@
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=float %s | FileCheck %s -check-prefix=CHK_SCALAR
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=float1 %s | FileCheck %s -check-prefix=CHK_VEC1
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=float1x1 %s | FileCheck %s -check-prefix=CHK_MAT1x1
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=bool1x2 %s | FileCheck %s -check-prefix=CHK_MAT1x2
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=int2x1 %s | FileCheck %s -check-prefix=CHK_MAT2x1
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=uint2x2 %s | FileCheck %s -check-prefix=CHK_MAT2x2
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=float16_t2x3 %s | FileCheck %s -check-prefix=CHK_MAT2x3
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=uint16_t3x2 %s | FileCheck %s -check-prefix=CHK_MAT3x2
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=float3x3 %s | FileCheck %s -check-prefix=CHK_MAT3x3
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=int3x4 %s | FileCheck %s -check-prefix=CHK_MAT3x4
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=bool4x3 %s | FileCheck %s -check-prefix=CHK_MAT4x3
+// RUN: %dxc -E main -T vs_6_2 -enable-16bit-types -DTY=uint4x4 %s | FileCheck %s -check-prefix=CHK_MAT4x4
+
+
+// CHK_SCALAR: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
+
+// CHK_VEC1: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
+
+// CHK_MAT1x1: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
+
+// CHK_MAT1x2: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 4)
+
+// CHK_MAT2x1: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 4)
+
+// CHK_MAT2x2: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
+
+// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
+// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
+
+// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
+// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
+
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 4)
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 20, i8 15, i32 4)
+
+// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
+// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)
+// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 32, i8 15, i32 4)
+
+// CHK_MAT4x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
+// CHK_MAT4x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)
+// CHK_MAT4x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 32, i8 15, i32 4)
+
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 32, i8 15, i32 4)
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 48, i8 15, i32 4)
+
+StructuredBuffer<TY> stbuf; // TY is supplied by each RUN line through -DTY=<type>
+
+TY main (int i : IN0) : OUT {
+  return stbuf[i]; // reads element i through the subscript operator; the CHK_* lines match the resulting rawBufferLoad calls
+}