Browse Source

Fix handling of [RW]ByteAddressBuffer load involving matrix type (#3469)

Vishal Sharma 4 years ago
parent
commit
3126c41892

+ 35 - 36
lib/HLSL/HLOperationLower.cpp

@@ -3573,14 +3573,15 @@ static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::
   return OP->GetI8Const(mask);
 }
 
-Value *GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
+Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset,
   Value *status, Type *EltTy,
   MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
   IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment);
 
-static Value* TranslateStructBufVecLd(Type* VecEltTy, unsigned VecElemCount,
+static Value* TranslateRawBufVecLd(Type* VecEltTy, unsigned VecElemCount,
   IRBuilder<>& Builder, Value* handle, hlsl::OP* OP, Value* status,
-  Value* bufIdx, Value* baseOffset, const DataLayout& DL, std::vector<Value*>& bufLds, bool isScalarTy = false);
+  Value* bufIdx, Value* baseOffset, const DataLayout& DL,
+  std::vector<Value*>& bufLds, unsigned baseAlign, bool isScalarTy = false);
 
 void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
                    IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) {
@@ -3599,26 +3600,28 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
   Type *i64Ty = Builder.getInt64Ty();
   Type *doubleTy = Builder.getDoubleTy();
   Type *EltTy = Ty->getScalarType();
-  // If RawBuffer load of 64-bit value, don't set alignment to 8,
-  // since buffer alignment isn't known to be anything over 4.
-  unsigned alignValue = OP->GetAllocSizeForType(EltTy);
-  if (RK == HLResource::Kind::RawBuffer && alignValue > 4)
-    alignValue = 4;
-  Constant *Alignment = OP->GetI32Const(alignValue);
   unsigned numComponents = 1;
   if (Ty->isVectorTy()) {
     numComponents = Ty->getVectorNumElements();
   }
 
-  if (DXIL::IsStructuredBuffer(RK)) {
+  if (DXIL::IsStructuredBuffer(RK) || DXIL::IsRawBuffer(RK)) {
     std::vector<Value*> bufLds;
     const bool isBool = EltTy->isIntegerTy(1);
 
     // Bool are represented as i32 in memory
     Type* MemReprTy = isBool ? Builder.getInt32Ty() : EltTy;
     bool isScalarTy = !Ty->isVectorTy();
-    Value* retValNew = TranslateStructBufVecLd(MemReprTy, numComponents, Builder, helper.handle, OP, helper.status,
-      helper.addr, OP->GetU32Const(0), DL, bufLds, isScalarTy);
+
+    Value* retValNew = nullptr;
+    if (DXIL::IsStructuredBuffer(RK)) {
+      retValNew = TranslateRawBufVecLd(MemReprTy, numComponents, Builder, helper.handle, OP, helper.status,
+        helper.addr, OP->GetU32Const(0), DL, bufLds, /*baseAlign (in bytes)*/ 8, isScalarTy);
+    } else {
+      retValNew = TranslateRawBufVecLd(MemReprTy, numComponents, Builder, helper.handle, OP, helper.status,
+        nullptr, helper.addr, DL, bufLds, /*baseAlign (in bytes)*/ 4, isScalarTy);
+    }
+
     DXASSERT_NOMSG(!bufLds.empty());
     dxilutil::MigrateDebugValue(helper.retVal, bufLds.front());
 
@@ -3703,14 +3706,7 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
   }
 
   // Offset 1
-  if (RK == DxilResource::Kind::RawBuffer) {
-    // elementOffset, mask, alignment
-    loadArgs.emplace_back(undefI);
-    Type *rtnTy = helper.retVal->getType();
-    loadArgs.emplace_back(GetRawBufferMaskForETy(rtnTy, numComponents, OP));
-    loadArgs.emplace_back(Alignment);
-  }
-  else if (RK == DxilResource::Kind::TypedBuffer) {
+  if (RK == DxilResource::Kind::TypedBuffer) {
     loadArgs.emplace_back(undefI);
   }
 
@@ -6815,7 +6811,7 @@ static Value* ExtractFromTypedBufferLoad(const ResRetValueArray& ResRet,
   return ScalarizeElements(ResultTy, Elems, Builder);
 }
 
-Value *GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
+Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset,
                          Value *status, Type *EltTy,
                          MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
                          IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment) {
@@ -6871,34 +6867,36 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
 }
 
 
-static Value* TranslateStructBufVecLd(Type* VecEltTy, unsigned ElemCount,
+static Value* TranslateRawBufVecLd(Type* VecEltTy, unsigned ElemCount,
   IRBuilder<>& Builder, Value* handle, hlsl::OP* OP, Value* status,
-  Value* bufIdx, Value* baseOffset, const DataLayout& DL, std::vector<Value*> &bufLds, bool isScalarTy) {
+  Value* bufIdx, Value* baseOffset, const DataLayout& DL,
+  std::vector<Value*> &bufLds, unsigned baseAlign, bool isScalarTy) {
+
   unsigned  EltSize = DL.getTypeAllocSize(VecEltTy);
-  Constant* alignment = OP->GetI32Const(EltSize);
+  unsigned alignment = std::min(baseAlign, EltSize);
+  Constant* alignmentVal = OP->GetI32Const(alignment);
 
-  Value* offset = baseOffset;
-  if (baseOffset == nullptr)
-    offset = OP->GetU32Const(0);
+  if (baseOffset == nullptr) {
+    baseOffset = OP->GetU32Const(0);
+  }
 
   std::vector<Value*> elts(ElemCount);
   unsigned rest = (ElemCount % 4);
-  for (unsigned i = 0; i < ElemCount-rest; i += 4) {
+  for (unsigned i = 0; i < ElemCount - rest; i += 4) {
     Value* ResultElts[4];
-    Value* bufLd = GenerateStructBufLd(handle, bufIdx, offset, status, VecEltTy, ResultElts, OP, Builder, 4, alignment);
+    Value* bufLd = GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, ResultElts, OP, Builder, 4, alignmentVal);
     bufLds.emplace_back(bufLd);
     elts[i] = ResultElts[0];
     elts[i + 1] = ResultElts[1];
     elts[i + 2] = ResultElts[2];
     elts[i + 3] = ResultElts[3];
 
-    // Update offset by 4*4bytes.
-    offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
+    baseOffset = Builder.CreateAdd(baseOffset, OP->GetU32Const(4 * EltSize));
   }
 
   if (rest) {
     Value* ResultElts[4];
-    Value* bufLd = GenerateStructBufLd(handle, bufIdx, offset, status, VecEltTy, ResultElts, OP, Builder, rest, alignment);
+    Value* bufLd = GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, ResultElts, OP, Builder, rest, alignmentVal);
     bufLds.emplace_back(bufLd);
     for (unsigned i = 0; i < rest; i++)
       elts[ElemCount - rest + i] = ResultElts[i];
@@ -6921,7 +6919,8 @@ Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
   Type *EltTy = MatTy.getElementTypeForMem();
   unsigned matSize = MatTy.getNumElements();
   std::vector<Value*> bufLds;
-  Value* Vec = TranslateStructBufVecLd(EltTy, matSize, Builder, handle, OP, status, bufIdx, baseOffset, DL, bufLds);
+  Value* Vec = TranslateRawBufVecLd(EltTy, matSize, Builder, handle, OP, status, bufIdx,
+    baseOffset, DL, bufLds, /*baseAlign (in bytes)*/ 8);
   Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
   return Vec;
 }
@@ -7136,13 +7135,13 @@ void TranslateStructBufMatSubscript(CallInst *CI,
         for (unsigned i = 0; i < resultSize; i++) {
           Value *ResultElt;
           // TODO: This can be inefficient for row major matrix load
-          GenerateStructBufLd(handle, bufIdx, idxList[i],
+          GenerateRawBufLd(handle, bufIdx, idxList[i],
                               /*status*/ nullptr, EltTy, ResultElt, hlslOP,
                               ldBuilder, 1, alignment);
           ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
         }
       } else {
-        GenerateStructBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
+        GenerateRawBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
                             EltTy, ldData, hlslOP, ldBuilder, 4, alignment);
       }
       ldUser->replaceAllUsesWith(ldData);
@@ -7300,7 +7299,7 @@ void TranslateStructBufSubscriptUser(
         }
         else {
           Value* ResultElts[4];
-          GenerateStructBufLd(handle, bufIdx, offset, status, pOverloadTy,
+          GenerateRawBufLd(handle, bufIdx, offset, status, pOverloadTy,
                               ResultElts, OP, Builder, numComponents, alignment);
           return ScalarizeElements(Ty, ResultElts, Builder);
         }

+ 108 - 0
tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/bab_templated_load_method.hlsl

@@ -0,0 +1,108 @@
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=float -DRET_TY=float %s | FileCheck %s -check-prefix=CHK_SCALAR
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=float1 -DRET_TY=float1 %s | FileCheck %s -check-prefix=CHK_VEC1
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=float1x1 -DRET_TY=float1x1 %s | FileCheck %s -check-prefix=CHK_MAT1x1
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=bool1x2 -DRET_TY=bool1x2 %s | FileCheck %s -check-prefix=CHK_MAT1x2
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=int2x1 -DRET_TY=int2x1 %s | FileCheck %s -check-prefix=CHK_MAT2x1
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=uint2x2 -DRET_TY=uint2x2 %s | FileCheck %s -check-prefix=CHK_MAT2x2
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=float16_t2x3 -DRET_TY=float16_t2x3 %s | FileCheck %s -check-prefix=CHK_MAT2x3
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=uint16_t3x2 -DRET_TY=uint16_t3x2 %s | FileCheck %s -check-prefix=CHK_MAT3x2
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=float3x3 -DRET_TY=float3x3 %s | FileCheck %s -check-prefix=CHK_MAT3x3
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=int3x4 -DRET_TY=int3x4 %s | FileCheck %s -check-prefix=CHK_MAT3x4
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=bool4x3 -DRET_TY=bool4x3 %s | FileCheck %s -check-prefix=CHK_MAT4x3
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=uint4x4 -DRET_TY=uint4x4 %s | FileCheck %s -check-prefix=CHK_MAT4x4
+
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=double -DRET_TY=float %s | FileCheck %s -check-prefix=CHK_DBL
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=double4 -DRET_TY=float4 %s | FileCheck %s -check-prefix=CHK_DBL4
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=double3x3 -DRET_TY=float3x3 %s | FileCheck %s -check-prefix=CHK_DBL3x3
+// RUN: %dxc -E main -T vs_6_5 -enable-16bit-types -DTY=double4x4 -DRET_TY=float4x4 %s | FileCheck %s -check-prefix=CHK_DBL4x4
+
+// RUN: %dxc -E main -T vs_6_5 -DTY=min16float -DRET_TY=min16float %s | FileCheck %s -check-prefix=CHK_MINFLT
+// RUN: %dxc -E main -T vs_6_5 -DTY=min16uint3x3 -DRET_TY=min16uint3x3 %s | FileCheck %s -check-prefix=CHK_MINUINT3x3
+// RUN: %dxc -E main -T vs_6_5 -DTY=min16float4x4 -DRET_TY=min16float4x4 %s | FileCheck %s -check-prefix=CHK_MINFLT4x4
+
+
+// CHK_SCALAR: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 1, i32 4)
+
+// CHK_VEC1: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 1, i32 4)
+
+// CHK_MAT1x1: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 1, i32 4)
+
+// CHK_MAT1x2: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 3, i32 4)
+
+// CHK_MAT2x1: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 3, i32 4)
+
+// CHK_MAT2x2: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+
+// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 2)
+// CHK_MAT2x3: add i32 %{{.*}}, 8
+// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 3, i32 2)
+
+// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 2)
+// CHK_MAT3x2: add i32 %{{.*}}, 8
+// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 3, i32 2)
+
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MAT3x3: add i32 %{{.*}}, 16
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MAT3x3: add i32 %{{.*}}, 32
+// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 1, i32 4)
+
+// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MAT3x4: add i32 %{{.*}}, 16
+// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MAT3x4: add i32 %{{.*}}, 32
+// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+
+// CHK_MAT4x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MAT4x3: add i32 %{{.*}}, 16
+// CHK_MAT4x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MAT4x3: add i32 %{{.*}}, 32
+// CHK_MAT4x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MAT4x4: add i32 %{{.*}}, 16
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MAT4x4: add i32 %{{.*}}, 32
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MAT4x4: add i32 %{{.*}}, 48
+// CHK_MAT4x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+
+// CHK_DBL: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 1, i32 4)
+
+// CHK_DBL4: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+
+// CHK_DBL3x3: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_DBL3x3: add i32 %{{.*}}, 32
+// CHK_DBL3x3: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_DBL3x3: add i32 %{{.*}}, 64
+// CHK_DBL3x3: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 1, i32 4)
+
+// CHK_DBL4x4: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_DBL4x4: add i32 %{{.*}}, 32
+// CHK_DBL4x4: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_DBL4x4: add i32 %{{.*}}, 64
+// CHK_DBL4x4: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_DBL4x4: add i32 %{{.*}}, 96
+// CHK_DBL4x4: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+
+// CHK_MINFLT: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 1, i32 4)
+
+// CHK_MINUINT3x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MINUINT3x3: add i32 %{{.*}}, 16
+// CHK_MINUINT3x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MINUINT3x3: add i32 %{{.*}}, 32
+// CHK_MINUINT3x3: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 undef, i8 1, i32 4)
+
+// CHK_MINFLT4x4: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MINFLT4x4: add i32 %{{.*}}, 16
+// CHK_MINFLT4x4: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MINFLT4x4: add i32 %{{.*}}, 32
+// CHK_MINFLT4x4: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+// CHK_MINFLT4x4: add i32 %{{.*}}, 48
+// CHK_MINFLT4x4: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %bab_texture_rawbuf, i32 %{{.*}}, i32 undef, i8 15, i32 4)
+
+ByteAddressBuffer bab; // Buffer that the templated Load<TY> call below reads from.
+
+RET_TY main (int i : IN0) : OUT { // TY and RET_TY are supplied by the -D options on each RUN line.
+  return bab.Load<TY>(i); // The CHK_* patterns above expect dx.op.rawBufferLoad call(s) for this load.
+}