Selaa lähdekoodia

DataLayout String and ConstantBuffer Offset Fix (#727)

This change updates the target data layout string for DXIL so that it is correct. Now that we have scalar types smaller than a dword, the data layout string must be emitted correctly so that the layout of non-packed structures can be determined.

This change also fixes alignment issues with ConstantBuffer, which was taking a different codegen (CG) path from cbuffer.
Young Kim 7 vuotta sitten
vanhempi
commit
ce9d6267a5

+ 11 - 0
include/dxc/HLSL/DxilConstants.h

@@ -928,6 +928,17 @@ namespace DXIL {
     UseNativeLowPrecision
   };
 
+
+  // TODO: revisit the data layout descriptions for the following:
+  //      - x64 pointers?
+  //      - Keep ELF mangling (m:e)?
+
+  // Legacy data layout: every scalar narrower than 32 bits (i1, i8, i16, f16) is aligned to 32 bits. NOTE(review): the 64-bit float entry is spelled `f:64:64` here but `f64:64` in the new layout string - possibly a typo; confirm before changing, since dataLayout.hlsl checks this exact string.
+  static const char* kLegacyLayoutString = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f:64:64-n8:16:32:64";
+
+  // New data layout for native low precision types: i8, i16 and f16 use their natural alignment (i1 is still aligned to 32 bits).
+  static const char* kNewLayoutString = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64";
+
 } // namespace DXIL
 
 } // namespace hlsl

+ 0 - 2
include/dxc/HLSL/HLModule.h

@@ -163,8 +163,6 @@ public:
   static bool IsHLSLObjectType(llvm::Type *Ty);
   static void GetParameterRowsAndCols(llvm::Type *Ty, unsigned &rows, unsigned &cols,
                                       DxilParameterAnnotation &paramAnnotation);
-  static const char *GetLegacyDataLayoutDesc();
-  static const char *GetNewDataLayoutDesc();
 
   static void MergeGepUse(llvm::Value *V);
 

+ 2 - 2
lib/HLSL/DxilValidation.cpp

@@ -2536,8 +2536,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
           continue;
         }
 
-        bool IsMinPrecisionTy = ValCtx.DL.getTypeAllocSize(FromTy) < 4 ||
-                          ValCtx.DL.getTypeAllocSize(ToTy) < 4;
+        bool IsMinPrecisionTy = ValCtx.DL.getTypeStoreSize(FromTy) < 4 ||
+                          ValCtx.DL.getTypeStoreSize(ToTy) < 4;
         if (IsMinPrecisionTy) {
           ValCtx.EmitInstrError(Cast, ValidationRule::InstrMinPrecisonBitCast);
         }

+ 0 - 12
lib/HLSL/HLModule.cpp

@@ -856,18 +856,6 @@ void HLModule::GetParameterRowsAndCols(Type *Ty, unsigned &rows, unsigned &cols,
   rows *= arraySize;
 }
 
-// For legacy data layout, everything less than 32 align to 32.
-static const StringRef kLegacyLayoutString = "e-m:e-p:32:32-i1:32:32-i8:32:32-i16:32:32-i64:64-f16:32-f80:32-n8:16:32-a:0:32-S32";
-const char *HLModule::GetLegacyDataLayoutDesc() {
-  return kLegacyLayoutString.data();
-}
-
-// New data layout with native low precision types
-static const StringRef kNewLayoutString = "e-m:e-p:32:32-i1:32:32-i8:8:32-i16:16:32-i64:64-f16:16-f80:32-n8:16:32-a:0:32-S320";
-const char *HLModule::GetNewDataLayoutDesc() {
-  return kNewLayoutString.data();
-}
-
 static Value *MergeGEP(GEPOperator *SrcGEP, GetElementPtrInst *GEP) {
   IRBuilder<> Builder(GEP);
   SmallVector<Value *, 8> Indices;

+ 2 - 2
lib/HLSL/HLOperationLower.cpp

@@ -46,8 +46,8 @@ struct HLOperationLowerHelper {
 
 HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
     : hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
-      legacyDataLayout(HLModule::GetLegacyDataLayoutDesc()),
-      newDataLayout(HLModule::GetNewDataLayoutDesc()) {
+      legacyDataLayout(hlsl::DXIL::kLegacyLayoutString),
+      newDataLayout(hlsl::DXIL::kNewLayoutString) {
   llvm::LLVMContext &Ctx = HLM.GetCtx();
   voidTy = Type::getVoidTy(Ctx);
   f32Ty = Type::getFloatTy(Ctx);

+ 3 - 0
tools/clang/include/clang/Basic/TargetOptions.h

@@ -47,6 +47,9 @@ public:
   std::vector<std::string> Features;
   
   std::vector<std::string> Reciprocals;
+
+  // HLSL Change: Target layout can change by min precision option
+  const char *DescriptionString;
 };
 
 }  // end namespace clang

+ 5 - 5
tools/clang/lib/Basic/Targets.cpp

@@ -7033,9 +7033,9 @@ const Builtin::Info DXILTargetInfo::BuiltinInfo[] = {
 class DXIL_32TargetInfo : public DXILTargetInfo {
 
 public:
-  DXIL_32TargetInfo(const llvm::Triple &Triple) : DXILTargetInfo(Triple) {
+  DXIL_32TargetInfo(const llvm::Triple &Triple, const char *descriptionString) : DXILTargetInfo(Triple) {
     // TODO: Update Description for DXIL
-    DescriptionString = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32";
+    DescriptionString = descriptionString;
   }
 };
 }
@@ -7046,9 +7046,9 @@ public:
 // Driver code
 //===----------------------------------------------------------------------===//
 
-static TargetInfo *AllocateTarget(const llvm::Triple &Triple) {
+static TargetInfo *AllocateTarget(const llvm::Triple &Triple, const char* descrptionString) {
 #if 1 // HLSL Change
-  return new DXIL_32TargetInfo(Triple);
+  return new DXIL_32TargetInfo(Triple, descrptionString);
 #else // HLSL Change
   llvm::Triple::OSType os = Triple.getOS();
 
@@ -7459,7 +7459,7 @@ TargetInfo::CreateTargetInfo(DiagnosticsEngine &Diags,
   llvm::Triple Triple(Opts->Triple);
 
   // Construct the target
-  std::unique_ptr<TargetInfo> Target(AllocateTarget(Triple));
+  std::unique_ptr<TargetInfo> Target(AllocateTarget(Triple, Opts.get()->DescriptionString));
   if (!Target) {
     Diags.Report(diag::err_target_unknown_triple) << Triple.str();
     return nullptr;

+ 67 - 78
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -93,7 +93,7 @@ private:
   llvm::Type *CBufferType;
   uint32_t globalCBIndex;
   // TODO: make sure how minprec works
-  llvm::DataLayout legacyLayout;
+  llvm::DataLayout dataLayout;
   // decl map to constant id for program
   llvm::DenseMap<HLSLBufferDecl *, uint32_t> constantBufMap;
   // Map for resource type to resource metadata value.
@@ -313,7 +313,10 @@ void clang::CompileRootSignature(
 //
 CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM)
     : CGHLSLRuntime(CGM), Context(CGM.getLLVMContext()), EntryFunc(nullptr),
-      TheModule(CGM.getModule()), legacyLayout(CGM.getLangOpts().UseMinPrecision ? HLModule::GetLegacyDataLayoutDesc() : HLModule::GetNewDataLayoutDesc()),
+      TheModule(CGM.getModule()),
+      dataLayout(CGM.getLangOpts().UseMinPrecision
+                       ? hlsl::DXIL::kLegacyLayoutString
+                       : hlsl::DXIL::kNewLayoutString),
       CBufferType(
           llvm::StructType::create(TheModule.getContext(), "ConstantBuffer")) {
   const hlsl::ShaderModel *SM =
@@ -489,64 +492,69 @@ StringToTessOutputPrimitive(StringRef primitive) {
   return DXIL::TessellatorOutputPrimitive::Undefined;
 }
 
-static unsigned AlignTo8Bytes(unsigned offset, bool b8BytesAlign) {
-  DXASSERT((offset & 0x1) == 0, "offset should be divisible by 2");
-  if (!b8BytesAlign)
-    return offset;
-  else if ((offset & 0x7) == 0)
-    return offset;
-  else
-    return offset + 4;
+static unsigned RoundToAlign(unsigned num, unsigned mod) {
+  // Round num up to the nearest multiple of mod; num is returned unchanged when it is already a multiple, or when mod is 0 (which also avoids dividing by zero).
+  if (mod != 0)
+    return mod * ((num + mod - 1) / mod);
+  return num;
+}
+
+// Align a cbuffer offset in legacy mode (16 bytes per row): given the current offset, returns the offset at which a value of `size` bytes with the given scalar size should start. bNeedNewRow forces the value onto a fresh row whenever the offset is not already row-aligned.
+static unsigned AlignBufferOffsetInLegacy(unsigned offset, unsigned size,
+                                          unsigned scalarSizeInBytes,
+                                          bool bNeedNewRow) {
+  if (unsigned remainder = (offset & 0xf)) {
+    // Start from a new row when the value would cross the 16-byte row boundary, or when the caller requires a fresh row.
+    if (remainder + size > 16 || bNeedNewRow) {
+      return offset + 16 - remainder;
+    }
+    // Otherwise stay in the current row and round the offset up to a multiple of the scalar size.
+    return RoundToAlign(offset, scalarSizeInBytes);
+  }
+  return offset; // Already at the start of a row: nothing to adjust.
 }
 
 static unsigned AlignBaseOffset(unsigned baseOffset, unsigned size,
                                  QualType Ty, bool bDefaultRowMajor) {
-  bool b8BytesAlign = false;
-  if (Ty->isBuiltinType()) {
-    const clang::BuiltinType *BT = Ty->getAs<clang::BuiltinType>();
-    if (BT->getKind() == clang::BuiltinType::Kind::Double ||
-        BT->getKind() == clang::BuiltinType::Kind::LongLong)
-      b8BytesAlign = true;
-  }
-
-  if (unsigned remainder = (baseOffset & 0xf)) {
-    // Align to 4 x 4 bytes.
-    unsigned aligned = baseOffset - remainder + 16;
-    // If cannot fit in the remainder, need align.
-    bool bNeedAlign = (remainder + size) > 16;
-    // Array always start aligned.
-    bNeedAlign |= Ty->isArrayType();
-
-    if (IsHLSLMatType(Ty)) {
-      bool bColMajor = !bDefaultRowMajor;
-      if (const AttributedType *AT = dyn_cast<AttributedType>(Ty)) {
-        switch (AT->getAttrKind()) {
-        case AttributedType::Kind::attr_hlsl_column_major:
-          bColMajor = true;
-          break;
-        case AttributedType::Kind::attr_hlsl_row_major:
-          bColMajor = false;
-          break;
-        default:
-          // Do nothing
-          break;
-        }
+  bool needNewAlign = Ty->isArrayType();
+
+  if (IsHLSLMatType(Ty)) {
+    bool bColMajor = !bDefaultRowMajor;
+    if (const AttributedType *AT = dyn_cast<AttributedType>(Ty)) {
+      switch (AT->getAttrKind()) {
+      case AttributedType::Kind::attr_hlsl_column_major:
+        bColMajor = true;
+        break;
+      case AttributedType::Kind::attr_hlsl_row_major:
+        bColMajor = false;
+        break;
+      default:
+        // Do nothing
+        break;
       }
+    }
 
-      unsigned row, col;
-      hlsl::GetHLSLMatRowColCount(Ty, row, col);
+    unsigned row, col;
+    hlsl::GetHLSLMatRowColCount(Ty, row, col);
 
-      bNeedAlign |= bColMajor && col > 1;
-      bNeedAlign |= !bColMajor && row > 1;
-    }
+    needNewAlign |= bColMajor && col > 1;
+    needNewAlign |= !bColMajor && row > 1;
+  }
 
-    if (bNeedAlign)
-      return AlignTo8Bytes(aligned, b8BytesAlign);
-    else
-      return AlignTo8Bytes(baseOffset, b8BytesAlign);
+  unsigned scalarSizeInBytes = 4;
+  const clang::BuiltinType *BT = Ty->getAs<clang::BuiltinType>();
+  if (hlsl::IsHLSLVecMatType(Ty)) {
+    BT = CGHLSLRuntime::GetHLSLVecMatElementType(Ty)->getAs<clang::BuiltinType>();
+  }
+  if (BT) {
+    if (BT->getKind() == clang::BuiltinType::Kind::Double ||
+      BT->getKind() == clang::BuiltinType::Kind::LongLong)
+      scalarSizeInBytes = 8;
+    else if (BT->getKind() == clang::BuiltinType::Kind::Half)
+      scalarSizeInBytes = 2;
+  }
 
-  } else
-    return baseOffset;
+  return AlignBufferOffsetInLegacy(baseOffset, size, scalarSizeInBytes, needNewAlign);
 }
 
 static unsigned AlignBaseOffset(QualType Ty, unsigned baseOffset,
@@ -828,7 +836,7 @@ unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annota
 
         // Align offset.
         offset = AlignBaseOffset(parentTy, offset, bDefaultRowMajor, CGM,
-                                 legacyLayout);
+                                 dataLayout);
 
         unsigned CBufferOffset = offset;
 
@@ -857,7 +865,7 @@ unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annota
     QualType fieldTy = fieldDecl->getType();
     
     // Align offset.
-    offset = AlignBaseOffset(fieldTy, offset, bDefaultRowMajor, CGM, legacyLayout);
+    offset = AlignBaseOffset(fieldTy, offset, bDefaultRowMajor, CGM, dataLayout);
 
     unsigned CBufferOffset = offset;
 
@@ -934,12 +942,12 @@ unsigned CGMSHLSLRuntime::AddTypeAnnotation(QualType Ty,
 
   // Get size.
   llvm::Type *Type = CGM.getTypes().ConvertType(paramTy);
-  unsigned size = legacyLayout.getTypeAllocSize(Type);
+  unsigned size = dataLayout.getTypeAllocSize(Type);
 
   if (IsHLSLMatType(Ty)) {
     unsigned col, row;
     llvm::Type *EltTy = HLMatrixLower::GetMatrixInfo(Type, col, row);
-    bool b64Bit = legacyLayout.getTypeAllocSize(EltTy) == 8;
+    bool b64Bit = dataLayout.getTypeAllocSize(EltTy) == 8;
     size = GetMatrixSizeInCB(Ty, m_pHLModule->GetHLOptions().bDefaultRowMajor,
                              b64Bit);
   }
@@ -2281,7 +2289,7 @@ bool CGMSHLSLRuntime::SetUAVSRV(SourceLocation loc,
         templateDecl->getTemplateArgs()[0];
     llvm::Type *retTy = CGM.getTypes().ConvertType(retTyArg.getAsType());
 
-    uint32_t strideInBytes = legacyLayout.getTypeAllocSize(retTy);
+    uint32_t strideInBytes = dataLayout.getTypeAllocSize(retTy);
     hlslRes->SetElementStride(strideInBytes);
   }
 
@@ -2612,29 +2620,10 @@ void CGMSHLSLRuntime::SetEntryFunction() {
 // Here the size is CB size. So don't need check type.
 static unsigned AlignCBufferOffset(unsigned offset, unsigned size, llvm::Type *Ty) {
   DXASSERT(!(offset & 1), "otherwise we have an invalid offset.");
-  // offset is already 4 bytes aligned.
-  bool b8BytesAlign = Ty->isDoubleTy();
-  if (llvm::IntegerType *IT = dyn_cast<llvm::IntegerType>(Ty)) {
-    b8BytesAlign = IT->getBitWidth() > 32;
-  }
-  // If offset is divisible by 2 and not 4, then increase the offset by 2 for dword alignment.
-  if (!Ty->getScalarType()->isHalfTy() && (offset & 0x2)) {
-    offset += 2;
-  }
+  bool bNeedNewRow = Ty->isArrayTy();
+  unsigned scalarSizeInBytes = Ty->getScalarSizeInBits() / 8;
 
-  // Align it to 4 x 4bytes.
-  if (unsigned remainder = (offset & 0xf)) {
-    unsigned aligned = offset - remainder + 16;
-    // If cannot fit in the remainder, need align.
-    bool bNeedAlign = (remainder + size) > 16;
-    // Array always start aligned.
-    bNeedAlign |= Ty->isArrayTy();
-    if (bNeedAlign)
-      return AlignTo8Bytes(aligned, b8BytesAlign);
-    else
-      return AlignTo8Bytes(offset, b8BytesAlign);
-  } else
-    return offset;
+  return AlignBufferOffsetInLegacy(offset, size, scalarSizeInBytes, bNeedNewRow);
 }
 
 static unsigned AllocateDxilConstantBuffer(HLCBuffer &CB) {

+ 128 - 0
tools/clang/test/CodeGenHLSL/cbufferHalf-struct.hlsl

@@ -0,0 +1,128 @@
+// RUN: %dxc -E main -T ps_6_2 -no-min-precision %s | FileCheck %s
+
+// CHECK: Use native low precision
+// CHECK:   struct struct.Foo
+// CHECK:   {
+// CHECK:       half h1;                                    ; Offset:    0
+// CHECK:       float3 f3;                                  ; Offset:    4
+
+// CHECK:       half2 h2;                                   ; Offset:   16
+// CHECK:       float3 f3_1;                                ; Offset:   20
+
+// CHECK:       float2 f2;                                  ; Offset:   32
+// CHECK:       half4 h4;                                   ; Offset:   40
+
+// CHECK:       half2 h2_1;                                 ; Offset:   48
+// CHECK:       half3 h3;                                   ; Offset:   52
+
+// CHECK:       double d1;                                  ; Offset:   64
+// CHECK:       half3 h3_1;                                 ; Offset:   72
+
+// CHECK:       int i1;                                     ; Offset:   80
+// CHECK:       double d2;                                  ; Offset:   88
+
+// CHECK:   } f                                             ; Offset:    0 Size:    96
+
+// CHECK:   struct struct.Bar
+// CHECK:   {
+// CHECK:       half h1;                                    ; Offset:    0
+// CHECK:       half h2;                                    ; Offset:    2
+// CHECK:       half h3;                                    ; Offset:    4
+// CHECK:       half2 h4;                                   ; Offset:    6
+// CHECK:       half3 h5;                                   ; Offset:   10
+
+// CHECK:       half3 h7;                                   ; Offset:   16
+// CHECK:       half4 h8;                                   ; Offset:   22
+// CHECK:       half h9;                                    ; Offset:   30
+
+// CHECK:       half4 h10;                                  ; Offset:   32
+// CHECK:       half3 h11;                                  ; Offset:   40
+
+// CHECK:       half2 h12;                                  ; Offset:   48
+// CHECK:       half3 h13;                                  ; Offset:   52
+// CHECK:       half2 h14;                                  ; Offset:   58
+// CHECK:   } b                                             ; Offset:    0 Size:    62
+
+// CHECK: %dx.types.CBufRet.f16.8 = type { half, half, half, half, half, half, half, half }
+
+// CHECK: %f_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 3
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 3
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
+
+struct Foo {
+  half h1;
+  float3 f3;
+
+  half2 h2;
+  float3 f3_1;
+
+  float2 f2;
+  half4 h4;
+
+  half2 h2_1;
+  half3 h3;
+
+  double d1;
+  half3 h3_1;
+  
+  int   i1;
+  double d2;
+};
+
+struct Bar {
+  half h1;
+  half h2;
+  half h3;
+  half2 h4;
+  half3 h5;
+  
+  half3 h7;
+  half4 h8;
+  half h9;
+
+  half4 h10;
+  half3 h11;
+  
+  half2 h12;
+  half3 h13;
+  half2 h14;
+};
+
+ConstantBuffer<Foo> f : register(b0);
+ConstantBuffer<Bar> b : register(b1);
+
+float4 main() : SV_Target {
+  return f.h1 + f.f3.x + f.h2.x + f.h2.y + f.f3_1.z + f.f2.x + f.h4.x + f.h4.y 
+  + f.h4.z + f.h4.w + f.h2_1.x + f.h2_1.y + f.h3.x + f.h3.y + f.h3.z + f.d1 + f.h3_1.x + f.i1 + f.d2
+  + b.h1;
+}

+ 17 - 2
tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl

@@ -18,7 +18,11 @@
 // CHECK:       half3 f_h3;                                   ; Offset:   52
 
 // CHECK:       double f_d1;                                  ; Offset:   64
-// CHECK:   } Foo                                           ; Offset:    0 Size:    72
+// CHECK:       half3 f_h3_1;                                 ; Offset:   72
+
+// CHECK:       int f_i1;                                     ; Offset:   80
+// CHECK:       double f_d2;                                  ; Offset:   88
+// CHECK:   } Foo                                             ; Offset:    0 Size:    96
 // CHECK: }
 
 // CHECK: cbuffer Bar
@@ -71,6 +75,14 @@
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
 // CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 1
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
 
 cbuffer Foo {
   half f_h1;
@@ -82,6 +94,9 @@ cbuffer Foo {
   half2 f_h2_1;
   half3 f_h3;
   double f_d1;
+  half3 f_h3_1;
+  int   f_i1;
+  double f_d2;
 }
 
 cbuffer Bar {
@@ -105,6 +120,6 @@ cbuffer Bar {
 
 float4 main() : SV_Target {
   return f_h1 + f_f3.x + f_h2.x + f_h2.y + f_f3_1.z + f_f2.x + f_h4.x + f_h4.y 
-  + f_h4.z + f_h4.w + f_h2_1.x + f_h2_1.y + f_h3.x + f_h3.y + f_h3.z + f_d1
+  + f_h4.z + f_h4.w + f_h2_1.x + f_h2_1.y + f_h3.x + f_h3.y + f_h3.z + f_d1 + f_h3_1.x + f_i1 + f_d2
   + b_h1;
 }

+ 7 - 0
tools/clang/test/CodeGenHLSL/dataLayout.hlsl

@@ -0,0 +1,7 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f:64:64-n8:16:32:64"
+
+float4 main(float4 a : A) : SV_Target {
+  return 1;
+}

+ 7 - 0
tools/clang/test/CodeGenHLSL/dataLayoutHalf.hlsl

@@ -0,0 +1,7 @@
+// RUN: %dxc -E main -T ps_6_2 -no-min-precision %s | FileCheck %s
+
+// CHECK: target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+
+float4 main(float4 a : A) : SV_Target {
+  return 1;
+}

+ 3 - 0
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -735,6 +735,9 @@ public:
     // Setup a compiler instance.
     std::shared_ptr<TargetOptions> targetOptions(new TargetOptions);
     targetOptions->Triple = "dxil-ms-dx";
+    targetOptions->DescriptionString = Opts.NoMinPrecision
+      ? hlsl::DXIL::kNewLayoutString
+      : hlsl::DXIL::kLegacyLayoutString;
     compiler.HlslLangExtensions = helper;
     compiler.createDiagnostics(diagPrinter, false);
     compiler.createFileManager();

+ 17 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -506,6 +506,7 @@ public:
   TEST_METHOD(CodeGenCbufferAlloc)
   TEST_METHOD(CodeGenCbufferAllocLegacy)
   TEST_METHOD(CodeGenCbufferHalf)
+  TEST_METHOD(CodeGenCbufferHalfStruct)
   TEST_METHOD(CodeGenCbufferInLoop)
   TEST_METHOD(CodeGenCbufferMinPrec)
   TEST_METHOD(CodeGenClass)
@@ -517,6 +518,8 @@ public:
   TEST_METHOD(CodeGenConstMat3)
   TEST_METHOD(CodeGenConstMat4)
   TEST_METHOD(CodeGenCorrectDelay)
+  TEST_METHOD(CodeGenDataLayout)
+  TEST_METHOD(CodeGenDataLayoutHalf)
   TEST_METHOD(CodeGenDiscard)
   TEST_METHOD(CodeGenDivZero)
   TEST_METHOD(CodeGenDot1)
@@ -3117,6 +3120,11 @@ TEST_F(CompilerTest, CodeGenCbufferHalf) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\cbufferHalf.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenCbufferHalfStruct) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\cbufferHalf-struct.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenCbufferInLoop) {
   CodeGenTest(L"..\\CodeGenHLSL\\cbufferInLoop.hlsl");
 }
@@ -3161,6 +3169,15 @@ TEST_F(CompilerTest, CodeGenCorrectDelay) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\correct_delay.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenDataLayout) {
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\dataLayout.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenDataLayoutHalf) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\dataLayoutHalf.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenDiscard) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\discard.hlsl");
 }

+ 2 - 1
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -1508,11 +1508,12 @@ TEST_F(ValidationTest, PtrBitCast) {
 }
 
 TEST_F(ValidationTest, MinPrecisionBitCast) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
   RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\staticGlobals.hlsl", "ps_6_0",
                           "%([0-9]+) = getelementptr \\[4 x i32\\], \\[4 x i32\\]\\* %([0-9]+), i32 0, i32 0\n"
                           "  store i32 %([0-9]+), i32\\* %\\1, align 4",
                           "%\\1 = getelementptr [4 x i32], [4 x i32]* %\\2, i32 0, i32 0\n"
-                          "  %X = bitcast i32* %\\1 to [2 x half]*    \n"
+                          "  %X = bitcast i32* %\\1 to half* \n"
                           "  store i32 %\\3, i32* %\\1, align 4",
                           "Bitcast on minprecison types is not allowed",
                           /*bRegex*/true);