Browse Source

Structure alignment Change (#721)

Structure buffer alignment change for float16. Starting in shader model 6.2, the structure will match that of #pragma pack(8) alignment in C++

Removing padding for structure buffer. We will not have any padding in HLSL structure since load/store operation already handles offset as if padding exist in #pragma pack(8) mode in C++

Disable legacy alignment for non precision mode as it was only needed for min precision
Young Kim 7 năm trước cách đây
mục cha
commit
40369a5884

+ 4 - 3
lib/HLSL/DxilGenerationPass.cpp

@@ -277,8 +277,9 @@ public:
     // Translate precise on allocas into function call to keep the information after mem2reg.
     // The function calls will be removed after propagate precise attribute.
     TranslatePreciseAttribute();
-    // Change struct type to legacy layout for cbuf and struct buf.
-    UpdateStructTypeForLegacyLayout();
+    // Change struct type to legacy layout for cbuf and struct buf for min precision data types.
+    if (M.GetHLModule().GetHLOptions().bUseMinPrecision)
+      UpdateStructTypeForLegacyLayout();
 
     // High-level metadata should now be turned into low-level metadata.
     const bool SkipInit = true;
@@ -1139,7 +1140,7 @@ Type *UpdateFieldTypeForLegacyLayout(Type *Ty, bool IsCBuf, DxilFieldAnnotation
     if (Ty->isHalfTy()) {
       return Type::getFloatTy(Ty->getContext());
     } else if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
-      if (ITy->getBitWidth() <= 32)
+      if (ITy->getBitWidth() < 32)
         return i32Ty;
       else
         return Ty;

+ 1 - 1
lib/HLSL/DxilValidation.cpp

@@ -2937,7 +2937,7 @@ static void ValidateResource(hlsl::DxilResource &res,
   if (res.IsStructuredBuffer()) {
     unsigned stride = res.GetElementStride();
     bool alignedTo4Bytes = (stride & 3) == 0;
-    if (!alignedTo4Bytes) {
+    if (!alignedTo4Bytes && !ValCtx.M.GetDxilModule().m_ShaderFlags.GetUseNativeLowPrecision()) {
       ValCtx.EmitResourceFormatError(
           &res, ValidationRule::MetaStructBufAlignment,
           {std::to_string(4), std::to_string(stride)});

+ 1 - 1
lib/HLSL/HLModule.cpp

@@ -863,7 +863,7 @@ const char *HLModule::GetLegacyDataLayoutDesc() {
 }
 
 // New data layout with native low precision types
-static const StringRef kNewLayoutString = "e-m:e-p:32:32-i1:32:32-i8:32:32-i16:32:32-i64:64-f16:16-f80:32-n8:16:32-a:0:32-S320";
+static const StringRef kNewLayoutString = "e-m:e-p:32:32-i1:32:32-i8:8:32-i16:16:32-i64:64-f16:16-f80:32-n8:16:32-a:0:32-S320";
 const char *HLModule::GetNewDataLayoutDesc() {
   return kNewLayoutString.data();
 }

+ 7 - 3
lib/HLSL/HLOperationLower.cpp

@@ -38,13 +38,16 @@ struct HLOperationLowerHelper {
   DxilTypeSystem &dxilTypeSys;
   DxilFunctionProps *functionProps;
   bool bLegacyCBufferLoad;
+  bool bNewDataLayout;
   DataLayout legacyDataLayout;
+  DataLayout newDataLayout;
   HLOperationLowerHelper(HLModule &HLM);
 };
 
 HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
     : hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
-      legacyDataLayout(HLModule::GetLegacyDataLayoutDesc()) {
+      legacyDataLayout(HLModule::GetLegacyDataLayoutDesc()),
+      newDataLayout(HLModule::GetNewDataLayoutDesc()) {
   llvm::LLVMContext &Ctx = HLM.GetCtx();
   voidTy = Type::getVoidTy(Ctx);
   f32Ty = Type::getFloatTy(Ctx);
@@ -56,6 +59,7 @@ HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
   if (HLM.HasDxilFunctionProps(EntryFunc))
     functionProps = &HLM.GetDxilFunctionProps(EntryFunc);
   bLegacyCBufferLoad = HLM.GetHLOptions().bLegacyCBufferLoad;
+  bNewDataLayout = !HLM.GetHLOptions().bUseMinPrecision;
 }
 
 struct HLObjectOperationLowerHelper {
@@ -6301,11 +6305,11 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
       Type *RetTy = ObjTy->getStructElementType(0);
       if (RK == DxilResource::Kind::StructuredBuffer) {
         TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP,
-                                    helper.legacyDataLayout);
+                                    helper.bNewDataLayout ? helper.newDataLayout : helper.legacyDataLayout);
       } else if (RetTy->isAggregateType() &&
                  RK == DxilResource::Kind::TypedBuffer) {
         TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP,
-                                    helper.legacyDataLayout);
+                                    helper.bNewDataLayout ? helper.newDataLayout : helper.legacyDataLayout);
         // Clear offset for typed buf.
         for (auto User : handle->users()) {
           CallInst *CI = cast<CallInst>(User);

+ 4 - 1
tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp

@@ -274,12 +274,14 @@ void CGRecordLowering::lower(bool NVBaseType) {
       accumulateVBases();
   }
   std::stable_sort(Members.begin(), Members.end());
+#if 0 // HLSL Change - No padding for structure. Array offset will be handled when load/store is called
   Members.push_back(StorageInfo(Size, getIntNType(8)));
   clipTailPadding();
   determinePacked(NVBaseType);
   insertPadding();
   Members.pop_back();
   calculateZeroInit();
+#endif // HLSL Change End
   fillOutputFields();
 }
 
@@ -734,8 +736,8 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D,
 
 #ifndef NDEBUG
   // Verify that the computed LLVM struct size matches the AST layout size.
+#if 0 // HLSL Change - No padding for structure. Disable validation check.
   const ASTRecordLayout &Layout = getContext().getASTRecordLayout(D);
-
   uint64_t TypeSizeInBits = getContext().toBits(Layout.getSize());
   assert(TypeSizeInBits == getDataLayout().getTypeAllocSizeInBits(Ty) &&
          "Type size mismatch!");
@@ -807,6 +809,7 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D,
     assert(static_cast<unsigned>(Info.Offset) + Info.Size <= Info.StorageSize &&
            "Bitfield outside of its allocated storage");
   }
+#endif // HLSL Change End
 #endif
 
   return RL;

+ 2 - 2
tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl

@@ -3,7 +3,7 @@
 // CHECK: Use native low precision
 // CHECK: cbuffer Foo
 // CHECK: {
-// CHECK:   struct dx.alignment.legacy.Foo
+// CHECK:   struct Foo
 // CHECK:   {
 // CHECK:       half f_h1;                                    ; Offset:    0
 // CHECK:       float3 f_f3;                                  ; Offset:    4
@@ -23,7 +23,7 @@
 
 // CHECK: cbuffer Bar
 // CHECK: {
-// CHECK:   struct dx.alignment.legacy.Bar
+// CHECK:   struct Bar
 // CHECK:   {
 // CHECK:       half b_h1;                                    ; Offset:    0
 // CHECK:       half b_h2;                                    ; Offset:    2

+ 59 - 0
tools/clang/test/CodeGenHLSL/struct_buf_new_layout.hlsl

@@ -0,0 +1,59 @@
+// RUN: %dxc -E main -T ps_6_2 -no-min-precision %s  | FileCheck %s
+
+// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 0, half 0xH3C00, half 0xH3C00, half 0xH3C00, half undef, i8 7)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 8, i32 2, i32 2, i32 2, i32 2, i8 15)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 24, half 0xH4200, half 0xH4200, half 0xH4200, half undef, i8 7)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 30, half 0xH4400, half 0xH4400, half 0xH4400, half 0xH4400, i8 15)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 40, i32 %1, i32 %2, i32 undef, i32 undef, i8 3)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 48, half 0xH4600, half undef, half undef, half undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 50, half 0xH4700, half undef, half undef, half undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 52, half 0xH4800, half undef, half undef, half undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 56, i32 9, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %g_sb2_UAV_structbuf, i32 0, i32 0, i32 %4, i32 %5, i32 undef, i32 undef, i8 3)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb2_UAV_structbuf, i32 0, i32 8, half 0xH4000, half 0xH4000, half 0xH4000, half undef, i8 7)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %g_sb2_UAV_structbuf, i32 0, i32 16, i32 3, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+
+struct MyStruct1
+{
+    half3  m_1;
+    int4   m_2;
+    half3  m_3;
+    half4  m_4;
+    double m_5;
+    half   m_6;
+    half   m_7;
+    half   m_8;
+    int    m_9;
+};
+
+struct MyStruct2
+{
+    double m_1;
+    half3  m_2;
+    int    m_3;
+};
+
+RWStructuredBuffer<MyStruct1> g_sb1: register(u0);
+RWStructuredBuffer<MyStruct2> g_sb2: register(u1);
+
+float4 main() : SV_Target {
+    MyStruct1 myStruct;
+    myStruct.m_1 = 1;
+    myStruct.m_2 = 2;
+    myStruct.m_3 = 3;
+    myStruct.m_4 = 4;
+    myStruct.m_5 = 5;
+    myStruct.m_6 = 6;
+    myStruct.m_7 = 7;
+    myStruct.m_8 = 8;
+    myStruct.m_9 = 9;
+    g_sb1[0] = myStruct;
+
+    MyStruct2 myStruct2;
+    myStruct2.m_1 = 1;
+    myStruct2.m_2 = 2;
+    myStruct2.m_3 = 3;
+    g_sb2[0] = myStruct2;
+
+    return 1;
+}

+ 10 - 0
tools/clang/test/CodeGenHLSL/typed_buffer_half.hlsl

@@ -0,0 +1,10 @@
+// RUN: %dxc -E main -T ps_6_2 -no-min-precision %s  | FileCheck %s
+
+// CHECK: call void @dx.op.bufferStore.f16
+
+RWBuffer<half4> g_tb: register(u1);
+
+float4 main() : SV_Target {
+    g_tb[3] = half4(1,2,3,4);
+    return 1;
+}

+ 12 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -774,6 +774,7 @@ public:
   TEST_METHOD(CodeGenStaticResource)
   TEST_METHOD(CodeGenStaticResource2)
   TEST_METHOD(CodeGenStruct_Buf1)
+  TEST_METHOD(CodeGenStruct_Buf_New_Layout)
   TEST_METHOD(CodeGenStruct_BufHasCounter)
   TEST_METHOD(CodeGenStruct_BufHasCounter2)
   TEST_METHOD(CodeGenStructArray)
@@ -850,6 +851,7 @@ public:
   TEST_METHOD(CodeGenTemplate_Checks_Mod)
   TEST_METHOD(CodeGenToinclude2_Mod)
   TEST_METHOD(CodeGenTypemods_Syntax_Mod)
+  TEST_METHOD(CodeGenTypedBufferHalf)
   TEST_METHOD(CodeGenVarmods_Syntax_Mod)
   TEST_METHOD(CodeGenVector_Assignments_Mod)
   TEST_METHOD(CodeGenVector_Syntax_Mix_Mod)
@@ -4199,6 +4201,11 @@ TEST_F(CompilerTest, CodeGenStruct_Buf1) {
   CodeGenTest(L"..\\CodeGenHLSL\\struct_buf1.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenStruct_Buf_New_Layout) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\struct_buf_new_layout.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenStruct_BufHasCounter) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\struct_bufHasCounter.hlsl");
 }
@@ -4502,6 +4509,11 @@ TEST_F(CompilerTest, CodeGenTypemods_Syntax_Mod) {
   CodeGenTest(L"..\\CodeGenHLSL\\typemods-syntax_Mod.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenTypedBufferHalf) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\typed_buffer_half.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenVarmods_Syntax_Mod) {
   CodeGenTest(L"..\\CodeGenHLSL\\varmods-syntax_Mod.hlsl");
 }