Răsfoiți Sursa

Honor matrix pack when storing to RWStructuredBuffer fields (#1804)

DXC would shuffle the vectorized matrix when lowering dx.hl.matldst.colStore to a structured buffer. However, that reordering is already performed by the lowering of the row-to-column-major cast that the codegen emits right before the store, so the two shuffles cancelled each other out.
Tristan Labelle 6 ani în urmă
părinte
comite
f05272205a

+ 11 - 30
lib/HLSL/HLOperationLower.cpp

@@ -5985,7 +5985,7 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
 Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
                                Value *handle, hlsl::OP *OP, Value *status,
                                Value *bufIdx, Value *baseOffset,
-                               bool colMajor, const DataLayout &DL) {
+                               const DataLayout &DL) {
   unsigned col, row;
   HLMatrixLower::GetMatrixInfo(matType, col, row);
   Type *EltTy = HLMatrixLower::LowerMatrixType(matType, /*forMem*/true)->getVectorElementType();
@@ -6027,7 +6027,7 @@ Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
 
 void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
                              hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
-                             Value *val, bool colMajor, const DataLayout &DL) {
+                             Value *val, const DataLayout &DL) {
   unsigned col, row;
   HLMatrixLower::GetMatrixInfo(matType, col, row);
   Type *EltTy = HLMatrixLower::LowerMatrixType(matType, /*forMem*/true)->getVectorElementType();
@@ -6048,18 +6048,8 @@ void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
     storeSize = matSize + 4 - (matSize & 3);
   }
   std::vector<Value *> elts(storeSize, undefElt);
-
-  if (colMajor) {
-    for (unsigned i = 0; i < matSize; i++)
-      elts[i] = Builder.CreateExtractElement(val, i);
-  } else {
-    for (unsigned r = 0; r < row; r++)
-      for (unsigned c = 0; c < col; c++) {
-        unsigned rowMajorIdx = r * col + c;
-        unsigned colMajorIdx = c * row + r;
-        elts[rowMajorIdx] = Builder.CreateExtractElement(val, colMajorIdx);
-      }
-  }
+  for (unsigned i = 0; i < matSize; i++)
+    elts[i] = Builder.CreateExtractElement(val, i);
 
   for (unsigned i = 0; i < matSize; i += 4) {
     uint8_t mask = 0;
@@ -6084,34 +6074,25 @@ void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP,
   DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore,
                     "only translate matrix loadStore here.");
   HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
+  // Due to the current way the initial codegen generates matrix
+  // orientation casts, the in-register vector matrix has already been
+  // reordered based on the destination's row or column-major packing orientation.
   switch (matOp) {
+  case HLMatLoadStoreOpcode::RowMatLoad:
   case HLMatLoadStoreOpcode::ColMatLoad: {
     Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
     Value *NewLd = TranslateStructBufMatLd(
         ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
-        bufIdx, baseOffset, /*colMajor*/ true, DL);
-    CI->replaceAllUsesWith(NewLd);
-  } break;
-  case HLMatLoadStoreOpcode::RowMatLoad: {
-    Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
-    Value *NewLd = TranslateStructBufMatLd(
-        ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
-        bufIdx, baseOffset, /*colMajor*/ false, DL);
+        bufIdx, baseOffset, DL);
     CI->replaceAllUsesWith(NewLd);
   } break;
+  case HLMatLoadStoreOpcode::RowMatStore:
   case HLMatLoadStoreOpcode::ColMatStore: {
     Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
     Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
     TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
                             handle, OP, bufIdx, baseOffset, val,
-                            /*colMajor*/ true, DL);
-  } break;
-  case HLMatLoadStoreOpcode::RowMatStore: {
-    Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
-    Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
-    TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
-                            handle, OP, bufIdx, baseOffset, val,
-                            /*colMajor*/ false, DL);
+                            DL);
   } break;
   }
 

+ 1 - 1
tools/clang/test/CodeGenHLSL/quick-test/matrix_orientation_overrides.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc /T vs_6_0 /E main /Zpr %s | FileCheck %s | XFail GitHub #1788
+// RUN: %dxc /T vs_6_0 /E main /Zpr %s | FileCheck %s
 
 // Test effective matrix orientations with every combination
 // of default and explicit matrix orientations.

+ 21 - 0
tools/clang/test/CodeGenHLSL/quick-test/matrix_orientation_structbuf_load_struct.hlsl

@@ -0,0 +1,21 @@
+// RUN: %dxc /T vs_6_0 /E main %s | FileCheck %s
+
+// Test reading matrices from structured buffers
+// respects the declared pack orientation.
+
+struct S
+{
+    row_major int4x4 rm;
+    column_major int4x4 cm; // Offset: 64 bytes
+};
+StructuredBuffer<S> b;
+
+int main() : OUT
+{
+    S s = b[0];
+    // CHECK: %[[row:.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle {{.*}}, i32 0, i32 16)
+    // CHECK: extractvalue %dx.types.ResRet.i32 %[[row]], 2
+    // CHECK: %[[col:.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle {{.*}}, i32 0, i32 96)
+    // CHECK: extractvalue %dx.types.ResRet.i32 %[[col]], 1
+    return s.rm._23 + s.cm._23;
+}

+ 0 - 0
tools/clang/test/CodeGenHLSL/quick-test/matrix_orientation_bufferstore.hlsl → tools/clang/test/CodeGenHLSL/quick-test/matrix_orientation_structbuf_store.hlsl


+ 1 - 1
tools/clang/test/CodeGenHLSL/quick-test/matrix_orientation_bufferstore_struct.hlsl → tools/clang/test/CodeGenHLSL/quick-test/matrix_orientation_structbuf_store_struct.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc /T vs_6_0 /E main %s | FileCheck %s | XFail GitHub #1788
+// RUN: %dxc /T vs_6_0 /E main %s | FileCheck %s
 
 // Test writing matrices to structured buffers
 // with every combination of source/dest orientations.