Browse Source

PIX: Annotate structs for shader debugging of MS->AS payloads (#2826)

Three classes of fixes, plus accompanying unit tests:

First, a couple of trivial fixes in the DxcPix storage class, for arrays of more than one dimension and for embedded types.

Second, the addition of a new fragment iterator (now renamed "member iterator") that knows how to traverse the full structure of a struct, enabling the DxcPix* code to know bit offset and size for contained types.

Third, fixes to the numbering pass so that it knows how to find alloca offsets via the various GetElementPtr instructions that result when addressing struct members.

Lastly, a whole bunch of unit tests, because this was really hard.

Remaining to do, as noted in the tests: the presence of a pointer-to-pointer type like floatNxM results in dbg.declare instructions not being emitted for structs. These tests (and perhaps some of the code) will need to be revisited if/when this is fixed.
Jeff Noyle 5 years ago
parent
commit
625c98fba3

+ 1 - 1
lib/DxilDia/DxcPixDxilStorage.cpp

@@ -122,7 +122,7 @@ STDMETHODIMP dxil_debug_info::DxcPixDxilArrayStorage::Index(
   }
 
   DWORD IndexedTypeSizeInBits;
-  IFR(m_pType->GetSizeInBits(&IndexedTypeSizeInBits));
+  IFR(IndexedType->GetSizeInBits(&IndexedTypeSizeInBits));
 
   const unsigned NewOffsetInBits =
       m_OffsetFromStorageStartInBits + Index * IndexedTypeSizeInBits;

+ 11 - 5
lib/DxilDia/DxcPixLiveVariables.cpp

@@ -130,7 +130,7 @@ void dxil_debug_info::LiveVariables::Impl::Init_DbgDeclare(
     return;
   }
 
-  std::unique_ptr<FragmentIterator> Iter = FragmentIterator::Create(
+  auto Iter = CreateMemberIterator(
       DbgDeclare,
       m_pModule->getDataLayout(),
       AddressAsAlloca,
@@ -138,17 +138,18 @@ void dxil_debug_info::LiveVariables::Impl::Init_DbgDeclare(
 
   if (!Iter)
   {
-    // FragmentIterator creation failure, this skip this var.
+    // MemberIterator creation failed, so skip this var.
     return;
   }
 
-  const unsigned FragmentSizeInBits = Iter->FragmentSizeInBits();
 
   unsigned FragmentIndex;
   while (Iter->Next(&FragmentIndex))
   {
+    const unsigned FragmentSizeInBits = 
+      Iter->SizeInBits(FragmentIndex);
     const unsigned FragmentOffsetInBits =
-        Iter->CurrOffsetInBits();
+        Iter->OffsetInBits(FragmentIndex);
 
     VariableInfo* VarInfo = AssignValueToOffset(
         &m_LiveVarsDbgDeclare[S],
@@ -157,10 +158,15 @@ void dxil_debug_info::LiveVariables::Impl::Init_DbgDeclare(
         FragmentIndex,
         FragmentOffsetInBits);
 
-    ValidateDbgDeclare(
+    // SROA can split structs so that multiple allocas back the same variable.
+    // In this case the expression will be empty
+    if (Expression->getNumElements() != 0)
+    {
+      ValidateDbgDeclare(
         VarInfo,
         FragmentSizeInBits,
         FragmentOffsetInBits);
+    }
   }
 }
 

+ 238 - 147
lib/DxilDia/DxcPixLiveVariables_FragmentIterator.cpp

@@ -15,63 +15,40 @@
 
 #include "dxc/DXIL/DxilMetadataHelper.h"
 #include "dxc/Support/exception.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 
 #include <vector>
 
-namespace dxil_debug_info
-{
-class DILayoutFragmentIterator : public FragmentIterator
-{
-public:
-  DILayoutFragmentIterator(
-      const llvm::DataLayout &DataLayout,
-      llvm::AllocaInst *Alloca,
-      llvm::DIExpression *Expression);
-
-  unsigned CurrOffsetInBits() override;
-};
-
-class DebugLayoutFragmentIterator : public FragmentIterator
+///////////////////////////////////////////////////////////////////////////////
+class FragmentIteratorBase : public dxil_debug_info::MemberIterator
 {
 public:
-  DebugLayoutFragmentIterator(
-      const llvm::DataLayout& DataLayout,
-      llvm::AllocaInst* Alloca,
-      unsigned InitialOffsetInBits,
-      const std::vector<hlsl::DxilDIArrayDim> &ArrayDims);
-
-  unsigned CurrOffsetInBits() override;
-
-private:
-  std::vector<hlsl::DxilDIArrayDim> m_ArrayDims;
+  virtual ~FragmentIteratorBase() {}
+  virtual unsigned SizeInBits(unsigned ) const override;
+  virtual bool Next(unsigned *FragmentIndex) override;
+
+protected:
+  FragmentIteratorBase(unsigned NumFragments, unsigned FragmentSizeInBits,
+    unsigned InitialOffsetInBits);
+
+  unsigned m_CurrFragment = 0;
+  unsigned m_NumFragments = 0;
+  unsigned m_FragmentSizeInBits = 0;
+  unsigned m_InitialOffsetInBits = 0;
 };
-}  // namespace dxil_debug_info
-
-dxil_debug_info::FragmentIterator::FragmentIterator(
-    unsigned NumFragments,
-    unsigned FragmentSizeInBits,
-    unsigned InitialOffsetInBits
-) : m_NumFragments(NumFragments)
-  , m_FragmentSizeInBits(FragmentSizeInBits)
-  , m_InitialOffsetInBits(InitialOffsetInBits)
-{
-}
 
-unsigned dxil_debug_info::FragmentIterator::FragmentSizeInBits() const
+unsigned FragmentIteratorBase::SizeInBits(unsigned ) const
 {
   return m_FragmentSizeInBits;
 }
 
-bool dxil_debug_info::FragmentIterator::Next(
-    unsigned* FragmentIndex
-)
-{
-  if (m_CurrFragment >= m_NumFragments)
-  {
+bool FragmentIteratorBase::Next(unsigned *FragmentIndex) {
+  if (m_CurrFragment >= m_NumFragments) {
     return false;
   }
 
@@ -79,11 +56,17 @@ bool dxil_debug_info::FragmentIterator::Next(
   return true;
 }
 
-static unsigned NumAllocaElements(llvm::AllocaInst *Alloca)
-{
-  llvm::Type* FragmentTy = Alloca->getAllocatedType();
-  if (auto* ArrayTy = llvm::dyn_cast<llvm::ArrayType>(FragmentTy))
-  {
+FragmentIteratorBase::FragmentIteratorBase(
+    unsigned NumFragments, unsigned FragmentSizeInBits,
+    unsigned InitialOffsetInBits)
+    : m_NumFragments(NumFragments), m_FragmentSizeInBits(FragmentSizeInBits),
+      m_InitialOffsetInBits(InitialOffsetInBits) {}
+
+
+///////////////////////////////////////////////////////////////////////////////
+static unsigned NumAllocaElements(llvm::AllocaInst *Alloca) {
+  llvm::Type *FragmentTy = Alloca->getAllocatedType();
+  if (auto *ArrayTy = llvm::dyn_cast<llvm::ArrayType>(FragmentTy)) {
     return ArrayTy->getNumElements();
   }
 
@@ -91,14 +74,10 @@ static unsigned NumAllocaElements(llvm::AllocaInst *Alloca)
   return NumElements;
 }
 
-static unsigned FragmentSizeInBitsFromAlloca(
-    const llvm::DataLayout &DataLayout,
-    llvm::AllocaInst *Alloca
-)
-{
+static unsigned FragmentSizeInBitsFromAlloca(const llvm::DataLayout &DataLayout,
+                                             llvm::AllocaInst *Alloca) {
   llvm::Type *FragmentTy = Alloca->getAllocatedType();
-  if (auto *ArrayTy = llvm::dyn_cast<llvm::ArrayType>(FragmentTy))
-  {
+  if (auto *ArrayTy = llvm::dyn_cast<llvm::ArrayType>(FragmentTy)) {
     FragmentTy = ArrayTy->getElementType();
   }
 
@@ -108,149 +87,261 @@ static unsigned FragmentSizeInBitsFromAlloca(
   return FragmentSizeInBits;
 }
 
-static unsigned InitialOffsetInBitsFromDIExpression(
-    const llvm::DataLayout &DataLayout,
-    llvm::AllocaInst *Alloca,
-    llvm::DIExpression *Expression
-)
-{
+static unsigned
+InitialOffsetInBitsFromDIExpression(const llvm::DataLayout &DataLayout,
+                                    llvm::AllocaInst *Alloca,
+                                    llvm::DIExpression *Expression) {
   unsigned FragmentOffsetInBits = 0;
-  if (Expression->getNumElements() > 0)
-  {
-    if (!Expression->isBitPiece())
-    {
+  if (Expression->getNumElements() > 0) {
+    if (Expression->getNumElements() == 1 &&
+      Expression->expr_op_begin()->getOp() == llvm::dwarf::DW_OP_deref) {
+      return 0;
+    }
+    else if (!Expression->isBitPiece()) {
       assert(!"Unhandled DIExpression");
       throw hlsl::Exception(E_FAIL, "Unhandled DIExpression");
     }
 
     FragmentOffsetInBits = Expression->getBitPieceOffset();
     assert(Expression->getBitPieceSize() ==
-           DataLayout.getTypeAllocSizeInBits(
-              Alloca->getAllocatedType()));
+           DataLayout.getTypeAllocSizeInBits(Alloca->getAllocatedType()));
   }
 
   return FragmentOffsetInBits;
 }
 
-dxil_debug_info::DILayoutFragmentIterator::DILayoutFragmentIterator(
-    const llvm::DataLayout& DataLayout,
-    llvm::AllocaInst* Alloca,
-    llvm::DIExpression* Expression)
-  : FragmentIterator(NumAllocaElements(Alloca),
-                     FragmentSizeInBitsFromAlloca(DataLayout, Alloca),
-                     InitialOffsetInBitsFromDIExpression(DataLayout,
-                                                         Alloca,
-                                                         Expression))
-{
-}
+///////////////////////////////////////////////////////////////////////////////
+class DILayoutFragmentIterator : public FragmentIteratorBase {
+public:
+  DILayoutFragmentIterator(const llvm::DataLayout &DataLayout,
+                           llvm::AllocaInst *Alloca,
+                           llvm::DIExpression *Expression);
 
-unsigned dxil_debug_info::DILayoutFragmentIterator::CurrOffsetInBits()
-{
-  return
-      m_InitialOffsetInBits + (m_CurrFragment - 1) * m_FragmentSizeInBits;
+  virtual unsigned OffsetInBits(unsigned Index) override;
+};
+
+DILayoutFragmentIterator::DILayoutFragmentIterator(
+    const llvm::DataLayout &DataLayout, llvm::AllocaInst *Alloca,
+    llvm::DIExpression *Expression)
+    : FragmentIteratorBase(NumAllocaElements(Alloca),
+                       FragmentSizeInBitsFromAlloca(DataLayout, Alloca),
+                       InitialOffsetInBitsFromDIExpression(DataLayout, Alloca,
+                                                           Expression)) {}
+
+unsigned DILayoutFragmentIterator::OffsetInBits(unsigned Index) {
+  return m_InitialOffsetInBits + Index * m_FragmentSizeInBits;
 }
 
-static unsigned NumFragmentsFromArrayDims(
-    const std::vector<hlsl::DxilDIArrayDim>& ArrayDims
-)
-{
+///////////////////////////////////////////////////////////////////////////////
+static unsigned
+NumFragmentsFromArrayDims(const std::vector<hlsl::DxilDIArrayDim> &ArrayDims) {
   unsigned TotalNumFragments = 1;
-  for (const hlsl::DxilDIArrayDim& ArrayDim : ArrayDims) {
+  for (const hlsl::DxilDIArrayDim &ArrayDim : ArrayDims) {
     TotalNumFragments *= ArrayDim.NumElements;
   }
   return TotalNumFragments;
 }
 
-static unsigned FragmentSizeInBitsFrom(
-    const llvm::DataLayout& DataLayout,
-    llvm::AllocaInst *Alloca,
-    unsigned TotalNumFragments
-)
-{
+static unsigned FragmentSizeInBitsFrom(const llvm::DataLayout &DataLayout,
+                                       llvm::AllocaInst *Alloca,
+                                       unsigned TotalNumFragments) {
   const unsigned TotalSizeInBits =
-      DataLayout.getTypeAllocSizeInBits(
-          Alloca->getAllocatedType());
+      DataLayout.getTypeAllocSizeInBits(Alloca->getAllocatedType());
 
   if (TotalNumFragments == 0 || TotalSizeInBits % TotalNumFragments != 0) {
     assert(!"Malformed variable debug layout metadata.");
-    throw hlsl::Exception(
-        E_FAIL,
-        "Malformed variable debug layout metadata.");
+    throw hlsl::Exception(E_FAIL, "Malformed variable debug layout metadata.");
   }
-
   const unsigned FragmentSizeInBits = TotalSizeInBits / TotalNumFragments;
   return FragmentSizeInBits;
 }
 
-dxil_debug_info::DebugLayoutFragmentIterator::DebugLayoutFragmentIterator(
-    const llvm::DataLayout& DataLayout,
-    llvm::AllocaInst* Alloca,
-    unsigned InitialOffsetInBits,
-    const std::vector<hlsl::DxilDIArrayDim>& ArrayDims)
-  : FragmentIterator(NumFragmentsFromArrayDims(ArrayDims),
-                     FragmentSizeInBitsFrom(DataLayout,
-                                            Alloca, 
-                                            NumFragmentsFromArrayDims(ArrayDims)),
-                     InitialOffsetInBits)
-  , m_ArrayDims(ArrayDims)
-{
-}
+///////////////////////////////////////////////////////////////////////////////
+class DebugLayoutFragmentIterator : public FragmentIteratorBase {
+public:
+  DebugLayoutFragmentIterator(
+      const llvm::DataLayout &DataLayout, llvm::AllocaInst *Alloca,
+      unsigned InitialOffsetInBits,
+      const std::vector<hlsl::DxilDIArrayDim> &ArrayDims);
 
-unsigned dxil_debug_info::DebugLayoutFragmentIterator::CurrOffsetInBits()
-{
+  virtual unsigned OffsetInBits(unsigned Index) override;
+
+private:
+  std::vector<hlsl::DxilDIArrayDim> m_ArrayDims;
+};
+
+DebugLayoutFragmentIterator::DebugLayoutFragmentIterator(
+    const llvm::DataLayout &DataLayout, llvm::AllocaInst *Alloca,
+    unsigned InitialOffsetInBits,
+    const std::vector<hlsl::DxilDIArrayDim> &ArrayDims)
+    : FragmentIteratorBase(
+          NumFragmentsFromArrayDims(ArrayDims),
+          FragmentSizeInBitsFrom(DataLayout, Alloca,
+                                 NumFragmentsFromArrayDims(ArrayDims)),
+          InitialOffsetInBits),
+      m_ArrayDims(ArrayDims) {}
+
+unsigned DebugLayoutFragmentIterator::OffsetInBits(unsigned Index) {
   // Figure out the offset of this fragment in the original
   unsigned FragmentOffsetInBits = m_InitialOffsetInBits;
-  unsigned RemainingIndex = m_CurrFragment - 1;
-  for (const hlsl::DxilDIArrayDim& ArrayDim : m_ArrayDims) {
-      FragmentOffsetInBits += (RemainingIndex % ArrayDim.NumElements) * ArrayDim.StrideInBits;
-      RemainingIndex /= ArrayDim.NumElements;
+  unsigned RemainingIndex = Index;
+  for (const hlsl::DxilDIArrayDim &ArrayDim : m_ArrayDims) {
+    FragmentOffsetInBits +=
+        (RemainingIndex % ArrayDim.NumElements) * ArrayDim.StrideInBits;
+    RemainingIndex /= ArrayDim.NumElements;
   }
   assert(RemainingIndex == 0);
   return FragmentOffsetInBits;
 }
 
 
-std::unique_ptr<dxil_debug_info::FragmentIterator>
-dxil_debug_info::FragmentIterator::Create
-(
-    llvm::DbgDeclareInst *DbgDeclare,
-    const llvm::DataLayout& DataLayout,
-    llvm::AllocaInst* Alloca,
-    llvm::DIExpression* Expression
-)
-{
-  bool HasVariableDebugLayout = false;
-  unsigned FirstFragmentOffsetInBits;
-  std::vector<hlsl::DxilDIArrayDim> ArrayDims;
+///////////////////////////////////////////////////////////////////////////////
+class CompositeTypeFragmentIterator : public dxil_debug_info::MemberIterator {
+public:
+  CompositeTypeFragmentIterator(llvm::DICompositeType* CT);
 
-  std::unique_ptr<dxil_debug_info::FragmentIterator> Iter;
+  virtual unsigned SizeInBits(unsigned Index) const override;
+  virtual unsigned OffsetInBits(unsigned Index) override;
+  virtual bool Next(unsigned* FragmentIndex) override;
 
-  try
+private:
+  struct FragmentSizeAndOffset
   {
-    HasVariableDebugLayout = 
-        hlsl::DxilMDHelper::GetVariableDebugLayout(
-            DbgDeclare,
-            FirstFragmentOffsetInBits,
-            ArrayDims);
+    unsigned Size;
+    unsigned Offset;
+  };
+  std::vector<FragmentSizeAndOffset> m_fragmentLocations;
+  unsigned m_currentFragment = 0;
+  void CompositeTypeFragmentIterator::DetermineStructMemberSizesAndOffsets(
+    llvm::DIType const*);
+};
 
-    if (HasVariableDebugLayout)
+
+void CompositeTypeFragmentIterator::DetermineStructMemberSizesAndOffsets(llvm::DIType const*diType) 
+{
+  auto AddANewFragment = [=](llvm::DIType const * type)
+  {
+    unsigned size = static_cast<unsigned>(type->getSizeInBits());
+    if (m_fragmentLocations.empty())
     {
-      Iter.reset(new DebugLayoutFragmentIterator(
-          DataLayout,
-          Alloca,
-          FirstFragmentOffsetInBits,
-          ArrayDims));
+      m_fragmentLocations.push_back({ size, 0 });
     }
     else
     {
-      Iter.reset(new DILayoutFragmentIterator(
-          DataLayout,
-          Alloca,
-          Expression));
+      unsigned offset = m_fragmentLocations.back().Offset + m_fragmentLocations.back().Size;
+      m_fragmentLocations.push_back({ size, offset });
     }
+  };
+
+  if (auto* BT = llvm::dyn_cast<llvm::DIBasicType>(diType)) 
+  {
+    AddANewFragment(BT);
   }
-  catch (const hlsl::Exception &)
+  else if (auto* CT = llvm::dyn_cast<llvm::DICompositeType>(diType))
   {
+    switch (diType->getTag())
+    {
+    case llvm::dwarf::DW_TAG_array_type :
+    {
+      llvm::DINodeArray elements = CT->getElements();
+      unsigned arraySize = 1;
+      for (auto const& node : elements)
+      {
+        if (llvm::DISubrange* SR = llvm::dyn_cast<llvm::DISubrange>(node))
+        {
+          arraySize *= SR->getCount();
+        }
+      }
+      const llvm::DITypeIdentifierMap EmptyMap;
+      llvm::DIType *BT = CT->getBaseType().resolve(EmptyMap);
+      for (unsigned i = 0; i < arraySize; ++i) {
+        DetermineStructMemberSizesAndOffsets(BT);
+      }
+    }
+      break;
+    case llvm::dwarf::DW_TAG_class_type:
+    case llvm::dwarf::DW_TAG_structure_type:
+      for (auto const& node : CT->getElements())
+      {
+        if (llvm::DIType* subType = llvm::dyn_cast<llvm::DIType>(node))
+        {
+          DetermineStructMemberSizesAndOffsets(subType);
+        }
+
+      }
+      break;
+    default:
+      diType->dump();
+      break;
+    }
+  }
+  else if (auto *DT = llvm::dyn_cast<llvm::DIDerivedType>(diType)) 
+  {
+    const llvm::DITypeIdentifierMap EmptyMap;
+    llvm::DIType *BT = DT->getBaseType().resolve(EmptyMap);
+    DetermineStructMemberSizesAndOffsets(BT);
+  }
+  else
+  {
+    assert(!"Unhandled DIType");
+    throw hlsl::Exception(E_FAIL, "Unhandled DIType");
+  }
+}
+
+CompositeTypeFragmentIterator::CompositeTypeFragmentIterator(
+  llvm::DICompositeType* CT)
+{
+  DetermineStructMemberSizesAndOffsets(CT);
+}
+
+unsigned CompositeTypeFragmentIterator::SizeInBits(unsigned Index) const
+{
+  return m_fragmentLocations[Index].Size;
+}
+
+unsigned CompositeTypeFragmentIterator::OffsetInBits(unsigned Index)
+{
+  return m_fragmentLocations[Index].Offset;
+}
+
+bool CompositeTypeFragmentIterator::Next(
+    unsigned *FragmentIndex) 
+{
+  *FragmentIndex = m_currentFragment;
+  m_currentFragment++;
+  return m_currentFragment <= static_cast<unsigned>(m_fragmentLocations.size());
+}
+
+///////////////////////////////////////////////////////////////////////////////
+std::unique_ptr<dxil_debug_info::MemberIterator>
+dxil_debug_info::CreateMemberIterator(llvm::DbgDeclareInst *DbgDeclare,
+                                          const llvm::DataLayout &DataLayout,
+                                          llvm::AllocaInst *Alloca,
+                                          llvm::DIExpression *Expression) {
+  bool HasVariableDebugLayout = false;
+  unsigned FirstFragmentOffsetInBits;
+  std::vector<hlsl::DxilDIArrayDim> ArrayDims;
+
+  std::unique_ptr<dxil_debug_info::MemberIterator> Iter;
+
+  try {
+    HasVariableDebugLayout = hlsl::DxilMDHelper::GetVariableDebugLayout(
+        DbgDeclare, FirstFragmentOffsetInBits, ArrayDims);
+
+    if (HasVariableDebugLayout) {
+      Iter.reset(new DebugLayoutFragmentIterator(
+          DataLayout, Alloca, FirstFragmentOffsetInBits, ArrayDims));
+    } else {
+      llvm::DICompositeType *CT = llvm::dyn_cast<llvm::DICompositeType>(
+          DbgDeclare->getVariable()->getType());
+      if (CT != nullptr && Expression->getNumElements() == 0) {
+        Iter.reset(new CompositeTypeFragmentIterator(CT));
+      } else {
+        Iter.reset(
+            new DILayoutFragmentIterator(DataLayout, Alloca, Expression));
+      }
+    }
+  } catch (const hlsl::Exception &) {
     return nullptr;
   }
 

+ 13 - 27
lib/DxilDia/DxcPixLiveVariables_FragmentIterator.h

@@ -24,34 +24,20 @@ class DIExpression;
 
 namespace dxil_debug_info
 {
-class FragmentIterator
+class MemberIterator
 {
 public:
-  virtual ~FragmentIterator() = default;
-
-  unsigned FragmentSizeInBits() const;
-
-  virtual unsigned CurrOffsetInBits() = 0;
-
-  bool Next(unsigned *FragmentIndex);
-
-  static std::unique_ptr<FragmentIterator> Create
-  (
-      llvm::DbgDeclareInst *DbgDeclare,
-      const llvm::DataLayout &DataLayout,
-      llvm::AllocaInst *Alloca,
-      llvm::DIExpression *Expression
-  );
-
-protected:
-  FragmentIterator(
-      unsigned NumFragments,
-      unsigned FragmentSizeInBits,
-      unsigned InitialOffsetInBits);
-
-  unsigned m_CurrFragment = 0;
-  unsigned m_NumFragments = 0;
-  unsigned m_FragmentSizeInBits = 0;
-  unsigned m_InitialOffsetInBits = 0;
+  virtual ~MemberIterator() = default;
+  virtual bool Next(unsigned *Index) = 0;
+  virtual unsigned SizeInBits(unsigned Index) const = 0;
+  virtual unsigned OffsetInBits(unsigned Index) = 0;
 };
+
+std::unique_ptr<MemberIterator> CreateMemberIterator
+(
+  llvm::DbgDeclareInst *DbgDeclare, 
+  const llvm::DataLayout &DataLayout,
+  llvm::AllocaInst *Alloca, 
+  llvm::DIExpression *Expression
+);
 }  // namespace dxil_debug_info

+ 10 - 0
lib/DxilDia/DxcPixTypes.cpp

@@ -162,6 +162,16 @@ STDMETHODIMP dxil_debug_info::DxcPixArrayType::GetSizeInBits(
     _Outptr_result_z_ DWORD *pSizeInBits)
 {
   *pSizeInBits = m_pArray->getSizeInBits();
+  for (unsigned ContainerDims = 0; ContainerDims < m_DimNum; ++ContainerDims)
+  {
+    auto *SR = llvm::dyn_cast<llvm::DISubrange>(m_pArray->getElements()[ContainerDims]);
+    auto count = SR->getCount();
+    if (count == 0)
+    {
+      return E_FAIL;
+    }
+    *pSizeInBits /= count;
+  }
   return S_OK;
 }
 

+ 206 - 32
lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp

@@ -3,6 +3,7 @@
 // DxilAnnotateWithVirtualRegister.cpp                                       //
 // Copyright (C) Microsoft Corporation. All rights reserved.                 //
 // This file is distributed under the University of Illinois Open Source     //
+// This file is distributed under the University of Illinois Open Source     //
 // License. See LICENSE.TXT for details.                                     //
 //                                                                           //
 // Annotates the llvm instructions with a virtual register number to be used //
@@ -20,11 +21,13 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/IR/Type.h"
@@ -34,9 +37,30 @@
 
 #define DEBUG_TYPE "dxil-annotate-with-virtual-regs"
 
+uint32_t CountStructMembers(llvm::Type const *pType) {
+  uint32_t Count = 0;
+
+  if (auto *ST = llvm::dyn_cast<llvm::StructType>(pType)) {
+    for (auto &El : ST->elements()) {
+      Count += CountStructMembers(El);
+    }
+  } else if (auto *AT = llvm::dyn_cast<llvm::ArrayType>(pType)) {
+    Count = CountStructMembers(AT->getArrayElementType()) *
+            AT->getArrayNumElements();
+  } else {
+    Count = 1;
+  }
+  return Count;
+}
+
 namespace {
 using namespace pix_dxil;
 
+static bool IsInstrumentableFundamentalType(llvm::Type *pAllocaTy) {
+  return
+    pAllocaTy->isFloatingPointTy() || pAllocaTy->isIntegerTy();
+}
+
 class DxilAnnotateWithVirtualRegister : public llvm::ModulePass {
 public:
   static char ID;
@@ -52,9 +76,11 @@ private:
   void AnnotateAlloca(llvm::AllocaInst *pAlloca);
   void AnnotateGeneric(llvm::Instruction *pI);
   void AssignNewDxilRegister(llvm::Instruction *pI);
-  void AssignNewAllocaRegister(llvm::AllocaInst *pAlloca, std::uint32_t C);
+  void PrintSingleRegister(llvm::Instruction* pI, uint32_t Register);
+  void AssignNewAllocaRegister(llvm::AllocaInst* pAlloca, std::uint32_t C);
+  void PrintAllocaMember(llvm::AllocaInst* pAlloca, uint32_t Base, uint32_t Offset);
 
-  hlsl::DxilModule *m_DM;
+  hlsl::DxilModule* m_DM;
   std::uint32_t m_uVReg;
   std::unique_ptr<llvm::ModuleSlotTracker> m_MST;
   void Init(llvm::Module &M) {
@@ -72,11 +98,16 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) {
   if (m_DM == nullptr) {
     return false;
   }
+  unsigned int Major = 0;
+  unsigned int Minor = 0;
+  m_DM->GetDxilVersion(Major, Minor);
+  if (Major < 6 || Major == 6 && Minor <= 4) {
+    m_DM->SetValidatorVersion(1, 4);
+  }
 
   std::uint32_t InstNum = 0;
   for (llvm::Instruction &I : llvm::inst_range(m_DM->GetEntryFunction())) {
-    if (!llvm::isa<llvm::DbgDeclareInst>(&I))
-    {
+    if (!llvm::isa<llvm::DbgDeclareInst>(&I)) {
       pix_dxil::PixDxilInstNum::AddMD(M.getContext(), &I, InstNum++);
     }
   }
@@ -110,8 +141,10 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) {
 }
 
 void DxilAnnotateWithVirtualRegister::AnnotateValues(llvm::Instruction *pI) {
-  if (auto *pAlloca = llvm::dyn_cast<llvm::AllocaInst>(pI)) {
+  if (auto* pAlloca = llvm::dyn_cast<llvm::AllocaInst>(pI)) {
     AnnotateAlloca(pAlloca);
+  } else if (!pI->getType()->isPointerTy()) {
+    AnnotateGeneric(pI);
   } else if (!pI->getType()->isVoidTy()) {
     AnnotateGeneric(pI);
   }
@@ -137,6 +170,69 @@ void DxilAnnotateWithVirtualRegister::AnnotateStore(llvm::Instruction *pI) {
   PixAllocaRegWrite::AddMD(m_DM->GetCtx(), pSt, AllocaReg, Index);
 }
 
+static uint32_t GetStructOffset(
+  llvm::GetElementPtrInst* pGEP,
+  uint32_t& GEPOperandIndex,
+  llvm::Type* pElementType)
+{
+  if (IsInstrumentableFundamentalType(pElementType)) {
+    return 0;
+  }
+  else if (auto* pArray = llvm::dyn_cast<llvm::ArrayType>(pElementType))
+  {
+    // 1D-array example:
+    //
+    // When referring to the zeroth member of the array in this struct:
+    // struct smallPayload {
+    //   uint32_t Array[2];
+    // };
+    // getelementptr inbounds %struct.smallPayload, %struct.smallPayload* %p,
+    // i32 0, i32 0, i32 0. The zeros above are:
+    //  -The zeroth element in the array pointed to (so, the actual struct)
+    //  -The zeroth element in the struct (which is the array)
+    //  -The zeroth element in that array
+
+    auto* pArrayIndex =
+      llvm::dyn_cast<llvm::ConstantInt>(pGEP->getOperand(GEPOperandIndex++));
+
+    if (pArrayIndex == nullptr) {
+      return 0;
+    }
+
+    uint32_t ArrayIndex = pArrayIndex->getLimitedValue();
+    auto pArrayElementType = pArray->getArrayElementType();
+    uint32_t MemberIndex = ArrayIndex * CountStructMembers(pArrayElementType);
+    return MemberIndex + GetStructOffset(pGEP, GEPOperandIndex, pArrayElementType);
+  }
+  else if (auto* pStruct = llvm::dyn_cast<llvm::StructType>(pElementType))
+  {
+    DXASSERT(GEPOperandIndex < pGEP->getNumOperands(), "Unexpectedly read too many GetElementPtrInst operands");
+
+    auto* pMemberIndex =
+      llvm::dyn_cast<llvm::ConstantInt>(pGEP->getOperand(GEPOperandIndex++));
+
+    if (pMemberIndex == nullptr) {
+      return 0;
+    }
+
+    uint32_t MemberIndex = pMemberIndex->getLimitedValue();
+
+    uint32_t MemberOffset = 0;
+    for (uint32_t i = 0; i < MemberIndex; ++i)
+    {
+      MemberOffset += CountStructMembers(pStruct->getElementType(i));
+    }
+
+    return MemberOffset +
+      GetStructOffset(pGEP, GEPOperandIndex, pStruct->getElementType(MemberIndex));
+  }
+  else
+  {
+    return 0;
+  }
+}
+
+
 bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite(
     llvm::Value *V, llvm::AllocaInst **pAI, llvm::Value **pIdx) {
   llvm::IRBuilder<> B(m_DM->GetCtx());
@@ -145,30 +241,72 @@ bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite(
   *pIdx = nullptr;
 
   if (auto *pGEP = llvm::dyn_cast<llvm::GetElementPtrInst>(V)) {
+    uint32_t precedingMemberCount = 0;
     auto *Alloca = llvm::dyn_cast<llvm::AllocaInst>(pGEP->getPointerOperand());
     if (Alloca == nullptr) {
-      return false;
+      // In the case of vector types (floatN, matrixNxM), the pointer operand will actually
+      // be another getelementptr instruction. This is not handled recursively:
+      // we only need to check these two levels.
+      if (auto* pPointerGEP = llvm::dyn_cast<llvm::GetElementPtrInst>(pGEP->getPointerOperand())) {
+        Alloca =
+            llvm::dyn_cast<llvm::AllocaInst>(pPointerGEP->getPointerOperand());
+        if (Alloca == nullptr) {
+          return false;
+        }
+        // And of course the member we're after might not be at the beginning of the struct:
+        auto* pStructType  = llvm::dyn_cast<llvm::StructType>(pPointerGEP->getPointerOperandType()->getPointerElementType());
+        auto* pStructMember = llvm::dyn_cast<llvm::ConstantInt>(pPointerGEP->getOperand(2));
+        uint64_t memberIndex = pStructMember->getLimitedValue();
+        for(uint64_t i = 0; i < memberIndex; ++i)
+        {
+          precedingMemberCount += CountStructMembers(pStructType->getStructElementType(i));
+        }
+      }
+      else
+      {
+        return false;
+      }
     }
 
-    llvm::SmallVector<llvm::Value *, 2> Indices(pGEP->idx_begin(),
-                                                pGEP->idx_end());
-    if (Indices.size() != 2) {
-      return false;
-    }
-    auto *pIdx0 = llvm::dyn_cast<llvm::ConstantInt>(Indices[0]);
 
-    if (pIdx0 == nullptr || pIdx0->getLimitedValue() != 0) {
-      return false;
-    }
+    // Deref pointer type to get struct type:
+    llvm::Type *pStructType = pGEP->getPointerOperandType();
+    pStructType = pStructType->getContainedType(0);
 
-    *pAI = Alloca;
-    *pIdx = Indices[1];
-    return true;
+    // Operand 1 is an index into the array of the above type. In DXIL derived from HLSL,
+    // we always expect this to be 0 (since llvm structs are only used for single-valued
+    // objects in HLSL, such as the amplification-to-mesh or TraceRays payloads).
+    uint32_t GEPOperandIndex = 1;
+    auto *pBaseArrayIndex =
+        llvm::dyn_cast<llvm::ConstantInt>(pGEP->getOperand(GEPOperandIndex++));
+    DXASSERT(pBaseArrayIndex != nullptr, "null base array index pointer");
+    DXASSERT(pBaseArrayIndex->getLimitedValue() == 0, "unexpected >0 array index");
+    pBaseArrayIndex;
+
+    // From here on, the indices always come in groups: first, the index of the
+    // member referenced in the current struct. If that member's type is an
+    // (n-dimensional) array, then n array indices follow.
+
+
+    auto offset = GetStructOffset(
+      pGEP,
+      GEPOperandIndex,
+      pStructType);
+
+    llvm::Value* IndexValue = B.getInt32(offset + precedingMemberCount);
+
+    if (IndexValue != nullptr)
+    {
+      *pAI = Alloca;
+      *pIdx = IndexValue;
+      return true;
+    }
+    return false;
   }
 
   if (auto *pAlloca = llvm::dyn_cast<llvm::AllocaInst>(V)) {
     llvm::Type *pAllocaTy = pAlloca->getType()->getElementType();
-    if (!pAllocaTy->isFloatTy() && !pAllocaTy->isIntegerTy()) {
+    if (!IsInstrumentableFundamentalType(pAllocaTy)) {
       return false;
     }
 
@@ -183,44 +321,80 @@ bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite(
+// Reserves a contiguous run of virtual registers for |pAlloca| based on the
+// allocated type: one register for an instrumentable fundamental type, one per
+// element for an array, and CountStructMembers(ST) registers for a struct.
+// Any other allocated type trips the assert below.
 void DxilAnnotateWithVirtualRegister::AnnotateAlloca(
     llvm::AllocaInst *pAlloca) {
   llvm::Type *pAllocaTy = pAlloca->getType()->getElementType();
-  if (pAllocaTy->isFloatTy() || pAllocaTy->isIntegerTy() ||
-      pAllocaTy->isHalfTy() || pAllocaTy->isIntegerTy(16)) {
+  if (IsInstrumentableFundamentalType(pAllocaTy)) {
     AssignNewAllocaRegister(pAlloca, 1);
   } else if (auto *AT = llvm::dyn_cast<llvm::ArrayType>(pAllocaTy)) {
     AssignNewAllocaRegister(pAlloca, AT->getNumElements());
+  } else if (auto *ST = llvm::dyn_cast<llvm::StructType>(pAllocaTy)) {
+    AssignNewAllocaRegister(pAlloca, CountStructMembers(ST));
   } else {
     DXASSERT_ARGS(false, "Unhandled alloca kind: %d", pAllocaTy->getTypeID());
   }
 }
 
 void DxilAnnotateWithVirtualRegister::AnnotateGeneric(llvm::Instruction *pI) {
-  if (!pI->getType()->isFloatTy() && !pI->getType()->isIntegerTy()) {
-    return;
+  if (auto *GEP = llvm::dyn_cast<llvm::GetElementPtrInst>(pI)) {
+    // https://llvm.org/docs/LangRef.html#getelementptr-instruction
+    DXASSERT(!GEP->getOperand(1)->getType()->isVectorTy(),
+             "struct vectors not supported");
+    llvm::AllocaInst *StructAlloca =
+        llvm::dyn_cast<llvm::AllocaInst>(GEP->getOperand(0));
+    if (StructAlloca != nullptr) {
+      // This is the case of a pointer to a struct member. 
+      // We treat it as an alias of the actual member in the alloca.
+      std::uint32_t baseStructRegNum = 0;
+      std::uint32_t regSize = 0;
+      if (pix_dxil::PixAllocaReg::FromInst(StructAlloca, &baseStructRegNum, & regSize)) {
+        llvm::ConstantInt *OffsetAsInt =
+            llvm::dyn_cast<llvm::ConstantInt>(GEP->getOperand(2));
+        std::uint32_t Offset = static_cast<std::uint32_t>(
+            OffsetAsInt->getValue().getLimitedValue());
+        DXASSERT(Offset < regSize,
+                 "Structure member offset out of expected range");
+        PixDxilReg::AddMD(m_DM->GetCtx(), pI, baseStructRegNum + Offset);
+      }
+    }
+  } else {
+    if (!IsInstrumentableFundamentalType(pI->getType())) {
+      return;
+    }
+    AssignNewDxilRegister(pI);
   }
-  AssignNewDxilRegister(pI);
 }
 
+// Tags |pI| with the next free virtual register (m_uVReg), reports the
+// assignment through PrintSingleRegister, then advances the counter by one.
 void DxilAnnotateWithVirtualRegister::AssignNewDxilRegister(
     llvm::Instruction *pI) {
   PixDxilReg::AddMD(m_DM->GetCtx(), pI, m_uVReg);
-  if (OSOverride != nullptr) {
-    static constexpr bool DontPrintType = false;
-    pI->printAsOperand(*OSOverride, DontPrintType, *m_MST.get());
-    *OSOverride << " dxil " << m_uVReg << "\n";
-  }
+  PrintSingleRegister(pI, m_uVReg);
   m_uVReg++;
 }
 
+// Tags |pAlloca| with a run of |C| virtual registers starting at m_uVReg,
+// reports the run through PrintAllocaMember, then advances the counter by |C|.
 void DxilAnnotateWithVirtualRegister::AssignNewAllocaRegister(
     llvm::AllocaInst *pAlloca, std::uint32_t C) {
   PixAllocaReg::AddMD(m_DM->GetCtx(), pAlloca, m_uVReg, C);
+  PrintAllocaMember(pAlloca, m_uVReg, C);
+  m_uVReg += C;
+}
+
+// Emits one "<operand> dxil <Register>" line for |pI| on the override output
+// stream. Does nothing when no override stream has been installed.
+void DxilAnnotateWithVirtualRegister::PrintSingleRegister(
+    llvm::Instruction* pI, uint32_t Register) {
+  if (OSOverride == nullptr) {
+    return;
+  }
+
+  static constexpr bool DontPrintType = false;
+  auto &Out = *OSOverride;
+  pI->printAsOperand(Out, DontPrintType, *m_MST.get());
+  Out << " dxil " << Register << "\n";
+}
+
+// Emits one "<operand> alloca <Base> <Offset>" line for |pAlloca| on the
+// override output stream, if one is installed. Despite its name, |Offset|
+// receives the register count from AssignNewAllocaRegister (its |C| argument).
+void DxilAnnotateWithVirtualRegister::PrintAllocaMember(llvm::AllocaInst* pAlloca,
+                                                   uint32_t Base,
+                                                   uint32_t Offset) {
   if (OSOverride != nullptr) {
     static constexpr bool DontPrintType = false;
     pAlloca->printAsOperand(*OSOverride, DontPrintType, *m_MST.get());
-    *OSOverride << " alloca " << m_uVReg << " " << C << "\n";
+    *OSOverride << " alloca " << Base << " " << Offset << "\n";
   }
-  m_uVReg += C;
 }
+
 } // namespace
 
 using namespace llvm;

+ 1 - 0
tools/clang/unittests/HLSL/CMakeLists.txt

@@ -86,6 +86,7 @@ if (WIN32)
 target_link_libraries(clang-hlsl-tests PRIVATE
   dxcompiler
   HLSLTestLib
+  LLVMDxilDia
   ${TAEF_LIBRARIES}
   ${DIASDK_LIBRARIES}
   ${D3D12_LIBRARIES}

+ 1032 - 2
tools/clang/unittests/HLSL/PixTest.cpp

@@ -32,6 +32,8 @@
 #include <atlfile.h>
 #include "dia2.h"
 
+#include "dxc/DXIL/DxilModule.h"
+
 #include "dxc/Test/HLSLTestData.h"
 #include "dxc/Test/HlslTestUtils.h"
 #include "dxc/Test/DxcTestUtils.h"
@@ -44,13 +46,27 @@
 #include "dxc/Support/Unicode.h"
 
 #include <fstream>
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/MSFileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
 
+
+#include <../lib/DxilDia/DxcPixLiveVariables.h>
+#include <../lib/DxilDia/DxcPixLiveVariables_FragmentIterator.h>
+#include <dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h>
+
 using namespace std;
+using namespace hlsl;
 using namespace hlsl_test;
 
 // Aligned to SymTagEnum.
@@ -139,6 +155,18 @@ const char* UdtKindText[] =
   "Interface",
 };
 
+// Splits |str| into its non-empty tokens, where any character contained in
+// |delimiters| separates tokens. Leading, trailing and repeated delimiters
+// never produce empty tokens. Implemented with std::string searches rather
+// than strtok so that it keeps no hidden global state and is safe to call
+// while another tokenization is in progress.
+static std::vector<std::string> Tokenize(const std::string &str,
+                                         const char *delimiters) {
+  std::vector<std::string> tokens;
+
+  std::string::size_type tokenStart = str.find_first_not_of(delimiters);
+  while (tokenStart != std::string::npos) {
+    // tokenEnd is npos for the last token; substr clamps the length and the
+    // following search from npos ends the loop.
+    const std::string::size_type tokenEnd =
+        str.find_first_of(delimiters, tokenStart);
+    tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
+    tokenStart = str.find_first_not_of(delimiters, tokenEnd);
+  }
+
+  return tokens;
+}
 
 class PixTest {
 public:
@@ -164,6 +192,19 @@ public:
   TEST_METHOD(DiaCompileArgs)
   TEST_METHOD(PixDebugCompileInfo)
 
+  TEST_METHOD(PixStructAnnotation_Simple)
+  TEST_METHOD(PixStructAnnotation_CopiedStruct)
+  TEST_METHOD(PixStructAnnotation_MixedSizes)
+  TEST_METHOD(PixStructAnnotation_StructWithinStruct)
+  TEST_METHOD(PixStructAnnotation_1DArray)
+  TEST_METHOD(PixStructAnnotation_2DArray)
+  TEST_METHOD(PixStructAnnotation_EmbeddedArray)
+  TEST_METHOD(PixStructAnnotation_FloatN)
+  TEST_METHOD(PixStructAnnotation_SequentialFloatN)
+  TEST_METHOD(PixStructAnnotation_EmbeddedFloatN)
+  TEST_METHOD(PixStructAnnotation_Matrix)
+  TEST_METHOD(PixStructAnnotation_BigMess)
+
   dxc::DxcDllSupport m_dllSupport;
 
   void CreateBlobPinned(_In_bytecount_(size) LPCVOID data, SIZE_T size,
@@ -487,7 +528,8 @@ public:
     return option.substr(0, option.find_first_of(' '));
   }
 
-  HRESULT CreateDiaSourceForCompile(const char *hlsl, IDiaDataSource **ppDiaSource)
+  HRESULT CreateDiaSourceForCompile(const char* hlsl,
+    IDiaDataSource** ppDiaSource)
   {
     if (!ppDiaSource)
       return E_POINTER;
@@ -502,6 +544,17 @@ public:
     LPCWSTR args[] = { L"/Zi", L"/Qembed_debug" };
     VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
       L"ps_6_0", args, _countof(args), nullptr, 0, nullptr, &pResult));
+    
+    HRESULT compilationStatus;
+    VERIFY_SUCCEEDED(pResult->GetStatus(&compilationStatus));
+    if (FAILED(compilationStatus))
+    {
+      CComPtr<IDxcBlobEncoding> pErrros;
+      VERIFY_SUCCEEDED(pResult->GetErrorBuffer(&pErrros));
+      CA2W errorTextW(static_cast<const char *>(pErrros->GetBufferPointer()), CP_UTF8);
+      WEX::Logging::Log::Error(errorTextW);
+    }
+
     VERIFY_SUCCEEDED(pResult->GetResult(&pProgram));
 
     // Disassemble the compiled (stripped) program.
@@ -550,6 +603,374 @@ public:
     *ppDiaSource = pDiaSource.Detach();
     return S_OK;
   }
+  
+  // Compiles |hlsl| (entry point "main") for shader profile |target| with
+  // /Zi /Od -enable-16bit-types /Qembed_debug and returns the operation
+  // result. A failed compilation status is logged as a test error, but the
+  // result object is still returned to the caller.
+  CComPtr<IDxcOperationResult> Compile(
+    const char* hlsl,
+    const wchar_t* target)
+  {
+    CComPtr<IDxcCompiler> pCompiler;
+    CComPtr<IDxcOperationResult> pResult;
+    CComPtr<IDxcBlobEncoding> pSource;
+
+    VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
+    CreateBlobFromText(hlsl, &pSource);
+    LPCWSTR args[] = { L"/Zi", L"/Od", L"-enable-16bit-types", L"/Qembed_debug" };
+    VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
+      target, args, _countof(args), nullptr, 0, nullptr, &pResult));
+
+    // Surface compiler diagnostics in the test log when compilation failed.
+    HRESULT compilationStatus;
+    VERIFY_SUCCEEDED(pResult->GetStatus(&compilationStatus));
+    if (FAILED(compilationStatus))
+    {
+      CComPtr<IDxcBlobEncoding> pErrros;
+      VERIFY_SUCCEEDED(pResult->GetErrorBuffer(&pErrros));
+      CA2W errorTextW(static_cast<const char*>(pErrros->GetBufferPointer()), CP_UTF8);
+      WEX::Logging::Log::Error(errorTextW);
+    }
+
+    // Disabled helper: when enabled, logs the disassembly of the debug-info
+    // DXIL part of the compiled program.
+#if 0 //handy for debugging
+    {
+      CComPtr<IDxcBlob> pProgram;
+      CheckOperationSucceeded(pResult, &pProgram);
+
+      CComPtr<IDxcLibrary> pLib;
+      VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib));
+      const hlsl::DxilContainerHeader *pContainer = hlsl::IsDxilContainerLike(
+          pProgram->GetBufferPointer(), pProgram->GetBufferSize());
+      VERIFY_IS_NOT_NULL(pContainer);
+      hlsl::DxilPartIterator partIter =
+          std::find_if(hlsl::begin(pContainer), hlsl::end(pContainer),
+                       hlsl::DxilPartIsType(hlsl::DFCC_ShaderDebugInfoDXIL));
+      const hlsl::DxilProgramHeader *pProgramHeader =
+          (const hlsl::DxilProgramHeader *)hlsl::GetDxilPartData(*partIter);
+      uint32_t bitcodeLength;
+      const char *pBitcode;
+      CComPtr<IDxcBlob> pProgramPdb;
+      hlsl::GetDxilProgramBitcode(pProgramHeader, &pBitcode, &bitcodeLength);
+      VERIFY_SUCCEEDED(pLib->CreateBlobFromBlob(
+          pProgram, pBitcode - (char *)pProgram->GetBufferPointer(),
+          bitcodeLength, &pProgramPdb));
+
+      CComPtr<IDxcBlobEncoding> pDbgDisassembly;
+      VERIFY_SUCCEEDED(pCompiler->Disassemble(pProgramPdb, &pDbgDisassembly));
+      std::string disText = BlobToUtf8(pDbgDisassembly);
+      CA2W disTextW(disText.c_str(), CP_UTF8);
+      WEX::Logging::Log::Comment(disTextW);
+    }
+#endif
+
+    return pResult;
+  }
+
+  // Returns a blob, created with CreateBlobFromBlob over |pProgram|, that
+  // covers the LLVM bitcode of the container's DFCC_DXIL part. Verification
+  // fails if |pProgram| does not look like a DXIL container or has no such
+  // part.
+  CComPtr<IDxcBlob> ExtractDxilPart(IDxcBlob *pProgram) {
+    CComPtr<IDxcLibrary> pLib;
+    VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib));
+    const hlsl::DxilContainerHeader *pContainer = hlsl::IsDxilContainerLike(
+        pProgram->GetBufferPointer(), pProgram->GetBufferSize());
+    VERIFY_IS_NOT_NULL(pContainer);
+    hlsl::DxilPartIterator partIter =
+        std::find_if(hlsl::begin(pContainer), hlsl::end(pContainer),
+                     hlsl::DxilPartIsType(hlsl::DFCC_DXIL));
+    // find_if returns the end iterator when the part is missing; it must not
+    // be dereferenced in that case.
+    VERIFY_IS_FALSE(partIter == hlsl::end(pContainer));
+    const hlsl::DxilProgramHeader *pProgramHeader =
+        (const hlsl::DxilProgramHeader *)hlsl::GetDxilPartData(*partIter);
+    uint32_t bitcodeLength;
+    const char *pBitcode;
+    CComPtr<IDxcBlob> pDxilBits;
+    hlsl::GetDxilProgramBitcode(pProgramHeader, &pBitcode, &bitcodeLength);
+    // The bitcode is addressed by its byte offset from the container start.
+    VERIFY_SUCCEEDED(pLib->CreateBlobFromBlob(
+        pProgram, pBitcode - (char *)pProgram->GetBufferPointer(),
+        bitcodeLength, &pDxilBits));
+    return pDxilBits;
+  }
+
+  // One register assignment parsed from the annotation pass's text output:
+  // |base| is the first register and |count| the number of registers (1 for a
+  // "dxil" line, the trailing number for an "alloca" line).
+  struct ValueLocation
+  {
+    int base;
+    int count;
+  };
+
+  // Result of RunAnnotationPasses: the module returned by the optimizer plus
+  // the register assignments parsed from its text output.
+  struct PassOutput
+  {
+    CComPtr<IDxcBlob> blob;
+    std::vector<ValueLocation> valueLocations;
+  };
+
+  // Runs the -dxil-dbg-value-to-dbg-declare and
+  // -dxil-annotate-with-virtual-regs passes over |dxil|. Returns the module
+  // produced by the optimizer together with the register assignments parsed
+  // from the optimizer's text output. Only lines between the
+  // "Begin/End - dxil values to virtual register mapping" markers are parsed;
+  // each must read "<value> dxil <reg>" or "<value> alloca <base> <count>".
+  PassOutput RunAnnotationPasses(IDxcBlob * dxil)
+  {
+    CComPtr<IDxcOptimizer> pOptimizer;
+    VERIFY_SUCCEEDED(
+        m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer));
+    std::vector<LPCWSTR> Options;
+    Options.push_back(L"-opt-mod-passes");
+    Options.push_back(L"-dxil-dbg-value-to-dbg-declare");
+    Options.push_back(L"-dxil-annotate-with-virtual-regs");
+
+    CComPtr<IDxcBlob> pOptimizedModule;
+    CComPtr<IDxcBlobEncoding> pText;
+    VERIFY_SUCCEEDED(pOptimizer->RunOptimizer(
+        dxil, Options.data(), static_cast<UINT32>(Options.size()),
+        &pOptimizedModule, &pText));
+
+    // Copy the text using the blob's reported size instead of relying on a
+    // terminating NUL, but stop at the first NUL if the blob contains one.
+    std::string outputText;
+    if (pText && pText->GetBufferSize() != 0)
+    {
+      const char *textBegin =
+          reinterpret_cast<const char*>(pText->GetBufferPointer());
+      const char *textEnd = textBegin + pText->GetBufferSize();
+      outputText.assign(textBegin, std::find(textBegin, textEnd, '\0'));
+    }
+
+    auto lines = Tokenize(outputText, "\n");
+
+    std::vector<ValueLocation> valueLocations;
+
+    for (size_t line = 0; line < lines.size(); ++line) {
+      if (lines[line] == "Begin - dxil values to virtual register mapping") {
+        // Consume lines up to the matching End marker.
+        for (++line; line < lines.size(); ++line) {
+          if (lines[line] == "End - dxil values to virtual register mapping") {
+            break;
+          }
+
+          auto lineTokens = Tokenize(lines[line], " ");
+          VERIFY_IS_TRUE(lineTokens.size() >= 2);
+          if (lineTokens[1] == "dxil")
+          {
+            // "<value> dxil <reg>": a single register.
+            VERIFY_IS_TRUE(lineTokens.size() == 3);
+            valueLocations.push_back({atoi(lineTokens[2].c_str()), 1});
+          }
+          else if (lineTokens[1] == "alloca")
+          {
+            // "<value> alloca <base> <count>": a run of registers.
+            VERIFY_IS_TRUE(lineTokens.size() == 4);
+            valueLocations.push_back(
+                {atoi(lineTokens[2].c_str()), atoi(lineTokens[3].c_str())});
+          }
+          else
+          {
+            VERIFY_IS_TRUE(false);
+          }
+        }
+      }
+    }
+
+    return { std::move(pOptimizedModule), std::move(valueLocations) };
+  }
+
+  // Disassembles |pProgram| with a freshly created compiler instance and
+  // returns the listing converted from UTF-8 to a wide string.
+  std::wstring Disassemble(IDxcBlob * pProgram)
+  {
+    CComPtr<IDxcCompiler> pCompiler;
+    VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
+
+    CComPtr<IDxcBlobEncoding> pDbgDisassembly;
+    VERIFY_SUCCEEDED(pCompiler->Disassemble(pProgram, &pDbgDisassembly));
+    std::string disText = BlobToUtf8(pDbgDisassembly);
+    CA2W disTextW(disText.c_str(), CP_UTF8);
+    return std::wstring(disTextW);
+  }
+
+  // Locates the LLVM bitcode belonging to part |fourCC| inside |pSource|.
+  //  - If |pSource| already starts with the bitcode magic, it is returned
+  //    unchanged.
+  //  - If |pSource| is a valid DXIL container, its first part of kind
+  //    |fourCC| is used.
+  //  - Otherwise |pSource| is assumed to begin with a part header.
+  // Returns an empty pointer when no part of kind |fourCC| is found.
+  CComPtr<IDxcBlob> FindModule(hlsl::DxilFourCC fourCC, IDxcBlob *pSource)
+  {
+    const UINT32 BC_C0DE = ((INT32)(INT8)'B' | (INT32)(INT8)'C' << 8 |
+                            (INT32)0xDEC0 << 16); // BC0xc0de in big endian
+    const char *pBitcode = nullptr;
+    const hlsl::DxilPartHeader *pDxilPartHeader =
+        (hlsl::DxilPartHeader *)
+            pSource->GetBufferPointer(); // Initialize assuming that source is
+                                         // starting with DXIL part
+
+    if (BC_C0DE == *(UINT32 *)pSource->GetBufferPointer()) {
+      return pSource;
+    }
+    if (hlsl::IsValidDxilContainer(
+            (hlsl::DxilContainerHeader *)pSource->GetBufferPointer(),
+            pSource->GetBufferSize())) {
+      hlsl::DxilContainerHeader *pDxilContainerHeader =
+          (hlsl::DxilContainerHeader *)pSource->GetBufferPointer();
+      auto partIter =
+          std::find_if(begin(pDxilContainerHeader), end(pDxilContainerHeader),
+                       hlsl::DxilPartIsType(fourCC));
+      // The container may simply not have the requested part; the end
+      // iterator must not be dereferenced.
+      if (partIter == end(pDxilContainerHeader)) {
+        return {};
+      }
+      pDxilPartHeader = *partIter;
+    }
+    if (fourCC == pDxilPartHeader->PartFourCC) {
+      UINT32 pBlobSize;
+      // The program header immediately follows the part header.
+      hlsl::DxilProgramHeader *pDxilProgramHeader =
+          (hlsl::DxilProgramHeader *)(pDxilPartHeader + 1);
+      hlsl::GetDxilProgramBitcode(pDxilProgramHeader, &pBitcode, &pBlobSize);
+      UINT32 offset =
+          (UINT32)(pBitcode - (const char *)pSource->GetBufferPointer());
+      CComPtr<IDxcLibrary> library;
+      IFT(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &library));
+      CComPtr<IDxcBlob> targetBlob;
+      // Propagate a failure rather than silently returning an empty blob.
+      IFT(library->CreateBlobFromBlob(pSource, offset, pBlobSize, &targetBlob));
+      return targetBlob;
+    }
+    return {};
+  }
+
+
+  // Builds a new shader container around the modified bitcode |pNewDxilBlob|
+  // and hands it back through |ppNewShaderOut| (ownership is detached to the
+  // caller). The container is assembled from the new bitcode; if the original
+  // shader (|originalShaderBytecode|, |originalShaderLength|) has a
+  // DFCC_PrivateData part, that part is copied into the new container.
+  // Any assembler or container-builder error text fails verification.
+  void ReplaceDxilBlobPart(
+      const void *originalShaderBytecode, SIZE_T originalShaderLength,
+      IDxcBlob *pNewDxilBlob, IDxcBlob **ppNewShaderOut)
+  {
+    CComPtr<IDxcLibrary> pLibrary;
+    IFT(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLibrary));
+
+    CComPtr<IDxcBlob> pNewContainer;
+
+    // Use the container assembler to build a new container from the
+    // recently-modified DXIL bitcode. This container will contain new copies of
+    // things like input signature etc., which will supersede the ones from the
+    // original compiled shader's container.
+    {
+      CComPtr<IDxcAssembler> pAssembler;
+      IFT(m_dllSupport.CreateInstance(CLSID_DxcAssembler, &pAssembler));
+
+      CComPtr<IDxcOperationResult> pAssembleResult;
+      VERIFY_SUCCEEDED(
+          pAssembler->AssembleToContainer(pNewDxilBlob, &pAssembleResult));
+
+      CComPtr<IDxcBlobEncoding> pAssembleErrors;
+      VERIFY_SUCCEEDED(
+          pAssembleResult->GetErrorBuffer(&pAssembleErrors));
+
+      if (pAssembleErrors && pAssembleErrors->GetBufferSize() != 0) {
+        OutputDebugStringA(
+            static_cast<LPCSTR>(pAssembleErrors->GetBufferPointer()));
+        VERIFY_SUCCEEDED(E_FAIL);
+      }
+
+      VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pNewContainer));
+    }
+
+    // Now copy over the blobs from the original container that won't have been
+    // invalidated by changing the shader code itself, using the container
+    // reflection API
+    { 
+      // Wrap the original code in a container blob
+      CComPtr<IDxcBlobEncoding> pContainer;
+      VERIFY_SUCCEEDED(
+          pLibrary->CreateBlobWithEncodingFromPinned(
+              static_cast<LPBYTE>(const_cast<void *>(originalShaderBytecode)),
+              static_cast<UINT32>(originalShaderLength), CP_ACP, &pContainer));
+
+      CComPtr<IDxcContainerReflection> pReflection;
+      IFT(m_dllSupport.CreateInstance(CLSID_DxcContainerReflection, &pReflection));
+
+      // Load the reflector from the original shader
+      VERIFY_SUCCEEDED(pReflection->Load(pContainer));
+
+      UINT32 partIndex;
+
+      // Only the private-data part is carried over, and only if it exists.
+      if (SUCCEEDED(pReflection->FindFirstPartKind(hlsl::DFCC_PrivateData,
+                                                   &partIndex))) {
+        CComPtr<IDxcBlob> pPart;
+        VERIFY_SUCCEEDED(
+            pReflection->GetPartContent(partIndex, &pPart));
+
+        CComPtr<IDxcContainerBuilder> pContainerBuilder;
+        IFT(m_dllSupport.CreateInstance(CLSID_DxcContainerBuilder,
+                                        &pContainerBuilder));
+
+        VERIFY_SUCCEEDED(
+            pContainerBuilder->Load(pNewContainer));
+
+        VERIFY_SUCCEEDED(
+            pContainerBuilder->AddPart(hlsl::DFCC_PrivateData, pPart));
+
+        CComPtr<IDxcOperationResult> pBuildResult;
+
+        VERIFY_SUCCEEDED(
+            pContainerBuilder->SerializeContainer(&pBuildResult));
+
+        CComPtr<IDxcBlobEncoding> pBuildErrors;
+        VERIFY_SUCCEEDED(
+            pBuildResult->GetErrorBuffer(&pBuildErrors));
+
+        if (pBuildErrors && pBuildErrors->GetBufferSize() != 0) {
+          OutputDebugStringA(
+              reinterpret_cast<LPCSTR>(pBuildErrors->GetBufferPointer()));
+          VERIFY_SUCCEEDED(E_FAIL);
+        }
+
+        VERIFY_SUCCEEDED(
+            pBuildResult->GetResult(&pNewContainer));
+      }
+    }
+
+    *ppNewShaderOut = pNewContainer.Detach();
+  }
+
+  // Owns everything needed to inspect the debug-info module of a compiled
+  // shader: the LLVM context, the llvm::Module parsed from the container's
+  // DFCC_ShaderDebugInfoDXIL part, and a pointer to the DxilModule obtained
+  // from that llvm::Module.
+  class ModuleAndHangersOn
+  {
+    std::unique_ptr<llvm::LLVMContext> llvmContext;
+    std::unique_ptr<llvm::Module> llvmModule;
+    // Obtained from llvmModule; null until construction completes.
+    DxilModule* dxilModule = nullptr;
+
+  public:
+    // Parses the debug-info bitcode out of the container |pBlob|. Every step
+    // is verified, including that a DxilModule is actually available.
+    ModuleAndHangersOn(IDxcBlob* pBlob)
+    {
+      // Verify we have a valid dxil container.
+      const DxilContainerHeader *pContainer = IsDxilContainerLike(
+          pBlob->GetBufferPointer(), pBlob->GetBufferSize());
+      VERIFY_IS_NOT_NULL(pContainer);
+      VERIFY_IS_TRUE(IsValidDxilContainer(pContainer, pBlob->GetBufferSize()));
+
+      // Get the debug-info Dxil part from the container.
+      DxilPartIterator it =
+          std::find_if(begin(pContainer), end(pContainer),
+                       DxilPartIsType(DFCC_ShaderDebugInfoDXIL));
+      VERIFY_IS_FALSE(it == end(pContainer));
+
+      const DxilProgramHeader *pProgramHeader =
+          reinterpret_cast<const DxilProgramHeader *>(GetDxilPartData(*it));
+      VERIFY_IS_TRUE(IsValidDxilProgramHeader(pProgramHeader, (*it)->PartSize));
+
+      // Get a pointer to the llvm bitcode.
+      const char *pIL;
+      uint32_t pILLength;
+      GetDxilProgramBitcode(pProgramHeader, &pIL, &pILLength);
+
+      // Parse llvm bitcode into a module.
+      std::unique_ptr<llvm::MemoryBuffer> pBitcodeBuf(
+          llvm::MemoryBuffer::getMemBuffer(llvm::StringRef(pIL, pILLength), "",
+                                           false));
+
+      llvmContext.reset(new llvm::LLVMContext);
+
+      llvm::ErrorOr<std::unique_ptr<llvm::Module>> pModule(
+          llvm::parseBitcodeFile(pBitcodeBuf->getMemBufferRef(),
+                                 *llvmContext));
+      if (std::error_code ec = pModule.getError()) {
+        VERIFY_FAIL();
+      }
+
+      llvmModule = std::move(pModule.get());
+
+      dxilModule =
+          DxilModule::TryGetDxilModule(llvmModule.get());
+      // GetDxilModule() dereferences this pointer unconditionally, so report
+      // a missing DxilModule here rather than crashing later.
+      VERIFY_IS_NOT_NULL(dxilModule);
+    }
+
+    // Returns the DxilModule found during construction.
+    DxilModule& GetDxilModule()
+    {
+      return *dxilModule;
+    }
+  };
+
+  // Summary of one dbg.declare as seen by the member iterator: how many
+  // members it yielded, the bit offset of the first one, and the total number
+  // of bits covered by all of them.
+  struct AggregateOffsetAndSize
+  {
+    unsigned countOfMembers;
+    unsigned offset;
+    unsigned size;
+  };
+  // One store through a GEP, as recorded by TestStructAnnotationCase:
+  // |memberName| is the GEP's name (or a synthesized "member<N>"), and
+  // |regBase|, |regSize| and |index| are the values reported by
+  // PixAllocaRegWrite::FromInst for the store.
+  struct AllocaWrite {
+    std::string memberName;
+    uint32_t regBase;
+    uint32_t regSize;
+    uint64_t index;
+  };
+  // Everything TestStructAnnotationCase collects for a test to assert on:
+  // one entry per dbg.declare and one entry per store through a GEP.
+  struct TestableResults
+  {
+    std::vector<AggregateOffsetAndSize> OffsetAndSizes;
+    std::vector<AllocaWrite> AllocaWrites;
+  };
+
+  TestableResults TestStructAnnotationCase(const char* hlsl);
+  void ValidateAllocaWrite(std::vector<AllocaWrite> const& allocaWrites, size_t index, const char* name);
+
 };
 
 
@@ -687,7 +1108,8 @@ TEST_F(PixTest, CompileDebugPDB) {
 
 TEST_F(PixTest, CompileDebugLines) {
   CComPtr<IDiaDataSource> pDiaSource;
-  VERIFY_SUCCEEDED(CreateDiaSourceForCompile(
+  VERIFY_SUCCEEDED(
+      CreateDiaSourceForCompile(
     "float main(float pos : A) : SV_Target {\r\n"
     "  float x = abs(pos);\r\n"
     "  float y = sin(pos);\r\n"
@@ -1304,5 +1726,613 @@ TEST_F(PixTest, PixDebugCompileInfo) {
   VERIFY_ARE_EQUAL(std::wstring(profile), std::wstring(hlslTarget));
 }
 
+// This function lives in lib\DxilPIXPasses\DxilAnnotateWithVirtualRegister.cpp
+// Declared here so we can test it.
+uint32_t CountStructMembers(llvm::Type const* pType);
+
+// Compiles |hlsl| as an amplification shader (as_6_5), runs the PIX
+// annotation passes over its debug-info module and cross-checks three views
+// of each struct:
+//  1. the member iterator's walk over every dbg.declare (members must be
+//     contiguous; their count must match CountStructMembers on the alloca),
+//  2. the register runs printed by the annotation pass,
+//  3. the alloca-write metadata on every store made through a GEP.
+// Returns the collected per-declare and per-store data for the caller to
+// assert on. Unexpected IR shapes fail verification instead of dereferencing
+// a null pointer.
+PixTest::TestableResults PixTest::TestStructAnnotationCase(const char* hlsl)
+{
+  auto pOperationResult = Compile(hlsl, L"as_6_5");
+  CComPtr<IDxcBlob> pBlob;
+  CheckOperationSucceeded(pOperationResult, &pBlob);
+
+  CComPtr<IDxcBlob> pDxil = FindModule(DFCC_ShaderDebugInfoDXIL, pBlob);
+
+  PassOutput passOutput = RunAnnotationPasses(pDxil);
+
+  auto pAnnotated = passOutput.blob;
+
+  CComPtr<IDxcBlob> pAnnotatedContainer;
+  ReplaceDxilBlobPart(
+    pBlob->GetBufferPointer(),
+    pBlob->GetBufferSize(),
+    pAnnotated,
+    &pAnnotatedContainer);
+
+  ModuleAndHangersOn moduleEtc(pAnnotatedContainer);
+
+  llvm::Function *entryFunction = moduleEtc.GetDxilModule().GetEntryFunction();
+
+  PixTest::TestableResults ret;
+
+  // For every dbg.declare, run the member iterator and record what it finds:
+  for (auto& block : entryFunction->getBasicBlockList())
+  {
+    for (auto& instruction : block.getInstList())
+    {
+      if (auto* dbgDeclare = llvm::dyn_cast<llvm::DbgDeclareInst>(&instruction))
+      {
+        llvm::Value* Address = dbgDeclare->getAddress();
+        auto* AddressAsAlloca = llvm::dyn_cast<llvm::AllocaInst>(Address);
+        // The alloca is dereferenced below, so a non-alloca address is a
+        // test failure rather than a crash.
+        VERIFY_IS_NOT_NULL(AddressAsAlloca);
+        auto* Expression = dbgDeclare->getExpression();
+
+        std::unique_ptr<dxil_debug_info::MemberIterator> iterator = dxil_debug_info::CreateMemberIterator(
+          dbgDeclare,
+          moduleEtc.GetDxilModule().GetModule()->getDataLayout(),
+          AddressAsAlloca,
+          Expression);
+        // Iterator creation can fail; report that explicitly.
+        VERIFY_IS_TRUE(iterator != nullptr);
+
+        unsigned int startingBit = 0;
+        unsigned int coveredBits = 0;
+        unsigned int memberIndex = 0;
+        while (iterator->Next(&memberIndex))
+        {
+          if (memberIndex == 0)
+          {
+            startingBit = iterator->OffsetInBits(memberIndex);
+            coveredBits = iterator->SizeInBits(memberIndex);
+          }
+          else
+          {
+            // Next member has to start where the previous one ended:
+            VERIFY_ARE_EQUAL(iterator->OffsetInBits(memberIndex), startingBit + coveredBits);
+            coveredBits += iterator->SizeInBits(memberIndex);
+          }
+        }
+
+        // memberIndex is now the count of members in this aggregate type
+        ret.OffsetAndSizes.push_back({ memberIndex, startingBit, coveredBits });
+
+        // Use this independent count of number of struct members to test the
+        // function that operates on the alloca type:
+        llvm::Type *pAllocaTy = AddressAsAlloca->getType()->getElementType();
+        if (auto *AT = llvm::dyn_cast<llvm::ArrayType>(pAllocaTy))
+        {
+          // This is the case where a struct is passed to a function, and in
+          // these tests there should be only one struct behind the pointer.
+          VERIFY_ARE_EQUAL(AT->getNumElements(), 1);
+          pAllocaTy = AT->getArrayElementType();
+        }
+
+        if (auto* ST = llvm::dyn_cast<llvm::StructType>(pAllocaTy))
+        {
+          uint32_t countOfMembers = CountStructMembers(ST);
+          VERIFY_ARE_EQUAL(countOfMembers, memberIndex);
+        }
+        else if (pAllocaTy->isFloatingPointTy() || pAllocaTy->isIntegerTy())
+        {
+          // If there's only one member in the struct in the pass-to-function (by pointer)
+          // case, then the underlying type will have been reduced to the contained type.
+          VERIFY_ARE_EQUAL(1, memberIndex);
+        }
+        else
+        {
+          VERIFY_IS_TRUE(false);
+        }
+      }
+    }
+  }
+
+  // The member iterator should find a solid run of bits that is exactly covered
+  // by exactly one of the members found by the annotation pass:
+  for (auto const& cover : ret.OffsetAndSizes)
+  {
+    bool found = false;
+    for (auto const& valueLocation : passOutput.valueLocations)
+    {
+      constexpr unsigned int eightBitsPerByte = 8;
+      if (valueLocation.base * eightBitsPerByte == cover.offset)
+      {
+        VERIFY_IS_FALSE(found);
+        found = true;
+        VERIFY_ARE_EQUAL(valueLocation.count, cover.countOfMembers);
+      }
+    }
+    VERIFY_IS_TRUE(found);
+  }
+
+  // For every store made through a GEP, record the alloca-write information
+  // that the annotation pass attached to the store:
+  for (auto& block : entryFunction->getBasicBlockList()) {
+    for (auto& instruction : block.getInstList()) {
+      if (auto* store =
+        llvm::dyn_cast<llvm::StoreInst>(&instruction)) {
+
+        if (auto* pGEP = llvm::dyn_cast<llvm::GetElementPtrInst>(store->getPointerOperand()))
+        {
+          ret.AllocaWrites.push_back({});
+          auto& NewAllocaWrite = ret.AllocaWrites.back();
+          llvm::Value* pPointerOperand = pGEP->getPointerOperand();
+          if (auto* pGEP2 = llvm::dyn_cast<llvm::GetElementPtrInst>(pPointerOperand))
+          {
+            // The member index is read from operand 2 of the outer GEP; make
+            // sure it exists and is a constant before using it.
+            VERIFY_IS_TRUE(pGEP->getNumOperands() > 2);
+            auto *pMemberIndex = llvm::dyn_cast<llvm::ConstantInt>(
+                pGEP->getOperand(2));
+            VERIFY_IS_NOT_NULL(pMemberIndex);
+            uint64_t memberIndex = pMemberIndex->getLimitedValue();
+            // Until we have debugging info for floatN, matrixNxM etc., we can't get the name:
+            // auto *secondPointer = pGEP2->getPointerOperandType();
+            // auto* pStruct =
+            // llvm::dyn_cast<llvm::StructType>(secondPointer->getVectorElementType());
+            NewAllocaWrite.memberName =
+                "member" + std::to_string(memberIndex);
+          }
+          else
+          {
+            NewAllocaWrite.memberName = pGEP->getName();
+          }
+
+          llvm::Value* index;
+          if (pix_dxil::PixAllocaRegWrite::FromInst(
+            store,
+            &NewAllocaWrite.regBase,
+            &NewAllocaWrite.regSize,
+            &index)) {
+            auto* asInt = llvm::dyn_cast<llvm::ConstantInt>(index);
+            // These tests only write with constant indices.
+            VERIFY_IS_NOT_NULL(asInt);
+            NewAllocaWrite.index = asInt->getLimitedValue();
+          }
+        }
+      }
+    }
+  }
+
+  return ret;
+}
+
+// Checks the |index|-th recorded alloca write: its register index must equal
+// |index| and, in DBG builds only, its member name must begin with |name|.
+// Fails verification (instead of reading out of bounds) when fewer than
+// |index| + 1 writes were recorded.
+void PixTest::ValidateAllocaWrite(std::vector<AllocaWrite> const &allocaWrites,
+                                  size_t index, const char *name) {
+  if (index >= allocaWrites.size()) {
+    VERIFY_IS_TRUE(false);
+    return;
+  }
+  VERIFY_ARE_EQUAL(index, allocaWrites[index].index);
+#if DBG
+  // Compilation may decorate the struct member name, so only require that the
+  // recorded name begins with the expected one:
+  VERIFY_IS_TRUE(0 == strncmp(name, allocaWrites[index].memberName.c_str(), strlen(name)));
+#else
+  // Names are only compared in DBG builds.
+  (void)name;
+#endif
+}
+
+
+// Payload with a single uint member: expects one dbg.declare covering one
+// 32-bit member at offset 0, and one alloca write for "dummy".
+TEST_F(PixTest, PixStructAnnotation_Simple) {
+  const char *hlsl = R"(
+struct smallPayload
+{
+    uint dummy;
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    smallPayload p;
+    p.dummy = 42;
+    DispatchMesh(1, 1, 1, p);
+}
+)";
+
+  auto Testables = TestStructAnnotationCase(hlsl);
+
+  VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes.size());
+  VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes[0].countOfMembers);
+  VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);  
+  VERIFY_ARE_EQUAL(32, Testables.OffsetAndSizes[0].size);
+
+  VERIFY_ARE_EQUAL(1, Testables.AllocaWrites.size());
+  ValidateAllocaWrite(Testables.AllocaWrites, 0, "dummy");
+}
+
+
+// Same single-member payload, but copied into a second variable before
+// dispatch: expects two dbg.declares and two alloca writes. Only the first
+// declare's layout is checked, and the write's name is not constrained.
+TEST_F(PixTest, PixStructAnnotation_CopiedStruct) {
+  const char *hlsl = R"(
+struct smallPayload
+{
+    uint dummy;
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    smallPayload p;
+    p.dummy = 42;
+    smallPayload p2 = p;
+    DispatchMesh(1, 1, 1, p2);
+}
+)";
+
+  auto Testables = TestStructAnnotationCase(hlsl);
+
+  VERIFY_ARE_EQUAL(2, Testables.OffsetAndSizes.size());
+  VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes[0].countOfMembers);
+  VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);  
+  VERIFY_ARE_EQUAL(32, Testables.OffsetAndSizes[0].size);
+
+  VERIFY_ARE_EQUAL(2, Testables.AllocaWrites.size());
+  // The values in the copy don't have stable names:
+  ValidateAllocaWrite(Testables.AllocaWrites, 0, "");
+}
+
+// Payload mixing bool, 16-, 32- and 64-bit members: expects four contiguous
+// members totalling 32+64+32+16 bits and one alloca write per member, in
+// declaration order.
+TEST_F(PixTest, PixStructAnnotation_MixedSizes) {
+  const char *hlsl = R"(
+struct smallPayload
+{
+    bool b1;
+    uint16_t sixteen;
+    uint32_t thirtytwo;
+    uint64_t sixtyfour;
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    smallPayload p;
+    p.b1 = true;
+    p.sixteen = 16;
+    p.thirtytwo = 32;
+    p.sixtyfour = 64;
+    DispatchMesh(1, 1, 1, p);
+}
+)";
+
+  auto Testables = TestStructAnnotationCase(hlsl);
+
+  VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes.size());
+  VERIFY_ARE_EQUAL(4, Testables.OffsetAndSizes[0].countOfMembers);
+  VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);
+  VERIFY_ARE_EQUAL(32+64+32+16, Testables.OffsetAndSizes[0].size);
+
+  VERIFY_ARE_EQUAL(4, Testables.AllocaWrites.size());
+  ValidateAllocaWrite(Testables.AllocaWrites, 0, "b1");
+  ValidateAllocaWrite(Testables.AllocaWrites, 1, "sixteen");
+  ValidateAllocaWrite(Testables.AllocaWrites, 2, "thirtytwo");
+  ValidateAllocaWrite(Testables.AllocaWrites, 3, "sixtyfour");
+}
+
+// Payload containing a nested struct: the nested members are expected to be
+// flattened, giving four contiguous 32-bit members and alloca writes at
+// indices 0..3 in declaration order.
+TEST_F(PixTest, PixStructAnnotation_StructWithinStruct) {
+  const char *hlsl = R"(
+
+struct Contained
+{
+  uint32_t one;
+  uint32_t two;
+};
+
+struct smallPayload
+{
+  uint32_t before;
+  Contained contained;
+  uint32_t after;
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    smallPayload p;
+    p.before = 0xb4;
+    p.contained.one = 1;
+    p.contained.two = 2;
+    p.after = 3;
+    DispatchMesh(1, 1, 1, p);
+}
+)";
+
+  auto Testables = TestStructAnnotationCase(hlsl);
+
+  VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes.size());
+  VERIFY_ARE_EQUAL(4, Testables.OffsetAndSizes[0].countOfMembers);
+  VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);
+  VERIFY_ARE_EQUAL(4*32, Testables.OffsetAndSizes[0].size);
+
+  // Entries 0..3 are indexed below; make sure they were all recorded.
+  VERIFY_IS_TRUE(Testables.AllocaWrites.size() >= 4);
+  ValidateAllocaWrite(Testables.AllocaWrites, 0, "before");
+  ValidateAllocaWrite(Testables.AllocaWrites, 1, "one");
+  ValidateAllocaWrite(Testables.AllocaWrites, 2, "two");
+  ValidateAllocaWrite(Testables.AllocaWrites, 3, "after");
+}
+
+// Payload holding a two-element uint32_t array: expects one dbg.declare
+// covering two contiguous 32-bit members starting at offset 0.
+TEST_F(PixTest, PixStructAnnotation_1DArray) {
+    const char* hlsl = R"(
+struct smallPayload
+{
+    uint32_t Array[2];
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    smallPayload p;
+    p.Array[0] = 250;
+    p.Array[1] = 251;
+    DispatchMesh(1, 1, 1, p);
+}
+)";
+
+    auto Testables = TestStructAnnotationCase(hlsl);
+    VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes.size());
+    VERIFY_ARE_EQUAL(2, Testables.OffsetAndSizes[0].countOfMembers);
+    VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);
+    VERIFY_ARE_EQUAL(2 * 32, Testables.OffsetAndSizes[0].size);
+}
+
+// Payload holding a 2x3 uint32_t array: expects one dbg.declare covering six
+// contiguous 32-bit members starting at offset 0.
+TEST_F(PixTest, PixStructAnnotation_2DArray) {
+  const char *hlsl = R"(
+struct smallPayload
+{
+    uint32_t TwoDArray[2][3];
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    smallPayload p;
+    p.TwoDArray[0][0] = 250;
+    p.TwoDArray[0][1] = 251;
+    p.TwoDArray[0][2] = 252;
+    p.TwoDArray[1][0] = 253;
+    p.TwoDArray[1][1] = 254;
+    p.TwoDArray[1][2] = 255;
+    DispatchMesh(1, 1, 1, p);
+}
+)";
+
+  auto Testables = TestStructAnnotationCase(hlsl);
+  VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes.size());
+  VERIFY_ARE_EQUAL(6, Testables.OffsetAndSizes[0].countOfMembers);
+  VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);
+  VERIFY_ARE_EQUAL(2 * 3 * 32, Testables.OffsetAndSizes[0].size);
+}
+
+// Payload struct that embeds a struct holding a three-element array, placed
+// between two scalars. The expectations count every array element as its own
+// member, and each element write is reported under the array's name.
+TEST_F(PixTest, PixStructAnnotation_EmbeddedArray) {
+  const char *shaderText = R"(
+
+struct Contained
+{
+  uint32_t array[3];
+};
+
+struct smallPayload
+{
+  uint32_t before;
+  Contained contained;
+  uint32_t after;
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    smallPayload p;
+    p.before = 0xb4;
+    p.contained.array[0] = 0;
+    p.contained.array[1] = 1;
+    p.contained.array[2] = 2;
+    p.after = 3;
+    DispatchMesh(1, 1, 1, p);
+}
+)";
+
+  auto Testables = TestStructAnnotationCase(shaderText);
+
+  // before + array[0..2] + after: five uint32_t members starting at offset 0.
+  VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes.size());
+  VERIFY_ARE_EQUAL(5, Testables.OffsetAndSizes[0].countOfMembers);
+  VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);
+  VERIFY_ARE_EQUAL(5 * 32, Testables.OffsetAndSizes[0].size);
+
+  // Write indices 0..4 line up with the five stores in main().
+  const char *const expectedNames[] = {"before", "array", "array", "array",
+                                       "after"};
+  int writeIndex = 0;
+  for (const char *memberName : expectedNames) {
+    ValidateAllocaWrite(Testables.AllocaWrites, writeIndex, memberName);
+    ++writeIndex;
+  }
+}
+
+// Payload with a single float2 member. Only the alloca writes are checked; the
+// offset/size expectations stay disabled (see the comment in the body).
+TEST_F(PixTest, PixStructAnnotation_FloatN) {
+  const char *shaderText = R"(
+struct smallPayload
+{
+    float2 f2;
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    smallPayload p;
+    p.f2 = float2(1,2);
+    DispatchMesh(1, 1, 1, p);
+}
+)";
+
+  auto Testables = TestStructAnnotationCase(shaderText);
+
+  // Disabled: can't be tested until dbg.declare instructions are emitted when
+  // structs contain pointers-to-pointers.
+  // VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes.size());
+  // VERIFY_ARE_EQUAL(2, Testables.OffsetAndSizes[0].countOfMembers);
+  // VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);
+  // VERIFY_ARE_EQUAL(32 + 32, Testables.OffsetAndSizes[0].size);
+
+  VERIFY_ARE_EQUAL(2, Testables.AllocaWrites.size());
+
+  // The names are the placeholder "memberN" until dbg.declare works.
+  const char *const expectedNames[] = {"member0", "member1"};
+  int writeIndex = 0;
+  for (const char *memberName : expectedNames) {
+    ValidateAllocaWrite(Testables.AllocaWrites, writeIndex, memberName);
+    ++writeIndex;
+  }
+}
+
+
+// Payload with two consecutive float3 members. Only the alloca writes are
+// checked; the offset/size expectations stay disabled (see the comment in the
+// body).
+TEST_F(PixTest, PixStructAnnotation_SequentialFloatN) {
+  const char *shaderText = R"(
+struct smallPayload
+{
+    float3 color;
+    float3 dir;
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    smallPayload p;
+    p.color = float3(1,2,3);
+    p.dir = float3(4,5,6);
+
+    DispatchMesh(1, 1, 1, p);
+}
+)";
+
+  auto Testables = TestStructAnnotationCase(shaderText);
+
+  // Disabled: can't be tested until dbg.declare instructions are emitted when
+  // structs contain pointers-to-pointers.
+  // NOTE(review): the numbers below (2 members, 32 + 32) describe a single
+  // float2; for two float3 members they would presumably need updating -
+  // confirm when re-enabling.
+  // VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes.size());
+  // VERIFY_ARE_EQUAL(2, Testables.OffsetAndSizes[0].countOfMembers);
+  // VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);
+  // VERIFY_ARE_EQUAL(32 + 32, Testables.OffsetAndSizes[0].size);
+
+  VERIFY_ARE_EQUAL(6, Testables.AllocaWrites.size());
+
+  // Two vectors of three components each. The placeholder "memberN" names
+  // (used until dbg.declare works) restart at member0 for the second vector.
+  const char *const componentNames[] = {"member0", "member1", "member2"};
+  int writeIndex = 0;
+  for (int vectorIndex = 0; vectorIndex < 2; ++vectorIndex) {
+    for (const char *componentName : componentNames) {
+      ValidateAllocaWrite(Testables.AllocaWrites, writeIndex, componentName);
+      ++writeIndex;
+    }
+  }
+}
+
+// Payload with a uint32_t followed by a nested struct that holds a float2.
+// Only the alloca writes are checked; the offset/size expectations stay
+// disabled (see the comment in the body).
+TEST_F(PixTest, PixStructAnnotation_EmbeddedFloatN) {
+  const char *shaderText = R"(
+
+struct Embedded
+{
+    float2 f2;
+};
+
+struct smallPayload
+{
+  uint32_t i32;
+  Embedded e;
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    smallPayload p;
+    p.i32 = 32;
+    p.e.f2 = float2(1,2);
+    DispatchMesh(1, 1, 1, p);
+}
+)";
+
+  auto Testables = TestStructAnnotationCase(shaderText);
+
+  // Disabled: can't be tested until dbg.declare instructions are emitted when
+  // structs contain pointers-to-pointers.
+  // NOTE(review): the numbers below (2 members, 32 + 32) do not account for
+  // the leading uint32_t; they would presumably need updating - confirm when
+  // re-enabling.
+  //VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes.size());
+  //VERIFY_ARE_EQUAL(2, Testables.OffsetAndSizes[0].countOfMembers);
+  //VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);
+  //VERIFY_ARE_EQUAL(32 + 32, Testables.OffsetAndSizes[0].size);
+
+  VERIFY_ARE_EQUAL(3, Testables.AllocaWrites.size());
+
+  // Write 0 is expected with an empty name; writes 1 and 2 use the "memberN"
+  // placeholder names.
+  const char *const expectedNames[] = {"", "member0", "member1"};
+  int writeIndex = 0;
+  for (const char *memberName : expectedNames) {
+    ValidateAllocaWrite(Testables.AllocaWrites, writeIndex, memberName);
+    ++writeIndex;
+  }
+}
+
+// Payload with a single float4x4 member. Sixteen alloca writes are expected,
+// each validated against an empty member name.
+TEST_F(PixTest, PixStructAnnotation_Matrix) {
+  const char *shaderText = R"(
+struct smallPayload
+{
+  float4x4 mat;
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+  smallPayload p;
+  p.mat = float4x4( 1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15, 16);
+  DispatchMesh(1, 1, 1, p);
+}
+)";
+
+  auto Testables = TestStructAnnotationCase(shaderText);
+
+  // The member iterator can't be tested until dbg.declare instructions are
+  // emitted when structs contain pointers-to-pointers, so only the writes are
+  // checked.
+  VERIFY_ARE_EQUAL(16, Testables.AllocaWrites.size());
+
+  constexpr int MatrixElementCount = 16; // one write per float4x4 element
+  for (int element = 0; element != MatrixElementCount; ++element) {
+    ValidateAllocaWrite(Testables.AllocaWrites, element, "");
+  }
+}
+
+// Combined case: a payload mixing plain scalars, a nested struct containing a
+// 2x2 array, 64-bit and 16-bit scalars, and an array of structs. Only the
+// offset/size summary for the payload is checked; ValidateAllocaWrite is not
+// called.
+//
+// The shader text never defines PAYLOAD_MATRICES, so the float4x4 member and
+// its initialisation are compiled out unless the macro is supplied from
+// outside. The expectations below do not include a matrix.
+// NOTE(review): confirm TestStructAnnotationCase does not define it.
+TEST_F(PixTest, PixStructAnnotation_BigMess) {
+  const char *hlsl = R"(
+
+struct BigStruct
+{
+    uint64_t bigInt;
+    double bigDouble;
+};
+
+struct EmbeddedStruct
+{
+    uint32_t OneInt;
+    uint32_t TwoDArray[2][2];
+};
+
+struct smallPayload
+{
+    uint dummy;
+    uint vertexCount;
+    uint primitiveCount;
+    EmbeddedStruct embeddedStruct;
+#ifdef PAYLOAD_MATRICES
+    float4x4 mat;
+#endif
+    uint64_t bigOne;
+    half littleOne;
+    BigStruct bigStruct[2];
+    uint lastCheck;
+};
+
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    smallPayload p;
+    // Adding enough instructions to make the shader interesting to debug:
+    p.dummy = 42;
+    p.vertexCount = 3;
+    p.primitiveCount = 1;
+    p.embeddedStruct.OneInt = 123;
+    p.embeddedStruct.TwoDArray[0][0] = 252;
+    p.embeddedStruct.TwoDArray[0][1] = 253;
+    p.embeddedStruct.TwoDArray[1][0] = 254;
+    p.embeddedStruct.TwoDArray[1][1] = 255;
+#ifdef PAYLOAD_MATRICES
+    p.mat = float4x4( 1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15, 16);
+#endif
+    p.bigOne = 123456789;
+    p.littleOne = 1.0;
+    p.bigStruct[0].bigInt = 10;
+    p.bigStruct[0].bigDouble = 2.0;
+    p.bigStruct[1].bigInt = 20;
+    p.bigStruct[1].bigDouble = 4.0;
+    p.lastCheck = 27;
+    DispatchMesh(1, 1, 1, p);
+}
+)";
+
+  auto Testables = TestStructAnnotationCase(hlsl);
+  // Exactly one record is expected for the shader's single payload struct.
+  VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes.size());
+  // Leaf count: dummy, vertexCount, primitiveCount (3)
+  //           + OneInt and TwoDArray[2][2] (1 + 4)
+  //           + bigOne (1) + littleOne (1)
+  //           + bigStruct[2] x {bigInt, bigDouble} (4)
+  //           + lastCheck (1) = 15.
+  VERIFY_ARE_EQUAL(15, Testables.OffsetAndSizes[0].countOfMembers);
+  VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);
+  // Bit sizes of the two aggregate types: BigStruct is two 64-bit fields,
+  // EmbeddedStruct is five 32-bit values (OneInt plus the 2x2 array).
+  constexpr uint32_t BigStructBitSize = 64 * 2;
+  constexpr uint32_t EmbeddedStructBitSize = 32 * 5;
+  // Plain sum of the member sizes in declaration order, with no padding term:
+  // three uints, embeddedStruct, bigOne (64), littleOne (16), two BigStructs,
+  // lastCheck (32).
+  // NOTE(review): the 16 for `half` presumably relies on native 16-bit types
+  // being enabled for this compile - confirm.
+  VERIFY_ARE_EQUAL(3 * 32 + EmbeddedStructBitSize + 64 + 16 + BigStructBitSize*2 + 32, Testables.OffsetAndSizes[0].size);
+}
+
 
 #endif