2
0
Эх сурвалжийг харах

User/jeffnn/pix dontoverwriteoffsetcounter (#2729)

Fix for overflow case
Overloads for StoreVertexOutput
Reformat to remove curly-on-end
Jeff Noyle 5 жил өмнө
parent
commit
be3f3fa2ee

+ 104 - 53
lib/DxilPIXPasses/DxilPIXMeshShaderOutputInstrumentation.cpp

@@ -31,16 +31,20 @@
 // Keep this in sync with the same-named value in the debugger application's
 // WinPixShaderUtils.h
 constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024;
+constexpr uint64_t MaxSizePerRecord = 64;
 
 // Keep these in sync with the same-named values in PIX's MeshShaderOutput.cpp
 constexpr uint32_t triangleIndexIndicator = 1;
 constexpr uint32_t int32ValueIndicator = 2;
 constexpr uint32_t floatValueIndicator = 3;
+constexpr uint32_t int16ValueIndicator = 4;
+constexpr uint32_t float16ValueIndicator = 5;
 
 using namespace llvm;
 using namespace hlsl;
 
-class DxilPIXMeshShaderOutputInstrumentation : public ModulePass {
+class DxilPIXMeshShaderOutputInstrumentation : public ModulePass 
+{
 public:
   static char ID; // Pass identification, replacement for typeid
   explicit DxilPIXMeshShaderOutputInstrumentation() : ModulePass(ID) {}
@@ -75,15 +79,18 @@ private:
   template <typename... T> void Instrument(BuilderContext &BC, T... values);
 };
 
-void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O) {
+void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O) 
+{
   GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024);
 }
 
-uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset() {
+uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset() 
+{
   return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
 }
 
-CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) {
+CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) 
+{
   // Set up a UAV with structure of a single int
   unsigned int UAVResourceHandle =
       static_cast<unsigned int>(BC.DM.GetUAVs().size());
@@ -130,7 +137,8 @@ CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) {
 }
 
 Value *DxilPIXMeshShaderOutputInstrumentation::
-    insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC) {
+    insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC)
+{
   Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
   Constant *One32Arg = BC.HlslOP->GetU32Const(1);
 
@@ -150,7 +158,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::
 }
 
 Value *DxilPIXMeshShaderOutputInstrumentation::
-    insertInstructionsToCalculateGroupIdZ(BuilderContext &BC) {
+    insertInstructionsToCalculateGroupIdZ(BuilderContext &BC) 
+{
   Constant *Two32Arg = BC.HlslOP->GetU32Const(2);
   auto GroupIdFunc =
       BC.HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(BC.Ctx));
@@ -160,9 +169,15 @@ Value *DxilPIXMeshShaderOutputInstrumentation::
 }
 
 Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
-    BuilderContext &BC, uint32_t SpaceInBytes) {
-  assert(m_RemainingReservedSpaceInBytes ==
-         0); // or else the previous caller reserved too much space
+    BuilderContext &BC, uint32_t SpaceInBytes) 
+{
+  
+  // Check the previous caller didn't reserve too much space:
+  assert(m_RemainingReservedSpaceInBytes == 0);
+  
+  // Check that the caller didn't ask for so much memory that the record could
+  // overwrite the offset counter (kept MaxSizePerRecord bytes into the dump area):
+  assert(SpaceInBytes < MaxSizePerRecord);
 
   m_RemainingReservedSpaceInBytes = SpaceInBytes;
 
@@ -173,7 +188,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
       BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
   Constant *AtomicAdd =
       BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
-  Constant *OffsetArg = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset());
+  Constant *OffsetArg =
+      BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() + MaxSizePerRecord);
   UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
 
   Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes);
@@ -196,7 +212,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
 }
 
 Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset(
-    BuilderContext &BC, Value *TheOffset, Value *TheValue) {
+    BuilderContext &BC, Value *TheOffset, Value *TheValue) 
+{
 
   Function *StoreValue =
       BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getInt32Ty(BC.Ctx));
@@ -228,18 +245,21 @@ Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset(
 
 template <typename... T>
 void DxilPIXMeshShaderOutputInstrumentation::Instrument(BuilderContext &BC,
-                                                        T... values) {
+                                                        T... values)
+{
   llvm::SmallVector<llvm::Value *, 10> Values(
       {static_cast<llvm::Value *>(values)...});
   const uint32_t DwordCount = Values.size();
   llvm::Value *byteOffset =
       reserveDebugEntrySpace(BC, DwordCount * sizeof(uint32_t));
-  for (llvm::Value *V : Values) {
+  for (llvm::Value *V : Values)
+  {
     byteOffset = writeDwordAndReturnNewOffset(BC, byteOffset, V);
   }
 }
 
-bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
+bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M)
+{
   DxilModule &DM = M.GetOrCreateDxilModule();
   LLVMContext &Ctx = M.getContext();
   OP *HlslOP = DM.GetOP();
@@ -259,7 +279,8 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
 
   auto F = HlslOP->GetOpFunc(DXIL::OpCode::EmitIndices, Type::getVoidTy(Ctx));
   auto FunctionUses = F->uses();
-  for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
+  for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();)
+  {
     auto &FunctionUse = *FI++;
     auto FunctionUser = FunctionUse.getUser();
 
@@ -273,50 +294,79 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
                Call->getOperand(2), Call->getOperand(3), Call->getOperand(4));
   }
 
-  F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Type::getInt32Ty(Ctx));
-  FunctionUses = F->uses();
-  for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
-    auto &FunctionUse = *FI++;
-    auto FunctionUser = FunctionUse.getUser();
-
-    auto Call = cast<CallInst>(FunctionUser);
-
-    IRBuilder<> Builder2(Call);
-    BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
+  struct OutputType
+  {
+    Type *type;
+    uint32_t tag;
+  };
+  SmallVector<OutputType, 4> StoreVertexOutputOverloads
+  {
+    {Type::getInt32Ty(Ctx), int32ValueIndicator},
+    {Type::getInt16Ty(Ctx), int16ValueIndicator}, 
+    {Type::getFloatTy(Ctx), floatValueIndicator},
+    {Type::getHalfTy(Ctx), float16ValueIndicator}
+  };
 
+  for (auto const &Overload : StoreVertexOutputOverloads)
+  {
+    F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Overload.type);
+    FunctionUses = F->uses();
+    for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();)
     {
-      auto expandBits = BC2.Builder.CreateCast(
-          Instruction::ZExt, Call->getOperand(3), Type::getInt32Ty(Ctx));
-
-      Instrument(BC2, BC2.HlslOP->GetI32Const(int32ValueIndicator),
-                 GroupIdXandY, GroupIdZ, Call->getOperand(1),
-                 Call->getOperand(2), expandBits, Call->getOperand(4),
-                 Call->getOperand(5));
-    }
-  }
+      auto &FunctionUse = *FI++;
+      auto FunctionUser = FunctionUse.getUser();
 
-  F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Type::getFloatTy(Ctx));
-  FunctionUses = F->uses();
-  for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
-    auto &FunctionUse = *FI++;
-    auto FunctionUser = FunctionUse.getUser();
+      auto Call = cast<CallInst>(FunctionUser);
 
-    auto Call = cast<CallInst>(FunctionUser);
+      IRBuilder<> Builder2(Call);
+      BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
 
-    IRBuilder<> Builder2(Call);
-    BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
+      // Expand column index to 32 bits:
+      auto ColumnIndex = BC2.Builder.CreateCast(
+       Instruction::ZExt, 
+        Call->getOperand(3), 
+        Type::getInt32Ty(Ctx));
 
-    {
-      auto expandBits = BC2.Builder.CreateCast(
-          Instruction::ZExt, Call->getOperand(3), Type::getInt32Ty(Ctx));
-
-      auto reinterpretFloatToInt = BC2.Builder.CreateCast(
-          Instruction::BitCast, Call->getOperand(4), Type::getInt32Ty(Ctx));
+      // Coerce actual value to int32 
+      Value *CoercedValue = Call->getOperand(4);
 
-      Instrument(BC2, BC2.HlslOP->GetI32Const(floatValueIndicator),
-                 GroupIdXandY, GroupIdZ, Call->getOperand(1),
-                 Call->getOperand(2), expandBits, reinterpretFloatToInt,
-                 Call->getOperand(5));
+      if (Overload.tag == floatValueIndicator) 
+      {
+        CoercedValue = BC2.Builder.CreateCast(
+          Instruction::BitCast,
+          CoercedValue, 
+          Type::getInt32Ty(Ctx));
+      }
+      else if (Overload.tag == float16ValueIndicator) 
+      {
+        auto * HalfInt = BC2.Builder.CreateCast(
+          Instruction::BitCast, 
+          CoercedValue, 
+          Type::getInt16Ty(Ctx));
+
+        CoercedValue = BC2.Builder.CreateCast(
+          Instruction::ZExt, 
+          HalfInt, 
+          Type::getInt32Ty(Ctx));
+      }
+      else if (Overload.tag == int16ValueIndicator) 
+      {
+        CoercedValue = BC2.Builder.CreateCast(
+          Instruction::ZExt,
+          CoercedValue,
+          Type::getInt32Ty(Ctx));
+      }
+
+      Instrument(
+        BC2, 
+        BC2.HlslOP->GetI32Const(Overload.tag),
+        GroupIdXandY,
+        GroupIdZ, 
+        Call->getOperand(1),
+        Call->getOperand(2),
+        ColumnIndex,
+        CoercedValue,
+        Call->getOperand(5));
     }
   }
 
@@ -327,7 +377,8 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
 
 char DxilPIXMeshShaderOutputInstrumentation::ID = 0;
 
-ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation() {
+ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation()
+{
   return new DxilPIXMeshShaderOutputInstrumentation();
 }