Browse Source

Pix mesh shader output instrumentation (#2709)

This is a pass for PIX that adds instructions to write mesh shader output (vertices and indices) to a UAV for later ingestion by PIX in order to present a view of that output.
Jeff Noyle 5 năm trước cách đây
mục cha
commit
eb33030b03

+ 2 - 0
include/dxc/DxilPIXPasses/DxilPIXPasses.h

@@ -19,6 +19,7 @@ ModulePass *createDxilAddPixelHitInstrumentationPass();
 ModulePass *createDxilDbgValueToDbgDeclarePass();
 ModulePass *createDxilAnnotateWithVirtualRegisterPass();
 ModulePass *createDxilOutputColorBecomesConstantPass();
+ModulePass *createDxilDxilPIXMeshShaderOutputInstrumentation();
 ModulePass *createDxilRemoveDiscardsPass();
 ModulePass *createDxilReduceMSAAToSingleSamplePass();
 ModulePass *createDxilForceEarlyZPass();
@@ -29,6 +30,7 @@ void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry&);
 void initializeDxilDbgValueToDbgDeclarePass(llvm::PassRegistry&);
 void initializeDxilAnnotateWithVirtualRegisterPass(llvm::PassRegistry&);
 void initializeDxilOutputColorBecomesConstantPass(llvm::PassRegistry&);
+void initializeDxilPIXMeshShaderOutputInstrumentationPass(llvm::PassRegistry &);
 void initializeDxilRemoveDiscardsPass(llvm::PassRegistry&);
 void initializeDxilReduceMSAAToSingleSamplePass(llvm::PassRegistry&);
 void initializeDxilForceEarlyZPass(llvm::PassRegistry&);

+ 1 - 0
lib/DxilPIXPasses/CMakeLists.txt

@@ -7,6 +7,7 @@ add_llvm_library(LLVMDxilPIXPasses
   DxilDebugInstrumentation.cpp
   DxilForceEarlyZ.cpp
   DxilOutputColorBecomesConstant.cpp
+  DxilPIXMeshShaderOutputInstrumentation.cpp
   DxilRemoveDiscards.cpp
   DxilReduceMSAAToSingleSample.cpp
   DxilShaderAccessTracking.cpp

+ 336 - 0
lib/DxilPIXPasses/DxilPIXMeshShaderOutputInstrumentation.cpp

@@ -0,0 +1,336 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilPIXMeshShaderOutputInstrumentation.cpp                                //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides a pass to add instrumentation to retrieve mesh shader output.    //
+// Used by PIX.                                                              //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/DXIL/DxilOperations.h"
+#include "dxc/DXIL/DxilUtil.h"
+
+#include "dxc/DXIL/DxilInstructions.h"
+#include "dxc/DXIL/DxilModule.h"
+#include "dxc/DxilPIXPasses/DxilPIXPasses.h"
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/DxilSpanAllocator.h"
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <deque>
+
+#ifdef _WIN32
+#include <winerror.h>
+#endif
+
+// Size in bytes of the region at the tail of the UAV. Keep this in sync with
+// the same-named value in the debugger application's WinPixShaderUtils.h
+constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024;
+
+// Keep these in sync with the same-named values in PIX's MeshShaderOutput.cpp
+constexpr uint32_t triangleIndexIndicator = 1; // leads each EmitIndices record
+constexpr uint32_t int32ValueIndicator = 2; // leads each i32 StoreVertexOutput record
+constexpr uint32_t floatValueIndicator = 3; // leads each float StoreVertexOutput record
+
+using namespace llvm;
+using namespace hlsl;
+
+/// Module pass for PIX that instruments a mesh shader so that the arguments of
+/// its EmitIndices and StoreVertexOutput calls are also written to a UAV that
+/// the pass itself adds to the module.
+class DxilPIXMeshShaderOutputInstrumentation : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  explicit DxilPIXMeshShaderOutputInstrumentation() : ModulePass(ID) {}
+
+  const char *getPassName() const override {
+    return "DXIL mesh shader output instrumentation";
+  }
+
+  /// Picks up the "UAVSize" pass option.
+  void applyOptions(PassOptions O) override;
+
+  /// Adds the output UAV and the instrumentation; always returns true.
+  bool runOnModule(Module &M) override;
+
+private:
+  /// Bundle of the objects the emit helpers below need. The field order is
+  /// relied upon by the aggregate initializations in runOnModule.
+  struct BuilderContext {
+    Module &M;
+    DxilModule &DM;
+    LLVMContext &Ctx;
+    OP *HlslOP;
+    IRBuilder<> &Builder;
+  };
+
+  // UAV creation and addressing.
+  CallInst *addUAV(BuilderContext &BC);
+  uint32_t UAVDumpingGroundOffset();
+
+  // Group id values that are prepended to every record.
+  Value *insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC);
+  Value *insertInstructionsToCalculateGroupIdZ(BuilderContext &BC);
+
+  // Record emission: reserve space, then write it one dword at a time.
+  Value *reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes);
+  Value *writeDwordAndReturnNewOffset(BuilderContext &BC, Value *TheOffset,
+                                      Value *TheValue);
+  template <typename... T> void Instrument(BuilderContext &BC, T... values);
+
+  /// Size of the UAV in bytes; replaced by the "UAVSize" option when given.
+  uint64_t m_UAVSize = 1024 * 1024;
+
+  /// Handle-creating call for the added UAV; assigned in runOnModule.
+  CallInst *m_OutputUAV = nullptr;
+
+  /// Constant that reserveDebugEntrySpace ANDs with each reserved offset.
+  Constant *m_OffsetMask = nullptr;
+
+  /// Bytes reserved by reserveDebugEntrySpace that have not been written yet.
+  int m_RemainingReservedSpaceInBytes = 0;
+};
+
+/// Reads the optional "UAVSize" pass option into m_UAVSize. The helper is
+/// handed 1024 * 1024 as its default-value argument.
+void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O) {
+  constexpr uint64_t DefaultUAVSizeInBytes = 1024 * 1024;
+  GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, DefaultUAVSizeInBytes);
+}
+
+/// Returns the byte offset at which the last DebugBufferDumpingGroundSize
+/// bytes of the UAV begin, truncated to 32 bits.
+///
+/// NOTE(review): the subtraction is unsigned, so a UAVSize smaller than
+/// DebugBufferDumpingGroundSize wraps around rather than failing - confirm
+/// whether callers depend on that.
+uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset() {
+  const uint64_t OffsetInBytes = m_UAVSize - DebugBufferDumpingGroundSize;
+  return static_cast<uint32_t>(OffsetInBytes);
+}
+
+/// Appends a raw-buffer UAV named "PIX_DebugUAVName" to the module and emits,
+/// at BC.Builder's insertion point, the CreateHandle call that refers to it.
+///
+/// \param BC Supplies the DxilModule to extend and the builder to emit with.
+/// \returns The CreateHandle call instruction.
+CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) {
+  // The new resource is expected to land at the end of the current UAV list.
+  const unsigned int ExpectedID =
+      static_cast<unsigned int>(BC.DM.GetUAVs().size());
+
+  // The resource's global symbol is typed as a struct holding a single i32.
+  SmallVector<llvm::Type *, 1> Fields{Type::getInt32Ty(BC.Ctx)};
+  llvm::StructType *ElementTy =
+      llvm::StructType::create(Fields, "PIX_DebugUAV_Type");
+
+  std::unique_ptr<DxilResource> DebugUAV = llvm::make_unique<DxilResource>();
+  DebugUAV->SetGlobalName("PIX_DebugUAVName");
+  DebugUAV->SetGlobalSymbol(UndefValue::get(ElementTy->getPointerTo()));
+  DebugUAV->SetID(ExpectedID);
+  // This is the reserved-for-tools register space
+  DebugUAV->SetSpaceID(static_cast<unsigned int>(-2));
+  DebugUAV->SetSampleCount(1);
+  DebugUAV->SetGloballyCoherent(false);
+  DebugUAV->SetHasCounter(false);
+  DebugUAV->SetCompType(CompType::getI32());
+  DebugUAV->SetLowerBound(0);
+  DebugUAV->SetRangeSize(1);
+  DebugUAV->SetKind(DXIL::ResourceKind::RawBuffer);
+  DebugUAV->SetRW(true);
+
+  auto ID = BC.DM.AddUAV(std::move(DebugUAV));
+  assert(ID == ExpectedID);
+
+  BC.DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true);
+
+  // Build the CreateHandle call for the resource that was just added.
+  Function *CreateHandleFn =
+      BC.HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx));
+  Constant *OpcodeArg =
+      BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
+  Constant *ResourceClassArg = BC.HlslOP->GetI8Const(
+      static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
+          DXIL::ResourceClass::UAV));
+  // Position of the metadata record in the corresponding metadata list.
+  Constant *RangeIdArg = BC.HlslOP->GetU32Const(ID);
+  Constant *IndexArg = BC.HlslOP->GetU32Const(0);
+  // Non-uniform resource index: false.
+  Constant *NonUniformArg = BC.HlslOP->GetI1Const(0);
+
+  return BC.Builder.CreateCall(
+      CreateHandleFn,
+      {OpcodeArg, ResourceClassArg, RangeIdArg, IndexArg, NonUniformArg},
+      "PIX_DebugUAV_Handle");
+}
+
+/// Emits two GroupId calls (components 0 and 1) and combines them into one
+/// i32 as (GroupIdY << 16) + GroupIdX.
+///
+/// \param BC Supplies the builder and the DXIL op table.
+/// \returns The combined i32 value.
+Value *DxilPIXMeshShaderOutputInstrumentation::
+    insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC) {
+  OP *const HlslOP = BC.HlslOP;
+
+  Function *const GroupIdFn =
+      HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(BC.Ctx));
+  Constant *const GroupIdOpcode =
+      HlslOP->GetU32Const((unsigned)DXIL::OpCode::GroupId);
+  Constant *const ComponentX = HlslOP->GetU32Const(0);
+  Constant *const ComponentY = HlslOP->GetU32Const(1);
+
+  CallInst *const X =
+      BC.Builder.CreateCall(GroupIdFn, {GroupIdOpcode, ComponentX}, "GroupIdX");
+  CallInst *const Y =
+      BC.Builder.CreateCall(GroupIdFn, {GroupIdOpcode, ComponentY}, "GroupIdY");
+
+  // Spec requires that no group id index is greater than 64k, so X is kept in
+  // the low 16 bits and Y is moved into the high 16 bits of a single value.
+  Value *const YInHighHalf = BC.Builder.CreateShl(Y, 16);
+  return BC.Builder.CreateAdd(YInHighHalf, X);
+}
+
+/// Emits a GroupId call for component 2.
+///
+/// \param BC Supplies the builder and the DXIL op table.
+/// \returns The call instruction, named "GroupIdZ".
+Value *DxilPIXMeshShaderOutputInstrumentation::
+    insertInstructionsToCalculateGroupIdZ(BuilderContext &BC) {
+  OP *const HlslOP = BC.HlslOP;
+
+  Constant *const ComponentZ = HlslOP->GetU32Const(2);
+  Function *const GroupIdFn =
+      HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(BC.Ctx));
+  Constant *const GroupIdOpcode =
+      HlslOP->GetU32Const((unsigned)DXIL::OpCode::GroupId);
+
+  return BC.Builder.CreateCall(GroupIdFn, {GroupIdOpcode, ComponentZ},
+                               "GroupIdZ");
+}
+
+/// Emits an atomic add of SpaceInBytes on the dword located at
+/// UAVDumpingGroundOffset() in the output UAV, and masks the value the atomic
+/// returns with m_OffsetMask.
+///
+/// Also records SpaceInBytes as the amount that the following
+/// writeDwordAndReturnNewOffset calls are expected to consume.
+///
+/// \param BC           Supplies the builder and the DXIL op table.
+/// \param SpaceInBytes Number of bytes to reserve.
+/// \returns The masked result of the atomic operation, i.e. the byte offset
+///          at which the caller starts writing.
+Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
+    BuilderContext &BC, uint32_t SpaceInBytes) {
+  // A non-zero remainder means the previous caller reserved too much space.
+  assert(m_RemainingReservedSpaceInBytes == 0);
+  m_RemainingReservedSpaceInBytes = SpaceInBytes;
+
+  OP *const HlslOP = BC.HlslOP;
+  Type *const I32Ty = Type::getInt32Ty(BC.Ctx);
+
+  // Insert the UAV increment instruction:
+  Function *const AtomicFn = HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, I32Ty);
+  Constant *const OpcodeArg =
+      HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
+  Constant *const AddOperation =
+      HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
+  Constant *const CounterOffset = HlslOP->GetU32Const(UAVDumpingGroundOffset());
+  UndefValue *const UnusedCoord = UndefValue::get(I32Ty);
+  Constant *const Amount = HlslOP->GetU32Const(SpaceInBytes);
+
+  CallInst *const ValueBeforeAdd = BC.Builder.CreateCall(
+      AtomicFn,
+      {
+          OpcodeArg,     // i32: opcode
+          m_OutputUAV,   // %dx.types.Handle: resource handle
+          AddOperation,  // i32: binary operation code (EXCHANGE, IADD, AND,
+                         // OR, XOR, IMIN, IMAX, UMIN, UMAX)
+          CounterOffset, // i32: coordinate c0, index in bytes
+          UnusedCoord,   // i32: coordinate c1 (unused)
+          UnusedCoord,   // i32: coordinate c2 (unused)
+          Amount,        // i32: increment value
+      },
+      "UAVIncResult");
+
+  return BC.Builder.CreateAnd(ValueBeforeAdd, m_OffsetMask,
+                              "MaskedForUAVLimit");
+}
+
+/// Emits a BufferStore that writes one dword into the output UAV and accounts
+/// for it against the space reserved by reserveDebugEntrySpace.
+///
+/// \param BC        Supplies the builder and the DXIL op table.
+/// \param TheOffset Byte offset into the UAV at which the dword is stored.
+/// \param TheValue  The value to store (the store is the i32 overload).
+/// \returns A value equal to TheOffset plus sizeof(uint32_t), i.e. the offset
+///          for the next dword.
+Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset(
+    BuilderContext &BC, Value *TheOffset, Value *TheValue) {
+  // Kept signed on purpose: the reservation counter is an int, and mixing it
+  // with the unsigned result of sizeof would make the subtraction unsigned.
+  constexpr int DwordSizeInBytes = static_cast<int>(sizeof(uint32_t));
+
+  Function *StoreValue =
+      BC.HlslOP->GetOpFunc(DXIL::OpCode::BufferStore, Type::getInt32Ty(BC.Ctx));
+  Constant *StoreValueOpcode =
+      BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore);
+  UndefValue *Undef32Arg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
+  Constant *WriteMask_X = BC.HlslOP->GetI8Const(1);
+
+  (void)BC.Builder.CreateCall(
+      StoreValue,
+      {StoreValueOpcode, // i32 opcode
+       m_OutputUAV,      // %dx.types.Handle, ; resource handle
+       TheOffset,        // i32 c0: index in bytes into UAV
+       Undef32Arg,       // i32 c1: unused
+       TheValue,         // first value: the dword being written
+       Undef32Arg,       // unused values
+       Undef32Arg,       // unused values
+       Undef32Arg,       // unused values
+       WriteMask_X});    // only the first value is written
+
+  m_RemainingReservedSpaceInBytes -= DwordSizeInBytes;
+  // A negative remainder means the caller didn't reserve enough space.
+  assert(m_RemainingReservedSpaceInBytes >= 0);
+
+  return BC.Builder.CreateAdd(
+      TheOffset,
+      BC.HlslOP->GetU32Const(static_cast<unsigned int>(DwordSizeInBytes)));
+}
+
+/// Writes one record to the output UAV: reserves four bytes per argument, then
+/// stores the arguments in order as consecutive dwords.
+///
+/// \param BC     Supplies the builder positioned where the stores should go.
+/// \param values The values to write; each must convert to llvm::Value *.
+template <typename... T>
+void DxilPIXMeshShaderOutputInstrumentation::Instrument(BuilderContext &BC,
+                                                        T... values) {
+  llvm::SmallVector<llvm::Value *, 10> Record(
+      {static_cast<llvm::Value *>(values)...});
+
+  const uint32_t RecordDwords = static_cast<uint32_t>(Record.size());
+  llvm::Value *Cursor =
+      reserveDebugEntrySpace(BC, RecordDwords * sizeof(uint32_t));
+
+  // Each store returns the offset at which the next dword belongs.
+  for (size_t Index = 0; Index != Record.size(); ++Index) {
+    Cursor = writeDwordAndReturnNewOffset(BC, Cursor, Record[Index]);
+  }
+}
+
+/// Instruments the module: adds the output UAV, computes the group id values
+/// once at the top of the entry function, and then, in front of every call to
+/// EmitIndices and to the i32 and float overloads of StoreVertexOutput, writes
+/// a record consisting of an indicator dword, the group id values and the
+/// call's operands.
+///
+/// \returns true in all cases.
+bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
+  DxilModule &DM = M.GetOrCreateDxilModule();
+  LLVMContext &Ctx = M.getContext();
+  OP *HlslOP = DM.GetOP();
+  Type *const I32Ty = Type::getInt32Ty(Ctx);
+
+  // Everything that is shared by all records is emitted once, at the first
+  // non-alloca insertion point of the entry function.
+  IRBuilder<> PrologueBuilder(
+      dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
+  BuilderContext PrologueBC{M, DM, Ctx, HlslOP, PrologueBuilder};
+
+  m_OffsetMask = HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);
+  m_OutputUAV = addUAV(PrologueBC);
+
+  Value *const GroupIdXandY =
+      insertInstructionsToCalculateFlattenedGroupIdXandY(PrologueBC);
+  Value *const GroupIdZ = insertInstructionsToCalculateGroupIdZ(PrologueBC);
+
+  // In the loops below the instrumentation is inserted in front of each call.
+  // It calls other DXIL op functions only, so the user list being walked is
+  // not modified while it is iterated.
+
+  // EmitIndices: operands 1-4 are recorded as they are.
+  Function *const EmitIndicesFn =
+      HlslOP->GetOpFunc(DXIL::OpCode::EmitIndices, Type::getVoidTy(Ctx));
+  for (User *U : EmitIndicesFn->users()) {
+    CallInst *const Emit = cast<CallInst>(U);
+
+    IRBuilder<> CallBuilder(Emit);
+    BuilderContext CallBC{M, DM, Ctx, HlslOP, CallBuilder};
+
+    Instrument(CallBC, HlslOP->GetI32Const(triangleIndexIndicator),
+               GroupIdXandY, GroupIdZ, Emit->getOperand(1),
+               Emit->getOperand(2), Emit->getOperand(3), Emit->getOperand(4));
+  }
+
+  // StoreVertexOutput, i32 overload: operand 3 is zero-extended to i32 so that
+  // it occupies a whole dword; the other operands are recorded as they are.
+  Function *const StoreI32Fn =
+      HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, I32Ty);
+  for (User *U : StoreI32Fn->users()) {
+    CallInst *const Store = cast<CallInst>(U);
+
+    IRBuilder<> CallBuilder(Store);
+    BuilderContext CallBC{M, DM, Ctx, HlslOP, CallBuilder};
+
+    Value *const Operand3AsI32 =
+        CallBuilder.CreateZExt(Store->getOperand(3), I32Ty);
+
+    Instrument(CallBC, HlslOP->GetI32Const(int32ValueIndicator), GroupIdXandY,
+               GroupIdZ, Store->getOperand(1), Store->getOperand(2),
+               Operand3AsI32, Store->getOperand(4), Store->getOperand(5));
+  }
+
+  // StoreVertexOutput, float overload: as above, and additionally the float
+  // in operand 4 is bitcast to i32 so that its bits are stored unchanged.
+  Function *const StoreFloatFn =
+      HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Type::getFloatTy(Ctx));
+  for (User *U : StoreFloatFn->users()) {
+    CallInst *const Store = cast<CallInst>(U);
+
+    IRBuilder<> CallBuilder(Store);
+    BuilderContext CallBC{M, DM, Ctx, HlslOP, CallBuilder};
+
+    Value *const Operand3AsI32 =
+        CallBuilder.CreateZExt(Store->getOperand(3), I32Ty);
+    Value *const FloatBitsAsI32 =
+        CallBuilder.CreateBitCast(Store->getOperand(4), I32Ty);
+
+    Instrument(CallBC, HlslOP->GetI32Const(floatValueIndicator), GroupIdXandY,
+               GroupIdZ, Store->getOperand(1), Store->getOperand(2),
+               Operand3AsI32, FloatBitsAsI32, Store->getOperand(5));
+  }
+
+  // Re-emit the resource metadata now that the UAV has been added.
+  DM.ReEmitDxilResources();
+
+  return true;
+}
+
+// Definition of the class's static pass-identification member.
+char DxilPIXMeshShaderOutputInstrumentation::ID = 0;
+
+// Factory declared in DxilPIXPasses.h: hands back a newly allocated instance
+// of the pass.
+ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation() {
+  ModulePass *const NewPass = new DxilPIXMeshShaderOutputInstrumentation();
+  return NewPass;
+}
+
+// Registers the pass under the name that DxcOptimizer.cpp and hctdb.py use to
+// look up its arguments.
+INITIALIZE_PASS(DxilPIXMeshShaderOutputInstrumentation,
+                "hlsl-dxil-pix-meshshader-output-instrumentation",
+                "DXIL mesh shader output instrumentation for PIX", false, false)

+ 1 - 0
lib/DxilPIXPasses/DxilPIXPasses.cpp

@@ -34,6 +34,7 @@ HRESULT SetupRegistryPassForPIX() {
     initializeDxilDebugInstrumentationPass(Registry);
     initializeDxilForceEarlyZPass(Registry);
     initializeDxilOutputColorBecomesConstantPass(Registry);
+    initializeDxilPIXMeshShaderOutputInstrumentationPass(Registry);
     initializeDxilReduceMSAAToSingleSamplePass(Registry);
     initializeDxilRemoveDiscardsPass(Registry);
     initializeDxilShaderAccessTrackingPass(Registry);

+ 4 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -201,6 +201,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
   static const LPCSTR DxilDebugInstrumentationArgs[] = { "UAVSize", "parameter0", "parameter1", "parameter2" };
   static const LPCSTR DxilGenerationPassArgs[] = { "NotOptimized" };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "mod-mode", "constant-red", "constant-green", "constant-blue", "constant-alpha" };
+  static const LPCSTR DxilPIXMeshShaderOutputInstrumentationArgs[] = { "UAVSize" };
   static const LPCSTR DxilShaderAccessTrackingArgs[] = { "config", "checkForDynamicIndexing" };
   static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "ReplaceAllVectors" };
   static const LPCSTR Float2IntArgs[] = { "float2int-max-integer-bw" };
@@ -235,6 +236,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
   if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
+  if (strcmp(passName, "hlsl-dxil-pix-meshshader-output-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilPIXMeshShaderOutputInstrumentationArgs, _countof(DxilPIXMeshShaderOutputInstrumentationArgs));
   if (strcmp(passName, "hlsl-dxil-pix-shader-access-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilShaderAccessTrackingArgs, _countof(DxilShaderAccessTrackingArgs));
   if (strcmp(passName, "dynamic-vector-to-array") == 0) return ArrayRef<LPCSTR>(DynamicIndexingVectorToArrayArgs, _countof(DynamicIndexingVectorToArrayArgs));
   if (strcmp(passName, "float2int") == 0) return ArrayRef<LPCSTR>(Float2IntArgs, _countof(Float2IntArgs));
@@ -276,6 +278,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
   static const LPCSTR DxilDebugInstrumentationArgs[] = { "None", "None", "None", "None" };
   static const LPCSTR DxilGenerationPassArgs[] = { "None" };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "None", "None", "None", "None", "None" };
+  static const LPCSTR DxilPIXMeshShaderOutputInstrumentationArgs[] = { "None" };
   static const LPCSTR DxilShaderAccessTrackingArgs[] = { "None", "None" };
   static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "None" };
   static const LPCSTR Float2IntArgs[] = { "Max integer bitwidth to consider in float2int" };
@@ -310,6 +313,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
   if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
+  if (strcmp(passName, "hlsl-dxil-pix-meshshader-output-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilPIXMeshShaderOutputInstrumentationArgs, _countof(DxilPIXMeshShaderOutputInstrumentationArgs));
   if (strcmp(passName, "hlsl-dxil-pix-shader-access-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilShaderAccessTrackingArgs, _countof(DxilShaderAccessTrackingArgs));
   if (strcmp(passName, "dynamic-vector-to-array") == 0) return ArrayRef<LPCSTR>(DynamicIndexingVectorToArrayArgs, _countof(DynamicIndexingVectorToArrayArgs));
   if (strcmp(passName, "float2int") == 0) return ArrayRef<LPCSTR>(Float2IntArgs, _countof(Float2IntArgs));

+ 2 - 0
utils/hct/hctdb.py

@@ -1943,6 +1943,8 @@ class db_dxil(object):
             {'n':'constant-alpha','t':'float','c':1}])
         add_pass('hlsl-dxil-remove-discards', 'DxilRemoveDiscards', 'HLSL DXIL Remove all discard instructions', [])
         add_pass('hlsl-dxil-force-early-z', 'DxilForceEarlyZ', 'HLSL DXIL Force the early Z global flag, if shader has no discard calls', [])
+        add_pass('hlsl-dxil-pix-meshshader-output-instrumentation', 'DxilPIXMeshShaderOutputInstrumentation', 'DXIL mesh shader output instrumentation for PIX', [
+            {'n':'UAVSize','t':'int','c':1}])
         add_pass('hlsl-dxil-pix-shader-access-instrumentation', 'DxilShaderAccessTracking', 'HLSL DXIL shader access tracking for PIX', [
             {'n':'config','t':'int','c':1},
             {'n':'checkForDynamicIndexing','t':'bool','c':1}])