瀏覽代碼

New pass for PIX "shader access tracking" (#998)

* py changes

* checkpoint

* wrong overloads for a couple ops

* Correct out-of-range checking

* test

* RTV

* Add shader access tracking and reportage of dynamically indexed registers

* Uniquify and encode proper register index!

* remove unnecessary noise

* rename a var
Jeff Noyle 7 年之前
父節點
當前提交
ad3e51b1dd

+ 1 - 1
external/effcee

@@ -1 +1 @@
-Subproject commit 2741bade14f1ab23f3b90f0e5c77c6b935fc2fff
+Subproject commit 4a6edb2f740b9b87b04306a7815f42de5ca149a4

+ 2 - 0
include/dxc/HLSL/DxilGenerationPass.h

@@ -68,6 +68,7 @@ ModulePass *createDxilRemoveDiscardsPass();
 ModulePass *createDxilReduceMSAAToSingleSamplePass();
 ModulePass *createDxilForceEarlyZPass();
 ModulePass *createDxilDebugInstrumentationPass();
+ModulePass *createDxilShaderAccessTrackingPass();
 ModulePass *createDxilTranslateRawBuffer();
 ModulePass *createNoPausePassesPass();
 ModulePass *createPausePassesPass();
@@ -99,6 +100,7 @@ void initializeDxilTranslateRawBufferPass(llvm::PassRegistry&);
 void initializeDxilReduceMSAAToSingleSamplePass(llvm::PassRegistry&);
 void initializeDxilForceEarlyZPass(llvm::PassRegistry&);
 void initializeDxilDebugInstrumentationPass(llvm::PassRegistry&);
+void initializeDxilShaderAccessTrackingPass(llvm::PassRegistry&);
 void initializeNoPausePassesPass(llvm::PassRegistry&);
 void initializePausePassesPass(llvm::PassRegistry&);
 void initializeResumePassesPass(llvm::PassRegistry&);

+ 1 - 0
lib/HLSL/CMakeLists.txt

@@ -31,6 +31,7 @@ add_llvm_library(LLVMHLSL
   DxilRootSignature.cpp
   DxilSampler.cpp
   DxilSemantic.cpp
+  DxilShaderAccessTracking.cpp
   DxilShaderModel.cpp
   DxilSignature.cpp
   DxilSignatureElement.cpp

+ 7 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -104,6 +104,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilPreserveAllOutputsPass(Registry);
     initializeDxilReduceMSAAToSingleSamplePass(Registry);
     initializeDxilRemoveDiscardsPass(Registry);
+    initializeDxilShaderAccessTrackingPass(Registry);
     initializeDxilTranslateRawBufferPass(Registry);
     initializeDynamicIndexingVectorToArrayPass(Registry);
     initializeEarlyCSELegacyPassPass(Registry);
@@ -187,6 +188,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
   static const LPCSTR DxilDebugInstrumentationArgs[] = { "UAVSize", "parameter0", "parameter1", "parameter2" };
   static const LPCSTR DxilGenerationPassArgs[] = { "NotOptimized" };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "mod-mode", "constant-red", "constant-green", "constant-blue", "constant-alpha" };
+  static const LPCSTR DxilShaderAccessTrackingArgs[] = { "config", "checkForDynamicIndexing" };
   static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "ReplaceAllVectors" };
   static const LPCSTR Float2IntArgs[] = { "float2int-max-integer-bw" };
   static const LPCSTR GVNArgs[] = { "noloads", "enable-pre", "enable-load-pre", "max-recurse-depth" };
@@ -219,6 +221,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
   if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
+  if (strcmp(passName, "hlsl-dxil-pix-shader-access-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilShaderAccessTrackingArgs, _countof(DxilShaderAccessTrackingArgs));
   if (strcmp(passName, "dynamic-vector-to-array") == 0) return ArrayRef<LPCSTR>(DynamicIndexingVectorToArrayArgs, _countof(DynamicIndexingVectorToArrayArgs));
   if (strcmp(passName, "float2int") == 0) return ArrayRef<LPCSTR>(Float2IntArgs, _countof(Float2IntArgs));
   if (strcmp(passName, "gvn") == 0) return ArrayRef<LPCSTR>(GVNArgs, _countof(GVNArgs));
@@ -258,6 +261,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
   static const LPCSTR DxilDebugInstrumentationArgs[] = { "None", "None", "None", "None" };
   static const LPCSTR DxilGenerationPassArgs[] = { "None" };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "None", "None", "None", "None", "None" };
+  static const LPCSTR DxilShaderAccessTrackingArgs[] = { "None", "None" };
   static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "None" };
   static const LPCSTR Float2IntArgs[] = { "Max integer bitwidth to consider in float2int" };
   static const LPCSTR GVNArgs[] = { "None", "None", "None", "Max recurse depth" };
@@ -290,6 +294,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
   if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
+  if (strcmp(passName, "hlsl-dxil-pix-shader-access-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilShaderAccessTrackingArgs, _countof(DxilShaderAccessTrackingArgs));
   if (strcmp(passName, "dynamic-vector-to-array") == 0) return ArrayRef<LPCSTR>(DynamicIndexingVectorToArrayArgs, _countof(DynamicIndexingVectorToArrayArgs));
   if (strcmp(passName, "float2int") == 0) return ArrayRef<LPCSTR>(Float2IntArgs, _countof(Float2IntArgs));
   if (strcmp(passName, "gvn") == 0) return ArrayRef<LPCSTR>(GVNArgs, _countof(GVNArgs));
@@ -343,6 +348,8 @@ static bool IsPassOptionName(StringRef S) {
     ||  S.equals("UAVSize")
     ||  S.equals("add-pixel-cost")
     ||  S.equals("bonus-inst-threshold")
+    ||  S.equals("checkForDynamicIndexing")
+    ||  S.equals("config")
     ||  S.equals("constant-alpha")
     ||  S.equals("constant-blue")
     ||  S.equals("constant-green")

+ 2 - 0
lib/HLSL/DxilReduceMSAAToSingleSample.cpp

@@ -38,6 +38,8 @@ bool DxilReduceMSAAToSingleSample::runOnModule(Module &M)
   OP *HlslOP = DM.GetOP();
 
   // FP16 type doesn't have its own identity, and is covered by float type... 
+
+
   auto TextureLoadOverloads = std::vector<Type*>{ Type::getFloatTy(Ctx), Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx) };
 
   bool Modified = false;

+ 633 - 0
lib/HLSL/DxilShaderAccessTracking.cpp

@@ -0,0 +1,633 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilShaderAccessTracking.cpp                                        //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides a pass to add instrumentation that records which resources a     //
+// shader accesses (reads, writes, counter updates). Used by PIX.            //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/DxilOperations.h"
+#include "dxc/HLSL/DxilSignatureElement.h"
+#include "dxc/HLSL/DxilModule.h"
+#include "dxc/Support/Global.h"
+#include "dxc/HLSL/DxilTypeSystem.h"
+#include "dxc/HLSL/DxilConstants.h"
+#include "dxc/HLSL/DxilInstructions.h"
+#include "dxc/HLSL/DxilSpanAllocator.h"
+
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <memory>
+#include <map>
+#include <deque>
+
+#include <winerror.h>
+
+using namespace llvm;
+using namespace hlsl;
+
+
+// Raises hlsl::Exception(E_INVALIDARG) when the condition holds; otherwise does nothing.
+void ThrowIf(bool a)
+{
+  if (!a) {
+    return;
+  }
+  throw ::hlsl::Exception(E_INVALIDARG);
+}
+
+//---------------------------------------------------------------------------------------------------------------------------------
+// These types are taken from PIX's ShaderAccessHelpers.h
+
+// Bit flags describing how a shader touched a resource. The numeric value of a
+// flag is what EmitAccess ORs into the resource's slot in the tracking UAV.
+enum class ShaderAccessFlags : uint32_t
+{
+  None = 0,
+  Read = 1 << 0,
+  Write = 1 << 1,
+
+  // "Counter" access is only applicable to UAVs; it means the counter buffer attached to the UAV
+  // was accessed, but not necessarily the UAV resource.
+  Counter = 1 << 2
+};
+
+// Kinds of bind points that can appear in the tracking configuration. Every
+// value except Terminator has a one-character encoding (see ParseRegisterType
+// and EncodeRegisterType).
+enum class RegisterType
+{
+  CBV,
+  SRV,
+  UAV,
+  RTV,
+  DSV,
+  Sampler,
+  SOV,
+  Invalid,
+  Terminator // produced by ParseRegisterType for any unrecognized character
+};
+
+// Translates a DXIL resource class into the RegisterType used by the tracking
+// configuration. Any class not listed here raises an exception via ThrowIf.
+RegisterType RegisterTypeFromResourceClass(DXIL::ResourceClass c) {
+  if (c == DXIL::ResourceClass::SRV) {
+    return RegisterType::SRV;
+  }
+  if (c == DXIL::ResourceClass::UAV) {
+    return RegisterType::UAV;
+  }
+  if (c == DXIL::ResourceClass::CBuffer) {
+    return RegisterType::CBV;
+  }
+  if (c == DXIL::ResourceClass::Sampler) {
+    return RegisterType::Sampler;
+  }
+  if (c == DXIL::ResourceClass::Invalid) {
+    return RegisterType::Invalid;
+  }
+
+  // Unknown resource class: ThrowIf(true) always throws; the return only
+  // keeps compilers that cannot see that happy.
+  ThrowIf(true);
+  return RegisterType::Invalid;
+}
+
+// Key for the slot-assignment map: one register type within one register space.
+struct RegisterTypeAndSpace
+{
+  // Orders by register type first, then by register space.
+  bool operator < (const RegisterTypeAndSpace & o) const {
+    const int lhsType = static_cast<int>(Type);
+    const int rhsType = static_cast<int>(o.Type);
+    if (lhsType != rhsType) {
+      return lhsType < rhsType;
+    }
+    return Space < o.Space;
+  }
+  RegisterType Type;
+  unsigned     Space;
+};
+
+// Identifies a bind point as defined by the root signature
+struct RSRegisterIdentifier
+{
+  RegisterType Type;
+  unsigned     Space;
+  unsigned     Index;
+
+  // Lexicographic ordering on (Type, Space, Index).
+  //
+  // This type is used as a std::set key, which requires a strict weak
+  // ordering. Requiring all three fields to be smaller at once (a && b && c)
+  // does not provide one: two identifiers that differ only in Index, for
+  // example, would compare as equivalent and one of them would be dropped.
+  bool operator < (const RSRegisterIdentifier & o) const {
+    if (Type != o.Type) {
+      return static_cast<unsigned>(Type) < static_cast<unsigned>(o.Type);
+    }
+    if (Space != o.Space) {
+      return Space < o.Space;
+    }
+    return Index < o.Index;
+  }
+};
+
+// A contiguous run of slots in the tracking UAV assigned to one register
+// type/space pair. Values are read from the "config" pass option.
+struct SlotRange
+{
+  // Index of the first slot in the range
+  unsigned startSlot;
+  // Number of slots in the range; indices at or beyond this are treated as out-of-bounds
+  unsigned numSlots;
+
+  // Number of slots needed if no descriptors from unbounded ranges are included
+  unsigned numInvariableSlots;
+};
+
+
+// Result of resolving a resource handle (see GetResourceFromHandle).
+struct DxilResourceAndClass {
+  // The module's resource record; nullptr when the handle could not be resolved
+  DxilResourceBase * resource;
+  // The index operand of the createHandle call; may be a constant or a dynamic value
+  Value * index;
+  // Resource class taken from the createHandle call
+  DXIL::ResourceClass resClass;
+};
+
+//---------------------------------------------------------------------------------------------------------------------------------
+
+// Module pass that instruments a shader so each resource access is recorded
+// in a tool-owned UAV, or (when "checkForDynamicIndexing" is set) only
+// reports whether any createHandle call uses a non-constant index.
+class DxilShaderAccessTracking : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilShaderAccessTracking() : ModulePass(ID) {}
+  const char *getPassName() const override { return "DXIL shader access tracking"; }
+  bool runOnModule(Module &M) override;
+  void applyOptions(PassOptions O) override;
+
+private:
+  // Emits an atomic OR of `access` into the tracking UAV at the given slot.
+  void EmitAccess(LLVMContext & Ctx, OP *HlslOP, IRBuilder<> &, Value *slot, ShaderAccessFlags access);
+  // Emits tracking code for one resource access before `instruction`; returns true if code was emitted.
+  bool EmitResourceAccess(DxilResourceAndClass &res, Instruction * instruction, OP * HlslOP, LLVMContext & Ctx, ShaderAccessFlags readWrite);
+
+private:
+  // When true, runOnModule only checks for dynamic indexing and does not instrument.
+  bool m_CheckForDynamicIndexing = false;
+  // Outputs parsed from the second section of the "config" option.
+  std::vector<std::pair<RSRegisterIdentifier, ShaderAccessFlags>> m_limitedAccessOutputs;
+  // Slot ranges parsed from the first section of the "config" option.
+  std::map<RegisterTypeAndSpace, SlotRange> m_slotAssignments;
+  // Handle of the tracking UAV; set by runOnModule before any EmitAccess call.
+  // Initialized to nullptr so it never holds an indeterminate value.
+  CallInst *m_HandleForUAV = nullptr;
+  // Bind points that were accessed with a non-constant index.
+  std::set<RSRegisterIdentifier> m_DynamicallyIndexedBindPoints;
+};
+
+// Consumes leading decimal digits from q and returns their value.
+// Returns 0 (and consumes nothing) when q is empty or does not start with a
+// digit. Values that do not fit in `unsigned` wrap around.
+static unsigned DeserializeInt(std::deque<char> & q) {
+  unsigned i = 0;
+
+  // isdigit has undefined behavior for negative arguments other than EOF, so
+  // the (possibly signed) char is converted to unsigned char first.
+  while(!q.empty() && isdigit(static_cast<unsigned char>(q.front())))
+  {
+    i *= 10;
+    i += q.front() - '0';
+    q.pop_front();
+  }
+  return i;
+}
+
+// Removes and returns the first character of q; raises an exception via
+// ThrowIf when q is empty.
+static char DequeFront(std::deque<char> & q) {
+  ThrowIf(q.empty());
+  const char head = q.front();
+  q.pop_front();
+  return head;
+}
+
+// Consumes one character from q and maps it to a RegisterType. Any character
+// that is not a known type code yields RegisterType::Terminator. Throws (via
+// DequeFront) when q is empty.
+static RegisterType ParseRegisterType(std::deque<char> & q) {
+  const char code = DequeFront(q);
+
+  if (code == 'C') return RegisterType::CBV;
+  if (code == 'S') return RegisterType::SRV;
+  if (code == 'U') return RegisterType::UAV;
+  if (code == 'R') return RegisterType::RTV;
+  if (code == 'D') return RegisterType::DSV;
+  if (code == 'M') return RegisterType::Sampler;
+  if (code == 'O') return RegisterType::SOV;
+  if (code == 'I') return RegisterType::Invalid;
+
+  return RegisterType::Terminator;
+}
+
+// Returns the one-character code for a register type (the inverse of
+// ParseRegisterType). Terminator has no code of its own and is written as '.'.
+static char EncodeRegisterType(RegisterType r) {
+  switch (r)
+  {
+  case RegisterType::CBV:     return 'C';
+  case RegisterType::SRV:     return 'S';
+  case RegisterType::UAV:     return 'U';
+  case RegisterType::RTV:     return 'R';
+  case RegisterType::DSV:     return 'D';
+  case RegisterType::Sampler: return 'M';
+  case RegisterType::SOV:     return 'O';
+  case RegisterType::Invalid: return 'I';
+  case RegisterType::Terminator: break; // handled below; listed so the switch is exhaustive
+  }
+  return '.';
+}
+
+// Consumes the next character of q, which must be the delimiter d.
+// Raises an exception via ThrowIf when q is empty or starts with a different
+// character. The emptiness check comes first because calling front() on an
+// empty deque is undefined behavior (e.g. for a truncated config string).
+static void ValidateDelimiter(std::deque<char> & q, char d) {
+  ThrowIf(q.empty() || q.front() != d);
+  q.pop_front();
+}
+
+// Reads the pass options.
+//
+//  "checkForDynamicIndexing": non-zero selects the check-only mode.
+//  "config": serialized tracking configuration, made of two sections that
+//    are each ended by a character that is not a register type code:
+//      slot assignments:        <type><space>:<startSlot>:<numSlots>i<numInvariableSlots>;
+//      limited access outputs:  <type><space>:<index>:<accessFlags>;
+//
+// Compare with PIX's ShaderAccessHelpers.cpp (TrackingConfiguration::SerializedRepresentation).
+// Malformed input raises an exception through the parsing helpers.
+void DxilShaderAccessTracking::applyOptions(PassOptions O) {
+  int dynamicIndexingOption;
+  GetPassOptionInt(O, "checkForDynamicIndexing", &dynamicIndexingOption, 0);
+  m_CheckForDynamicIndexing = dynamicIndexingOption != 0;
+
+  StringRef serialized;
+  if (!GetPassOption(O, "config", &serialized)) {
+    return;
+  }
+
+  std::deque<char> stream(serialized.begin(), serialized.end());
+
+  // Section 1: slot assignments.
+  for (RegisterType kind = ParseRegisterType(stream);
+       kind != RegisterType::Terminator;
+       kind = ParseRegisterType(stream)) {
+
+    RegisterTypeAndSpace key;
+    key.Type = kind;
+    key.Space = DeserializeInt(stream);
+    ValidateDelimiter(stream, ':');
+
+    SlotRange range;
+    range.startSlot = DeserializeInt(stream);
+    ValidateDelimiter(stream, ':');
+    range.numSlots = DeserializeInt(stream);
+    ValidateDelimiter(stream, 'i');
+    range.numInvariableSlots = DeserializeInt(stream);
+    ValidateDelimiter(stream, ';');
+
+    m_slotAssignments[key] = range;
+  }
+
+  // Section 2: limited access outputs.
+  for (RegisterType kind = ParseRegisterType(stream);
+       kind != RegisterType::Terminator;
+       kind = ParseRegisterType(stream)) {
+
+    RSRegisterIdentifier bindPoint;
+    bindPoint.Type = kind;
+    bindPoint.Space = DeserializeInt(stream);
+    ValidateDelimiter(stream, ':');
+    bindPoint.Index = DeserializeInt(stream);
+    ValidateDelimiter(stream, ':');
+
+    const unsigned rawFlags = DeserializeInt(stream);
+    ValidateDelimiter(stream, ';');
+
+    m_limitedAccessOutputs.emplace_back(bindPoint, static_cast<ShaderAccessFlags>(rawFlags));
+  }
+}
+
+// Emits, at Builder's insertion point, an atomic OR of the `access` flag bits
+// into the tracking UAV (m_HandleForUAV) at the four-byte slot number `slot`.
+void DxilShaderAccessTracking::EmitAccess(LLVMContext & Ctx, OP *HlslOP, IRBuilder<> & Builder, Value * slot, ShaderAccessFlags access)
+{
+  // Slots are four bytes each, so scale the slot number to a byte offset.
+  Value * SlotByteOffset = Builder.CreateMul(slot, HlslOP->GetU32Const(4));
+
+  Function * OrIntoUAVFunc = HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx));
+  Value * UnusedCoord = UndefValue::get(Type::getInt32Ty(Ctx));
+
+  // Operands of the atomicBinOp call, in order.
+  Value * Operands[] = {
+    HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp),      // opcode
+    m_HandleForUAV,                                              // resource handle
+    HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Or),    // binary operation code
+    SlotByteOffset,                                              // coordinate c0: byte offset
+    UnusedCoord,                                                 // coordinate c1 (unused)
+    UnusedCoord,                                                 // coordinate c2 (unused)
+    HlslOP->GetU32Const(static_cast<unsigned>(access)),          // value to OR in
+  };
+
+  (void)Builder.CreateCall(OrIntoUAVFunc, Operands, "UAVOrResult");
+}
+
+// Emits tracking code, immediately before `instruction`, that records an
+// access of kind `readWrite` to the resource described by `res`.
+//
+// The slot written is startSlot + index for the resource's type/space range.
+// Any index at or beyond numSlots is redirected to slot zero. A resource
+// accessed with a non-constant index is also remembered in
+// m_DynamicallyIndexedBindPoints.
+//
+// Returns true if instrumentation was emitted, false if the resource could
+// not be resolved or has no slot assignment.
+bool DxilShaderAccessTracking::EmitResourceAccess(DxilResourceAndClass &res, Instruction * instruction, OP * HlslOP, LLVMContext & Ctx, ShaderAccessFlags readWrite) {
+
+  // GetResourceFromHandle leaves these null when it cannot resolve the handle.
+  if (res.resource == nullptr || res.index == nullptr) {
+    return false; // did not modify
+  }
+
+  RegisterTypeAndSpace typeAndSpace{ RegisterTypeFromResourceClass(res.resClass), res.resource->GetSpaceID() };
+
+  auto slot = m_slotAssignments.find(typeAndSpace);
+  // If the assignment isn't found, we assume it's not accessed
+  if (slot != m_slotAssignments.end()) {
+
+    IRBuilder<> Builder(instruction);
+    Value * slotIndex;
+
+    if (isa<ConstantInt>(res.index)) {
+      unsigned index = cast<ConstantInt>(res.index)->getLimitedValue();
+      // Valid indices are [0, numSlots); this matches the unsigned >= test
+      // used for dynamic indices below.
+      if (index >= slot->second.numSlots) {
+        // out-of-range accesses are written to slot zero:
+        slotIndex = HlslOP->GetU32Const(0);
+      }
+      else {
+        slotIndex = HlslOP->GetU32Const(slot->second.startSlot + index);
+      }
+    }
+    else {
+      RSRegisterIdentifier id{ typeAndSpace.Type, typeAndSpace.Space,  res.resource->GetID() };
+      m_DynamicallyIndexedBindPoints.emplace(std::move(id));
+
+
+      // CompareWithSlotLimit will contain 1 if the access is out-of-bounds (both over- and under-flow
+      // via the unsigned >= with slot count)
+      auto CompareWithSlotLimit = Builder.CreateICmpUGE(res.index, HlslOP->GetU32Const(slot->second.numSlots), "CompareWithSlotLimit");
+      auto CompareWithSlotLimitAsUint = Builder.CreateCast(Instruction::CastOps::ZExt, CompareWithSlotLimit, Type::getInt32Ty(Ctx), "CompareWithSlotLimitAsUint");
+
+      // IsInBounds will therefore contain 0 if the access is out-of-bounds, and 1 otherwise.
+      auto IsInBounds = Builder.CreateSub(HlslOP->GetU32Const(1), CompareWithSlotLimitAsUint, "IsInBounds");
+
+      auto SlotOffset = Builder.CreateAdd(res.index, HlslOP->GetU32Const(slot->second.startSlot), "SlotOffset");
+
+      // This will drive an out-of-bounds access slot down to 0
+      slotIndex = Builder.CreateMul(SlotOffset, IsInBounds, "slotIndex");
+    }
+
+    EmitAccess(Ctx, HlslOP, Builder, slotIndex, readWrite);
+
+    return true; // did modify
+  }
+  return false; // did not modify
+}
+
+
+// Resolves a resource handle value to the module's resource record.
+//
+// Returns a result whose `resource` and `index` are nullptr (and whose class
+// is Invalid) when the handle is not a call instruction, when its rangeId is
+// not a constant, or when its resource class is not SRV/UAV/CBuffer/Sampler.
+// Callers must check `resource` before dereferencing it.
+DxilResourceAndClass GetResourceFromHandle(Value * resHandle, DxilModule &DM) {
+
+  DxilResourceAndClass ret{ nullptr, nullptr, DXIL::ResourceClass::Invalid };
+
+  // A handle that is not produced directly by a call cannot be decoded here;
+  // report it as unresolved instead of performing an unchecked cast.
+  // NOTE(review): this does not verify that the call is a createHandle op — confirm callers only pass such handles.
+  CallInst *handle = dyn_cast<CallInst>(resHandle);
+  if (handle == nullptr)
+    return ret;
+
+  DxilInst_CreateHandle createHandle(handle);
+
+
+  // Dynamic rangeId is not supported - skip and let validation report the
+  // error.
+  if (!isa<ConstantInt>(createHandle.get_rangeId()))
+    return ret;
+
+  unsigned rangeId =
+    cast<ConstantInt>(createHandle.get_rangeId())->getLimitedValue();
+
+  auto resClass = static_cast<DXIL::ResourceClass>(createHandle.get_resourceClass_val());
+
+  switch (resClass) {
+  case DXIL::ResourceClass::SRV:
+    ret.resource = &DM.GetSRV(rangeId);
+    break;
+  case DXIL::ResourceClass::UAV:
+    ret.resource = &DM.GetUAV(rangeId);
+    break;
+  case DXIL::ResourceClass::CBuffer:
+    ret.resource = &DM.GetCBuffer(rangeId);
+    break;
+  case DXIL::ResourceClass::Sampler:
+    ret.resource = &DM.GetSampler(rangeId);
+    break;
+  default:
+    DXASSERT(0, "invalid res class");
+    return ret;
+  }
+
+  ret.index = createHandle.get_index();
+  ret.resClass = resClass;
+
+  return ret;
+}
+
+// Runs the pass in one of two modes.
+//
+// Check-only mode (m_CheckForDynamicIndexing): scans createHandle calls and,
+// if any has a non-constant index operand, writes "FoundDynamicIndexing" to
+// OSOverride (when set). The module is not instrumented.
+//
+// Instrumentation mode: adds a tracking UAV in register space (unsigned)-2,
+// creates a handle for it at the start of the entry function, and emits an
+// access record before each resource-access op, each StoreOutput to a render
+// target, and each EmitStream. It then writes the list of dynamically indexed
+// bind points to OSOverride (when set).
+//
+// Returns true if the module was changed.
+bool DxilShaderAccessTracking::runOnModule(Module &M)
+{
+  // This pass adds instrumentation for shader access to resources
+
+  DxilModule &DM = M.GetOrCreateDxilModule();
+  LLVMContext & Ctx = M.getContext();
+  OP *HlslOP = DM.GetOP();
+
+  bool Modified = false;
+
+  if (m_CheckForDynamicIndexing) {
+
+    bool FoundDynamicIndexing = false;
+
+    auto CreateHandleFn = HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
+    auto CreateHandleUses = CreateHandleFn->uses();
+    for (auto FI = CreateHandleUses.begin(); FI != CreateHandleUses.end(); ) {
+      auto & FunctionUse = *FI++;
+      auto FunctionUser = FunctionUse.getUser();
+      auto instruction = cast<Instruction>(FunctionUser);
+      // Operand 3 of createHandle is the index into the resource range
+      Value * index = instruction->getOperand(3);
+      if (!isa<Constant>(index)) {
+        FoundDynamicIndexing = true;
+        break;
+      }
+    }
+
+    if (FoundDynamicIndexing) {
+      if (OSOverride != nullptr) {
+        formatted_raw_ostream FOS(*OSOverride);
+        FOS << "FoundDynamicIndexing";
+      }
+    }
+  }
+  else {
+    {
+      if (DM.m_ShaderFlags.GetForceEarlyDepthStencil()) {
+        if (OSOverride != nullptr) {
+          formatted_raw_ostream FOS(*OSOverride);
+          FOS << "ShouldAssumeDsvAccess";
+        }
+      }
+      IRBuilder<> Builder(DM.GetEntryFunction()->getEntryBlock().getFirstInsertionPt());
+
+      unsigned int UAVResourceHandle = static_cast<unsigned int>(DM.GetUAVs().size());
+
+      // Set up a UAV with structure of a single int
+      SmallVector<llvm::Type*, 1> Elements{ Type::getInt32Ty(Ctx) };
+      llvm::StructType *UAVStructTy = llvm::StructType::create(Elements, "class.RWStructuredBuffer");
+      std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
+      pUAV->SetGlobalName("PIX_CountUAVName");
+      pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
+      pUAV->SetID(UAVResourceHandle);
+      pUAV->SetSpaceID((unsigned int)-2); // This is the reserved-for-tools register space
+      pUAV->SetSampleCount(1);
+      pUAV->SetGloballyCoherent(false);
+      pUAV->SetHasCounter(false);
+      pUAV->SetCompType(CompType::getI32());
+      pUAV->SetLowerBound(0);
+      pUAV->SetRangeSize(1);
+      pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
+
+      auto pAnnotation = DM.GetTypeSystem().GetStructAnnotation(UAVStructTy);
+      if (pAnnotation == nullptr) {
+
+          pAnnotation = DM.GetTypeSystem().AddStructAnnotation(UAVStructTy);
+          pAnnotation->GetFieldAnnotation(0).SetCBufferOffset(0);
+          pAnnotation->GetFieldAnnotation(0).SetCompType(hlsl::DXIL::ComponentType::I32);
+          pAnnotation->GetFieldAnnotation(0).SetFieldName("count");
+      }
+
+      // Keep the returned id in a local. Assigning it to `ID` would overwrite
+      // the pass's static identification member and truncate the id to a char.
+      const unsigned AddedUAVID = DM.AddUAV(std::move(pUAV));
+
+      assert(AddedUAVID == UAVResourceHandle);
+
+      // Create handle for the newly-added UAV
+      Function* CreateHandleOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
+      Constant* CreateHandleOpcodeArg = HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
+      Constant* UAVArg = HlslOP->GetI8Const(static_cast<std::underlying_type<DxilResourceBase::Class>::type>(DXIL::ResourceClass::UAV));
+      Constant* MetaDataArg = HlslOP->GetU32Const(AddedUAVID); // position of the metadata record in the corresponding metadata list
+      Constant* IndexArg = HlslOP->GetU32Const(0); // index into the (single-element) range
+      Constant* FalseArg = HlslOP->GetI1Const(0); // non-uniform resource index: false
+      m_HandleForUAV = Builder.CreateCall(CreateHandleOpFunc,
+      { CreateHandleOpcodeArg, UAVArg, MetaDataArg, IndexArg, FalseArg }, "PIX_CountUAV_Handle");
+
+      DM.ReEmitDxilResources();
+
+      // A resource and a call were just added, so the module has changed even
+      // if no access ends up being instrumented below.
+      Modified = true;
+    }
+
+    struct ResourceAccessFunction
+    {
+      DXIL::OpCode opcode;
+      ShaderAccessFlags readWrite;
+      bool functionUsesSamplerAtIndex2;
+      std::vector<Type*> overloads;
+    };
+
+    std::vector<Type*> voidType = { Type::getVoidTy(Ctx) };
+    std::vector<Type*> i32 = { Type::getInt32Ty(Ctx) };
+    std::vector<Type*> f16f32 = { Type::getHalfTy(Ctx), Type::getFloatTy(Ctx) };
+    std::vector<Type*> f32i32 = { Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx) };
+    std::vector<Type*> f32i32f64 = { Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx), Type::getDoubleTy(Ctx) };
+    std::vector<Type*> f16f32i16i32 = { Type::getHalfTy(Ctx), Type::getFloatTy(Ctx), Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx) };
+    std::vector<Type*> f16f32f64i16i32i64 = { Type::getHalfTy(Ctx), Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx), Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx), Type::getInt64Ty(Ctx) };
+
+
+    // todo: should "GetDimensions" mean a resource access?
+    // NOTE(review): TextureGatherCmp is marked as not using a sampler at operand 2, unlike
+    // TextureGather — confirm against the DXIL op signature.
+    ResourceAccessFunction raFunctions[] = {
+      { DXIL::OpCode::CBufferLoadLegacy     , ShaderAccessFlags::Read   , false, f32i32f64 },
+      { DXIL::OpCode::CBufferLoad           , ShaderAccessFlags::Read   , false, f16f32f64i16i32i64 },
+      { DXIL::OpCode::Sample                , ShaderAccessFlags::Read   , true , f16f32 },
+      { DXIL::OpCode::SampleBias            , ShaderAccessFlags::Read   , true , f16f32 },
+      { DXIL::OpCode::SampleLevel           , ShaderAccessFlags::Read   , true , f16f32 },
+      { DXIL::OpCode::SampleGrad            , ShaderAccessFlags::Read   , true , f16f32 },
+      { DXIL::OpCode::SampleCmp             , ShaderAccessFlags::Read   , true , f16f32 },
+      { DXIL::OpCode::SampleCmpLevelZero    , ShaderAccessFlags::Read   , true , f16f32 },
+      { DXIL::OpCode::TextureLoad           , ShaderAccessFlags::Read   , false, f16f32i16i32 },
+      { DXIL::OpCode::TextureStore          , ShaderAccessFlags::Write  , false, f16f32i16i32 },
+      { DXIL::OpCode::TextureGather         , ShaderAccessFlags::Read   , true , f32i32 }, // todo: SM6: f16f32i16i32 },
+      { DXIL::OpCode::TextureGatherCmp      , ShaderAccessFlags::Read   , false, f32i32 }, // todo: SM6: f16f32i16i32 },
+      { DXIL::OpCode::BufferLoad            , ShaderAccessFlags::Read   , false, f32i32 },
+      { DXIL::OpCode::RawBufferLoad         , ShaderAccessFlags::Read   , false, f32i32 },
+      { DXIL::OpCode::BufferStore           , ShaderAccessFlags::Write  , false, f32i32 },
+      { DXIL::OpCode::BufferUpdateCounter   , ShaderAccessFlags::Counter, false, voidType },
+      { DXIL::OpCode::AtomicBinOp           , ShaderAccessFlags::Write  , false, i32 },
+      { DXIL::OpCode::AtomicCompareExchange , ShaderAccessFlags::Write  , false, i32 },
+    };
+
+    for (const auto & raFunction : raFunctions) {
+      for (const auto & Overload : raFunction.overloads) {
+        Function * TheFunction = HlslOP->GetOpFunc(raFunction.opcode, Overload);
+        auto TexLoadFunctionUses = TheFunction->uses();
+        for (auto FI = TexLoadFunctionUses.begin(); FI != TexLoadFunctionUses.end(); ) {
+          auto & FunctionUse = *FI++;
+          auto FunctionUser = FunctionUse.getUser();
+          auto instruction = cast<Instruction>(FunctionUser);
+
+          auto res = GetResourceFromHandle(instruction->getOperand(1), DM);
+
+          // The handle could not be resolved (e.g. dynamic rangeId); there is
+          // nothing to instrument and no resource to inspect.
+          if (res.resource == nullptr) {
+            continue;
+          }
+
+          // Don't instrument the accesses to the UAV that we just added
+          if (res.resource->GetSpaceID() == (unsigned)-2) {
+            continue;
+          }
+
+          if (EmitResourceAccess(res, instruction, HlslOP, Ctx, raFunction.readWrite)) {
+            Modified = true;
+          }
+
+          if (raFunction.functionUsesSamplerAtIndex2) {
+            auto sampler = GetResourceFromHandle(instruction->getOperand(2), DM);
+            if (sampler.resource != nullptr &&
+                EmitResourceAccess(sampler, instruction, HlslOP, Ctx, ShaderAccessFlags::Read)) {
+              Modified = true;
+            }
+          }
+        }
+      }
+    }
+
+    // StoreOutput for render-targets:
+    for (const auto & Overload : f16f32i16i32) {
+      Function * TheFunction = HlslOP->GetOpFunc(DXIL::OpCode::StoreOutput, Overload);
+      auto FunctionUses = TheFunction->uses();
+      for (auto FI = FunctionUses.begin(); FI != FunctionUses.end(); ) {
+        auto & FunctionUse = *FI++;
+        auto FunctionUser = FunctionUse.getUser();
+        auto instruction = cast<Instruction>(FunctionUser);
+
+        unsigned outputId = cast<ConstantInt>(instruction->getOperand(1))->getLimitedValue();
+
+        const DxilSignatureElement & sig = DM.GetOutputSignature().GetElement(outputId);
+
+        if (sig.GetSemantic()->GetKind() == DXIL::SemanticKind::Target){
+
+          auto slot = m_slotAssignments.find({ RegisterType::RTV, 0 });
+
+          if (slot != m_slotAssignments.end()) {
+            IRBuilder<> Builder(instruction);
+            EmitAccess(
+              Ctx, 
+              HlslOP, 
+              Builder, 
+              HlslOP->GetU32Const(slot->second.startSlot + sig.GetSemanticStartIndex()), 
+              ShaderAccessFlags::Write);
+            Modified = true;
+          }
+
+          // NOTE(review): limited.first.Index and limited.second (the parsed access flags) are not
+          // used here; every limited output is recorded as a Write at its range's first slot —
+          // confirm this is intended.
+          for (auto const & limited : m_limitedAccessOutputs) {
+
+            auto limitedSlot = m_slotAssignments.find({ limited.first.Type, limited.first.Space });
+
+            if (limitedSlot != m_slotAssignments.end()) {
+              IRBuilder<> Builder(instruction);
+              EmitAccess(
+                Ctx,
+                HlslOP,
+                Builder,
+                HlslOP->GetU32Const(limitedSlot->second.startSlot),
+                ShaderAccessFlags::Write);
+              Modified = true;
+            }
+          }
+
+          // We do the limited access outputs (e.g. depth) on the first StoreOutput to the render target,
+          // a moment in the shader which is a good proxy for "this invocation hasn't been discarded".
+          m_limitedAccessOutputs.clear();
+        }
+      }
+    }
+
+    // EmitStream for stream out
+    {
+      Function * TheFunction = HlslOP->GetOpFunc(DXIL::OpCode::EmitStream, Type::getVoidTy(Ctx));
+      auto FunctionUses = TheFunction->uses();
+      for (auto FI = FunctionUses.begin(); FI != FunctionUses.end(); ) {
+        auto & FunctionUse = *FI++;
+        auto FunctionUser = FunctionUse.getUser();
+        auto instruction = cast<Instruction>(FunctionUser);
+
+        unsigned outputId = cast<ConstantInt>(instruction->getOperand(DXIL::OperandIndex::kStreamEmitCutIDOpIdx))->getLimitedValue();
+
+        auto slot = m_slotAssignments.find({ RegisterType::SOV, 0 /* register space */ });
+
+        if (slot != m_slotAssignments.end()) {
+          IRBuilder<> Builder(instruction);
+          EmitAccess(
+            Ctx, 
+            HlslOP, 
+            Builder, 
+            HlslOP->GetU32Const(slot->second.startSlot + outputId),
+            ShaderAccessFlags::Write);
+          Modified = true;
+        }
+      }
+    }
+
+    if (OSOverride != nullptr) {
+      formatted_raw_ostream FOS(*OSOverride);
+      FOS << "DynamicallyIndexedBindPoints=";
+      for (auto const & bp : m_DynamicallyIndexedBindPoints) {
+        FOS << EncodeRegisterType(bp.Type) << bp.Space << ':' << bp.Index <<';';
+      }
+      FOS << ".";
+    }
+  }
+
+  return Modified;
+}
+
+char DxilShaderAccessTracking::ID = 0;
+
+// Factory for the shader access tracking pass. Returns a newly allocated pass
+// object; this function does not retain or free it.
+ModulePass *llvm::createDxilShaderAccessTrackingPass() {
+  return new DxilShaderAccessTracking();
+}
+
+INITIALIZE_PASS(DxilShaderAccessTracking, "hlsl-dxil-pix-shader-access-instrumentation", "HLSL DXIL shader access tracking for PIX", false, false)

+ 28 - 0
tools/clang/test/HLSL/pix/AccessTracking.hlsl

@@ -0,0 +1,28 @@
+// RUN: %dxc -ECSMain -Tcs_6_0 %s | %opt -S -hlsl-dxil-pix-shader-access-instrumentation,config=S0:1:1i1;U0:2:10i0;.. | %FileCheck %s
+
+// Check we added the UAV:
+// CHECK:  %PIX_CountUAV_Handle = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 0, i1 false)
+
+// check for correct out-of-bounds calculation
+// CHECK: CompareWithSlotLimit = icmp uge i32
+// CHECK: CompareWithSlotLimitAsUint = zext i1 %CompareWithSlotLimit to i32
+// CHECK: IsInBounds = sub i32 1, %CompareWithSlotLimitAsUint
+// CHECK: SlotOffset = add i32
+// CHECK: slotIndex = mul i32
+
+// Check for update of UAV:
+// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 2, i32
+
+
+ByteAddressBuffer inBuffer : register(t0);
+RWByteAddressBuffer bufferArray[] : register(u0);
+
+// Compute entry point: loads an index from inBuffer and uses it to pick the
+// element of bufferArray that is stored to.
+[numthreads(1, 1, 1)]
+void CSMain()
+{
+  // Simple read
+  uint dynamicBufferIndex = inBuffer.Load(0);
+
+  // Dynamically indexed write
+  bufferArray[dynamicBufferIndex].Store(0, 1);
+}

+ 24 - 0
tools/clang/test/HLSL/pix/AccessTrackingRTV.hlsl

@@ -0,0 +1,24 @@
+// RUN: %dxc -EPSMain -Tps_6_0 %s | %opt -S -hlsl-dxil-pix-shader-access-instrumentation,config=R0:1:2i1;.. | %FileCheck %s
+
+
+// Check for update of UAV for each target (last column is byte offset into UAV, indexed by RT array index)
+// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 2, i32 4
+// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 2, i32 8
+// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 2, i32 12
+
+// Pixel shader output with one member per render target (SV_Target, SV_Target1, SV_Target2).
+struct PSOut
+{
+  float4 rt0 : SV_Target;
+  uint4 rt1 : SV_Target1;
+  int4 rt2 : SV_Target2;
+
+};
+
+// Pixel shader entry point: writes a constant value to each of the three targets in PSOut.
+PSOut PSMain()
+{
+  PSOut o;
+  o.rt0 = float4(1, 2, 3, 4);
+  o.rt1 = uint4(5, 6, 7, 8);
+  o.rt2 = int4(9, 10, 11, 12);
+  return o;
+}

+ 43 - 0
tools/clang/test/HLSL/pix/AccessTrackingStreamOut.hlsl

@@ -0,0 +1,43 @@
+// RUN: %dxc -Emain -Tgs_6_0 %s | %opt -S -hlsl-dxil-pix-shader-access-instrumentation,config=O0:1:3i1;.. | %FileCheck %s
+
+// Check for update of UAV for each stream (last column is byte offset into UAV, indexed by stream #)
+// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 2, i32 4
+// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 2, i32 8
+// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 2, i32 12
+
+// Vertex type emitted on OutputStream0 and OutputStream2.
+struct MyStruct
+{
+  float4 pos : SV_Position;
+  float2 a : AAA;
+};
+
+// Vertex type emitted on OutputStream1.
+struct MyStruct2
+{
+  uint3 X : XXX;
+  float4 p[3] : PPP;
+  uint3 Y : YYY;
+};
+
+// Geometry shader entry point: builds one vertex of each struct type from the
+// input point, then appends to each of the three point streams in turn,
+// restarting the strip after every append.
+[maxvertexcount(12)]
+void main(point float4 array[1] : COORD, inout PointStream<MyStruct> OutputStream0,
+  inout PointStream<MyStruct2> OutputStream1,
+  inout PointStream<MyStruct> OutputStream2)
+{
+  float4 r = array[0];
+  MyStruct a = (MyStruct)0;
+  MyStruct2 b = (MyStruct2)0;
+  a.pos = array[r.x];
+  a.a = r.xy;
+  b.X = r.xyz;
+  b.Y = a.pos.xyz;
+  b.p[2] = a.pos * 44;
+
+  OutputStream0.Append(a);
+  OutputStream0.RestartStrip();
+
+  OutputStream1.Append(b);
+  OutputStream1.RestartStrip();
+
+  OutputStream2.Append(a);
+  OutputStream2.RestartStrip();
+}

+ 15 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -458,6 +458,9 @@ public:
   TEST_METHOD(PixDebugPreexistingSVPosition)
   TEST_METHOD(PixDebugPreexistingSVVertex)
   TEST_METHOD(PixDebugPreexistingSVInstance)
+  TEST_METHOD(PixAccessTracking)
+  TEST_METHOD(PixAccessTrackingRTV)
+  TEST_METHOD(PixAccessTrackingStreamOut)
 
   TEST_METHOD(CodeGenAbs1)
   TEST_METHOD(CodeGenAbs2)
@@ -3004,6 +3007,18 @@ TEST_F(CompilerTest, PixDebugPreexistingSVInstance) {
   CodeGenTestCheck(L"pix\\DebugPreexistingSVInstance.hlsl");
 }
 
+// Runs CodeGenTestCheck on the AccessTracking.hlsl test in the pix directory.
+TEST_F(CompilerTest, PixAccessTracking) {
+  CodeGenTestCheck(L"pix\\AccessTracking.hlsl");
+}
+
+// Runs CodeGenTestCheck on the AccessTrackingRTV.hlsl test in the pix directory.
+TEST_F(CompilerTest, PixAccessTrackingRTV) {
+  CodeGenTestCheck(L"pix\\AccessTrackingRTV.hlsl");
+}
+
+// Runs CodeGenTestCheck on the AccessTrackingStreamOut.hlsl test in the pix directory.
+TEST_F(CompilerTest, PixAccessTrackingStreamOut) {
+  CodeGenTestCheck(L"pix\\AccessTrackingStreamOut.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenAbs1) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\abs1.hlsl");
 }

+ 3 - 0
utils/hct/hctdb.py

@@ -1319,6 +1319,9 @@ class db_dxil(object):
             {'n':'constant-alpha','t':'float','c':1}])
         add_pass('hlsl-dxil-remove-discards', 'DxilRemoveDiscards', 'HLSL DXIL Remove all discard instructions', [])
         add_pass('hlsl-dxil-force-early-z', 'DxilForceEarlyZ', 'HLSL DXIL Force the early Z global flag, if shader has no discard calls', [])
+        add_pass('hlsl-dxil-pix-shader-access-instrumentation', 'DxilShaderAccessTracking', 'HLSL DXIL shader access tracking for PIX', [
+            {'n':'config','t':'int','c':1},
+            {'n':'checkForDynamicIndexing','t':'bool','c':1}])
         add_pass('hlsl-dxil-debug-instrumentation', 'DxilDebugInstrumentation', 'HLSL DXIL debug instrumentation for PIX', [
             {'n':'UAVSize','t':'int','c':1},
             {'n':'parameter0','t':'int','c':1},