8 years ago · b06a8153ff
--- a/include/dxc/HLSL/DxilGenerationPass.h
+++ b/include/dxc/HLSL/DxilGenerationPass.h
@@ -60,6 +60,7 @@ ModulePass *createDxilOutputColorBecomesConstantPass();
 
				 ModulePass *createDxilRemoveDiscardsPass();
			
 
				 ModulePass *createDxilReduceMSAAToSingleSamplePass();
			
 
				 ModulePass *createDxilForceEarlyZPass();
			
 
				+ModulePass *createDxilDebugInstrumentationPass();
			
 
				 
			
 
				 void initializeDxilCondenseResourcesPass(llvm::PassRegistry&);
			
 
				 void initializeDxilEliminateOutputDynamicIndexingPass(llvm::PassRegistry&);
			
@@ -84,6 +85,7 @@ void initializeDxilOutputColorBecomesConstantPass(llvm::PassRegistry&);
 
				 void initializeDxilRemoveDiscardsPass(llvm::PassRegistry&);
			
 
				 void initializeDxilReduceMSAAToSingleSamplePass(llvm::PassRegistry&);
			
 
				 void initializeDxilForceEarlyZPass(llvm::PassRegistry&);
			
 
				+void initializeDxilDebugInstrumentationPass(llvm::PassRegistry&);
			
 
				 
			
 
				 bool AreDxilResourcesDense(llvm::Module *M, hlsl::DxilResourceBase **ppNonDense);
			
 
				 
			
--- a/lib/HLSL/CMakeLists.txt
+++ b/lib/HLSL/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_library(LLVMHLSL
 
				   DxilContainer.cpp
			
 
				   DxilContainerAssembler.cpp
			
 
				   DxilContainerReflection.cpp
			
 
				+  DxilDebugInstrumentation.cpp
			
 
				   DxilEliminateOutputDynamicIndexing.cpp
			
 
				   DxilExpandTrigIntrinsics.cpp
			
 
				   DxilForceEarlyZ.cpp
			
--- a/lib/HLSL/DxcOptimizer.cpp
+++ b/lib/HLSL/DxcOptimizer.cpp
@@ -85,6 +85,7 @@ HRESULT SetupRegistryPassForHLSL() {
 
				     initializeDxilAddPixelHitInstrumentationPass(Registry);
			
 
				     initializeDxilCondenseResourcesPass(Registry);
			
 
				     initializeDxilDeadFunctionEliminationPass(Registry);
			
 
				+    initializeDxilDebugInstrumentationPass(Registry);
			
 
				     initializeDxilEliminateOutputDynamicIndexingPass(Registry);
			
 
				     initializeDxilEmitMetadataPass(Registry);
			
 
				     initializeDxilExpandTrigIntrinsicsPass(Registry);
			
@@ -176,6 +177,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
 
				   static const LPCSTR ArgPromotionArgs[] = { "maxElements" };
			
 
				   static const LPCSTR CFGSimplifyPassArgs[] = { "Threshold", "Ftor", "bonus-inst-threshold" };
			
 
				   static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "force-early-z", "add-pixel-cost", "rt-width", "sv-position-index", "num-pixels" };
			
 
				+  static const LPCSTR DxilDebugInstrumentationArgs[] = { "UAVSize", "parameter0", "parameter1", "parameter2" };
			
 
				   static const LPCSTR DxilGenerationPassArgs[] = { "NotOptimized" };
			
 
				   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "mod-mode", "constant-red", "constant-green", "constant-blue", "constant-alpha" };
			
 
				   static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "ReplaceAllVectors" };
			
@@ -207,6 +209,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
 
				   if (strcmp(passName, "argpromotion") == 0) return ArrayRef<LPCSTR>(ArgPromotionArgs, _countof(ArgPromotionArgs));
			
 
				   if (strcmp(passName, "simplifycfg") == 0) return ArrayRef<LPCSTR>(CFGSimplifyPassArgs, _countof(CFGSimplifyPassArgs));
			
 
				   if (strcmp(passName, "hlsl-dxil-add-pixel-hit-instrmentation") == 0) return ArrayRef<LPCSTR>(DxilAddPixelHitInstrumentationArgs, _countof(DxilAddPixelHitInstrumentationArgs));
			
 
				+  if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
			
 
				   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
			
 
				   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
			
 
				   if (strcmp(passName, "dynamic-vector-to-array") == 0) return ArrayRef<LPCSTR>(DynamicIndexingVectorToArrayArgs, _countof(DynamicIndexingVectorToArrayArgs));
			
@@ -245,6 +248,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
 
				   static const LPCSTR ArgPromotionArgs[] = { "None" };
			
 
				   static const LPCSTR CFGSimplifyPassArgs[] = { "None", "None", "Control the number of bonus instructions (default = 1)" };
			
 
				   static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "None", "None", "None", "None", "None" };
			
 
				+  static const LPCSTR DxilDebugInstrumentationArgs[] = { "None", "None", "None", "None" };
			
 
				   static const LPCSTR DxilGenerationPassArgs[] = { "None" };
			
 
				   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "None", "None", "None", "None", "None" };
			
 
				   static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "None" };
			
@@ -276,6 +280,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
 
				   if (strcmp(passName, "argpromotion") == 0) return ArrayRef<LPCSTR>(ArgPromotionArgs, _countof(ArgPromotionArgs));
			
 
				   if (strcmp(passName, "simplifycfg") == 0) return ArrayRef<LPCSTR>(CFGSimplifyPassArgs, _countof(CFGSimplifyPassArgs));
			
 
				   if (strcmp(passName, "hlsl-dxil-add-pixel-hit-instrmentation") == 0) return ArrayRef<LPCSTR>(DxilAddPixelHitInstrumentationArgs, _countof(DxilAddPixelHitInstrumentationArgs));
			
 
				+  if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
			
 
				   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
			
 
				   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
			
 
				   if (strcmp(passName, "dynamic-vector-to-array") == 0) return ArrayRef<LPCSTR>(DynamicIndexingVectorToArrayArgs, _countof(DynamicIndexingVectorToArrayArgs));
			
@@ -328,6 +333,7 @@ static bool IsPassOptionName(StringRef S) {
 
				     ||  S.equals("TIRA")
			
 
				     ||  S.equals("TLIImpl")
			
 
				     ||  S.equals("Threshold")
			
 
				+    ||  S.equals("UAVSize")
			
 
				     ||  S.equals("add-pixel-cost")
			
 
				     ||  S.equals("bonus-inst-threshold")
			
 
				     ||  S.equals("constant-alpha")
			
@@ -356,6 +362,9 @@ static bool IsPassOptionName(StringRef S) {
 
				     ||  S.equals("no-discriminators")
			
 
				     ||  S.equals("noloads")
			
 
				     ||  S.equals("num-pixels")
			
 
				+    ||  S.equals("parameter0")
			
 
				+    ||  S.equals("parameter1")
			
 
				+    ||  S.equals("parameter2")
			
 
				     ||  S.equals("pragma-unroll-threshold")
			
 
				     ||  S.equals("reroll-num-tolerated-failed-matches")
			
 
				     ||  S.equals("rewrite-map-file")
			
--- a/lib/HLSL/DxilDebugInstrumentation.cpp
+++ b/lib/HLSL/DxilDebugInstrumentation.cpp
@@ -0,0 +1,732 @@
 
				+///////////////////////////////////////////////////////////////////////////////
			
 
				+//                                                                           //
			
 
				+// DxilDebugInstrumentation.cpp                                              //
			
 
				+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
			
 
				+// This file is distributed under the University of Illinois Open Source     //
			
 
				+// License. See LICENSE.TXT for details.                                     //
			
 
				+//                                                                           //
			
 
				+// Adds instrumentation that enables shader debugging in PIX                 //
			
 
				+//                                                                           //
			
 
				+///////////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+#include "dxc/HLSL/DxilGenerationPass.h"
			
 
				+#include "dxc/HLSL/DxilOperations.h"
			
 
				+#include "dxc/HLSL/DxilModule.h"
			
 
				+
			
 
				+#include "llvm/IR/Module.h"
			
 
				+#include "llvm/IR/Constants.h"
			
 
				+#include "llvm/IR/InstIterator.h"
			
 
				+#include "llvm/IR/IRBuilder.h"
			
 
				+
			
 
				+
			
 
				+using namespace llvm;
			
 
				+using namespace hlsl;
			
 
				+
			
 
				+// Overview of instrumentation:
			
 
				+// 
			
 
				+// In summary, instructions are added that cause a "trace" of the execution of the shader to be written
			
 
				+// out to a UAV. This trace is then used by a debugger application to provide a post-mortem debugging
			
 
				+// experience that reconstructs the execution history of the shader.
			
 
				+// 
			
 
				+// The trace is only required for a particular shader instance of interest, and a branchless mechanism
			
 
				+// is used to write the trace either to an incrementing location within the UAV, or to a "dumping ground"
			
 
				+// area at the top of the UAV if the instance is not of interest.
			
 
				+// 
			
 
				+// The following modifications are made:
			
 
				+// 
			
 
				+// First, instructions are added to the top of the entry point function that implement the following:
			
 
				+// -  Examine the input variables that define the instance of the shader that is running. This will
			
 
				+//    be SV_Position for pixel shaders, SV_Vertex+SV_Instance for vertex shaders, thread id for compute
			
 
				+//    shaders etc. If these system values need to be added to the shader, then they are also added to the
			
 
				+//    input signature, if appropriate.
			
 
				+// -  Compare the above variables with the instance of interest defined by the invoker of this pass.
			
 
				+//    Deduce two values: a multiplicand and an addend that together allow a branchless calculation of
			
 
				+//    the offset into the UAV at which to write via "offset = offset * multiplicand + addend."
			
 
				+//    If the instance is NOT of interest, the multiplicand is zero and the addend is 
			
 
				+//    sizeof(UAV)-(a little bit), causing writes for uninteresting invocations to end up at the top of 
			
 
				+//    the UAV. Otherwise the multiplicand is 1 and the addend is 0.
			
 
				+// -  Calculate an "instance identifier". Even with the above instance identification, several invocations may
			
 
				+//    end up matching the selection criteria. Specifically, this happens during a draw call in which many
			
 
				+//    triangles overlap the pixel of interest. More on this below.
			
 
				+//    
			
 
				+// During execution, the instrumentation for most instructions causes data to be emitted to the UAV. 
			
 
				+// The index at which data is written is identified by treating the first uint32 of the UAV as an index 
			
 
				+// which is atomically incremented by the instrumentation. The very first value of this counter that is
			
 
				+// encountered by each invocation is used as the "instance identifier" mentioned above. That instance
			
 
				+// identifier is written out with each packet, since many pixel shaders executing in parallel will emit
			
 
				+// interleaved packets, and the debugger application uses the identifiers to group packets from each separate
			
 
				+// invocation together.
			
 
				+// 
			
 
				+// If an instruction has a non-void and primitive return type, i.e. isn't a struct, then the instrumentation
			
 
				+// will write that value out to the UAV as well as part of the "step" data packet.
			
 
				+//    
			
 
				+// The limiting size of the UAV is enforced in a branchless way by ANDing the offset with a precomputed
			
 
				+// value that is sizeof(UAV)-64. The actual size of the UAV allocated by the caller is required to be
			
 
				+// a power of two plus 64 for this reason. The caller detects UAV overrun by examining a canary value
			
 
				+// close to the end of the power-of-two size of the UAV. If this value has been overwritten, the debug session
			
 
				+// is deemed to have overflowed the UAV. The caller will then allocate a UAV that is twice the size and
			
 
				+// try again, up to a predefined maximum.
			
 
				+
			
 
				+// Keep this in sync with the same-named value in the debugger application's WinPixShaderUtils.h
			
 
				+// UAVDumpingGroundOffset() subtracts this value from the configured UAV size to find where the
			
 
				+// dumping ground for uninteresting invocations starts.
			
 
				+constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024;
			
 
				+
			
 
				+
			
 
				+// These definitions echo those in the debugger application's debugshaderrecord.h file
			
 
				+// Tags identifying the kind of each debug record. The first nine enumerators take the implicit
			
 
				+// values 0-8; the DXIL step variants are pinned explicitly at 251-255.
			
 
				+// NOTE(review): presumably stored in the 8-bit Type bit-field of the record headers declared
			
 
				+// below, which is why the values stay within 0-255 - confirm against debugshaderrecord.h.
			
 
				+enum DebugShaderModifierRecordType {
			
 
				+  DebugShaderModifierRecordTypeInvocationStartMarker,
			
 
				+  DebugShaderModifierRecordTypeStep,
			
 
				+  DebugShaderModifierRecordTypeEvent,
			
 
				+  DebugShaderModifierRecordTypeInputRegister,
			
 
				+  DebugShaderModifierRecordTypeReadRegister,
			
 
				+  DebugShaderModifierRecordTypeWrittenRegister,
			
 
				+  DebugShaderModifierRecordTypeRegisterRelativeIndex0,
			
 
				+  DebugShaderModifierRecordTypeRegisterRelativeIndex1,
			
 
				+  DebugShaderModifierRecordTypeRegisterRelativeIndex2,
			
 
				+  DebugShaderModifierRecordTypeDXILStepVoid = 251,
			
 
				+  DebugShaderModifierRecordTypeDXILStepFloat = 252,
			
 
				+  DebugShaderModifierRecordTypeDXILStepUint32 = 253,
			
 
				+  DebugShaderModifierRecordTypeDXILStepUint64 = 254,
			
 
				+  DebugShaderModifierRecordTypeDXILStepDouble = 255,
			
 
				+};
			
 
				+
			
 
				+// These structs echo those in the debugger application's debugshaderrecord.h file, but are recapitulated here
			
 
				+// because the originals use unnamed unions which are disallowed by DXCompiler's build.
			
 
				+// 
			
 
				+// 4-byte packing applies to every record struct declared up to the matching pack(pop).
			
 
				+#pragma pack(push,4)
			
 
				+// Common prefix of a debug record: one packed header dword followed by a UID dword.
			
 
				+struct DebugShaderModifierRecordHeader {
			
 
				+  union  {
			
 
				+    // The bit-fields total 32 bits (4 + 4 + 8 + 16) and share storage with u32Header.
			
 
				+    struct {
			
 
				+      uint32_t SizeDwords : 4;
			
 
				+      uint32_t Flags : 4;
			
 
				+      uint32_t Type : 8;
			
 
				+      uint32_t HeaderPayload : 16;
			
 
				+    } Details;
			
 
				+    uint32_t u32Header;
			
 
				+  } Header;
			
 
				+  uint32_t UID;
			
 
				+};
			
 
				+
			
 
				+// Base of the DXIL step records: same header shape as above with the 16-bit payload field
			
 
				+// named Opcode, followed by the UID and an InstructionOffset dword.
			
 
				+struct DebugShaderModifierRecordDXILStepBase {
			
 
				+  union {
			
 
				+    struct {
			
 
				+      uint32_t SizeDwords : 4;
			
 
				+      uint32_t Flags : 4;
			
 
				+      uint32_t Type : 8;
			
 
				+      uint32_t Opcode : 16;
			
 
				+    } Details;
			
 
				+    uint32_t u32Header;
			
 
				+  } Header;
			
 
				+  uint32_t UID;
			
 
				+  uint32_t InstructionOffset;
			
 
				+};
			
 
				+
			
 
				+// Step record that appends a value of type ReturnType after the base fields.
			
 
				+template< typename ReturnType >
			
 
				+struct DebugShaderModifierRecordDXILStep : public DebugShaderModifierRecordDXILStepBase {
			
 
				+  ReturnType ReturnValue;
			
 
				+};
			
 
				+
			
 
				+// Specialization for void: the record consists of the base fields only.
			
 
				+template< >
			
 
				+struct DebugShaderModifierRecordDXILStep<void> : public DebugShaderModifierRecordDXILStepBase {
			
 
				+};
			
 
				+#pragma pack(pop)
			
 
				+
			
 
				+
			
 
				+// Returns how many 32-bit words of a record lie beyond the common record header.
				+// recordTotalSizeBytes is the size of the whole record in bytes; the caller must pass a
				+// value at least as large as sizeof(DebugShaderModifierRecordHeader), since the
				+// subtraction is unsigned.
				+uint32_t DebugShaderModifierRecordPayloadSizeDwords(size_t recordTotalSizeBytes) {
				+  const size_t PayloadBytes = recordTotalSizeBytes - sizeof(DebugShaderModifierRecordHeader);
				+  const size_t PayloadDwords = PayloadBytes / sizeof(uint32_t);
				+  return static_cast<uint32_t>(PayloadDwords);
				+}
			
 
				+
			
 
				+// Module pass that adds PIX shader-debugging instrumentation: the instrumented shader writes
				+// a trace of its execution to a UAV that this pass adds to the module.
				+class DxilDebugInstrumentation : public ModulePass {
				+
				+private:
				+  // Invoker-supplied values that identify the invocation of interest. applyOptions fills
				+  // Parameters[0..2]; the per-stage structs are named views over that same storage.
				+  union ParametersAllTogether {
				+    unsigned Parameters[3];
				+    struct PixelShaderParameters {
				+      unsigned X;
				+      unsigned Y;
				+    } PixelShader;
				+    struct VertexShaderParameters {
				+      unsigned VertexId;
				+      unsigned InstanceId;
				+    } VertexShader;
				+    struct ComputeShaderParameters {
				+      unsigned ThreadIdX;
				+      unsigned ThreadIdY;
				+      unsigned ThreadIdZ;
				+    } ComputeShader;
				+  } m_Parameters = { 0,0,0 };
				+
				+  // Input-signature element IDs of the system values that the selection prolog loads.
				+  // Produced by addRequiredSystemValues; compute shaders need none.
				+  union SystemValueIndices {
				+    struct PixelShaderParameters {
				+      unsigned Position;
				+    } PixelShader;
				+    struct VertexShaderParameters {
				+      unsigned VertexId;
				+      unsigned InstanceId;
				+    } VertexShader;
				+  };
				+
				+  // Size of the debug UAV as configured through the "UAVSize" option.
				+  uint64_t m_UAVSize = 1024*1024;
				+  // Boolean result of the "is this the invocation of interest" test (set by
				+  // addInvocationSelectionProlog).
				+  Value * m_SelectionCriterion = nullptr;
				+  // Handle to the debug UAV (set by addUAV).
				+  CallInst * m_HandleForUAV = nullptr;
				+  Value * m_InvocationId = nullptr;
				+
				+  // Together these two values allow branchless writing to the UAV. An invocation of the shader
				+  // is either of interest or not (e.g. it writes to the pixel the user selected for debugging
				+  // or it doesn't). If not of interest, debugging output will still occur, but it will be
				+  // relegated to the very top few bytes of the UAV. Invocations of interest, by contrast, will
				+  // be written to the UAV at sequentially increasing offsets.
				+
				+  // This value will either be one or zero (one if the invocation is of interest, zero otherwise)
				+  Value * m_OffsetMultiplicand = nullptr;
				+  // This will either be zero (if the invocation is of interest) or (UAVSize)-(SmallValue) if not.
				+  Value * m_OffsetAddend = nullptr;
				+
				+  // Constant equal to UAVDumpingGroundOffset() - 1 (set by addInvocationSelectionProlog).
				+  Constant * m_OffsetMask = nullptr;
				+
				+  // NOTE(review): presumably caches one increment value per record size - confirm against
				+  // the code that populates it.
				+  std::map<uint32_t, Value *> m_IncrementInstructionBySize;
				+
				+  unsigned int m_InstructionIndex = 0;
				+
				+  // Bundle of the objects every instrumentation helper needs, passed around by reference.
				+  struct BuilderContext {
				+    Module &M;
				+    DxilModule &DM;
				+    LLVMContext & Ctx;
				+    OP * HlslOP;
				+    IRBuilder<> & Builder;
				+  };
				+
				+  uint32_t m_RemainingReservedSpaceInBytes = 0;
				+  Value * m_CurrentIndex = nullptr;
				+
				+public:
				+  static char ID; // Pass identification, replacement for typeid
				+  explicit DxilDebugInstrumentation() : ModulePass(ID) {}
				+  const char *getPassName() const override { return "Add PIX debug instrumentation"; }
				+  // Reads "parameter0".."parameter2" and "UAVSize" from the pass options.
				+  void applyOptions(PassOptions O) override;
				+  bool runOnModule(Module &M) override;
				+
				+private:
				+  SystemValueIndices addRequiredSystemValues(BuilderContext &BC);
				+  void addUAV(BuilderContext &BC);
				+  void addInvocationSelectionProlog(BuilderContext &BC, SystemValueIndices SVIndices);
				+  Value * addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
				+  Value * addComputeShaderProlog(BuilderContext &BC);
				+  Value * addVertexShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
				+  void addDebugEntryValue(BuilderContext &BC, Value * TheValue);
				+  void addInvocationStartMarker(BuilderContext &BC);
				+  // The size is a byte count: the parameter name now matches the out-of-line definition
				+  // and m_RemainingReservedSpaceInBytes (it was previously declared as "SpaceInDwords").
				+  void reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes);
				+  void addStepDebugEntry(BuilderContext &BC, Instruction *Inst);
				+  uint32_t UAVDumpingGroundOffset();
				+  template<typename ReturnType>
				+  void addStepEntryForType(DebugShaderModifierRecordType RecordType, BuilderContext &BC, Instruction *Inst);
				+
				+};
			
 
				+
			
 
				+// Applies the pass options supplied by the invoker.
				+//
				+// Recognized option names:
				+//   "parameter0", "parameter1", "parameter2" - stored in m_Parameters.Parameters[0..2]
				+//   "UAVSize"                                - stored in m_UAVSize
				+// Unrecognized names are ignored. Values are parsed as unsigned decimal integers; a value
				+// that is not a well-formed number (empty, negative, trailing characters, out of range)
				+// is ignored and the member keeps its previous value.
				+void DxilDebugInstrumentation::applyOptions(PassOptions O) {
				+  static const char * const ParameterNames[] = { "parameter0", "parameter1", "parameter2" };
				+
				+  for (const auto & option : O) {
				+    // Build explicit (pointer, length) views. The option text is not guaranteed to be
				+    // null-terminated, so C-string parsers such as atoi must not be handed data() alone.
				+    const StringRef Name(option.first.data(), option.first.size());
				+    const StringRef Text(option.second.data(), option.second.size());
				+
				+    // getAsInteger returns true on failure; only commit a value that parsed cleanly.
				+    if (Name.equals("UAVSize")) {
				+      uint64_t ParsedSize = 0;
				+      if (!Text.getAsInteger(10, ParsedSize)) {
				+        m_UAVSize = ParsedSize;
				+      }
				+      continue;
				+    }
				+
				+    for (unsigned Index = 0; Index < 3; ++Index) {
				+      if (Name.equals(ParameterNames[Index])) {
				+        unsigned ParsedValue = 0;
				+        if (!Text.getAsInteger(10, ParsedValue)) {
				+          m_Parameters.Parameters[Index] = ParsedValue;
				+        }
				+        break;
				+      }
				+    }
				+  }
				+}
			
 
				+
			
 
				+// Returns the offset within the debug UAV at which the dumping ground begins, i.e.
				+// m_UAVSize minus DebugBufferDumpingGroundSize, narrowed to 32 bits.
				+//
				+// The asserts catch two misconfigurations that would otherwise yield a silently wrong
				+// offset: a UAV smaller than the dumping ground (unsigned underflow) and a UAV so large
				+// that the offset does not fit in 32 bits (truncation).
				+uint32_t DxilDebugInstrumentation::UAVDumpingGroundOffset() {
				+  assert(m_UAVSize >= DebugBufferDumpingGroundSize && "UAVSize is smaller than the dumping ground");
				+  const uint64_t Offset = m_UAVSize - DebugBufferDumpingGroundSize;
				+  assert(Offset == static_cast<uint64_t>(static_cast<uint32_t>(Offset)) && "UAV offset does not fit in 32 bits");
				+  return static_cast<uint32_t>(Offset);
				+}
			
 
				+
			
 
				+
			
 
				+// Makes sure the input signature carries the system values needed to identify the invocation
				+// of interest, and returns the signature element IDs to load them from.
				+//
				+//   Pixel shader   : SV_Position
				+//   Vertex shader  : SV_VertexID and SV_InstanceID
				+//   Compute shader : nothing (the thread id is not part of the input signature)
				+//
				+// An element that already exists is reused; otherwise a new one is appended to the input
				+// signature. Any other shader kind is a programming error (asserts) and yields
				+// zero-initialized indices.
				+DxilDebugInstrumentation::SystemValueIndices DxilDebugInstrumentation::addRequiredSystemValues(BuilderContext &BC) {
				+  SystemValueIndices SVIndices{};
				+
				+  hlsl::DxilSignature & InputSignature = BC.DM.GetInputSignature();
				+  auto & InputElements = InputSignature.GetElements();
				+
				+  // Returns the ID of the input element with semantic Kind, appending a one-row element
				+  // with the given name, component type, interpolation mode and column count if the
				+  // signature does not have one yet.
				+  auto FindOrAddSystemValue = [&](hlsl::DXIL::SemanticKind Kind,
				+                                  DXIL::SigPointKind SigPoint,
				+                                  const char * Name,
				+                                  const hlsl::CompType & ElementType,
				+                                  hlsl::DXIL::InterpolationMode Interpolation,
				+                                  unsigned Columns) -> unsigned {
				+    auto Existing = std::find_if(
				+      InputElements.begin(), InputElements.end(),
				+      [Kind](const std::unique_ptr<DxilSignatureElement> & Element) {
				+      return Element->GetSemantic()->GetKind() == Kind; });
				+
				+    if (Existing != InputElements.end()) {
				+      return Existing->get()->GetID();
				+    }
				+
				+    auto Added = llvm::make_unique<DxilSignatureElement>(SigPoint);
				+    Added->Initialize(Name, ElementType, Interpolation, 1, Columns);
				+    Added->AppendSemanticIndex(0);
				+    Added->SetSigPointKind(SigPoint);
				+    Added->SetKind(Kind);
				+
				+    auto index = InputSignature.AppendElement(std::move(Added));
				+    return InputElements[index]->GetID();
				+  };
				+
				+  auto ShaderModel = BC.DM.GetShaderModel();
				+  switch (ShaderModel->GetKind()) {
				+  case DXIL::ShaderKind::Pixel:
				+    // SV_Position, if present, has to have full mask, so we needn't worry
				+    // about the shader having selected components that don't include x or y.
				+    SVIndices.PixelShader.Position = FindOrAddSystemValue(
				+      hlsl::DXIL::SemanticKind::Position, DXIL::SigPointKind::PSIn, "Position",
				+      hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Linear, 4);
				+    break;
				+  case DXIL::ShaderKind::Vertex:
				+    // Both IDs are unsigned integers and are read through the i32 LoadInput overload by
				+    // addVertexShaderProlog, so the added elements are declared as U32 rather than F32.
				+    // VertexId is handled first so that element IDs are assigned in the same order as before.
				+    SVIndices.VertexShader.VertexId = FindOrAddSystemValue(
				+      hlsl::DXIL::SemanticKind::VertexID, DXIL::SigPointKind::VSIn, "VertexId",
				+      hlsl::CompType::getU32(), hlsl::DXIL::InterpolationMode::Undefined, 1);
				+    SVIndices.VertexShader.InstanceId = FindOrAddSystemValue(
				+      hlsl::DXIL::SemanticKind::InstanceID, DXIL::SigPointKind::VSIn, "InstanceId",
				+      hlsl::CompType::getU32(), hlsl::DXIL::InterpolationMode::Undefined, 1);
				+    break;
				+  case DXIL::ShaderKind::Compute:
				+    // Compute thread Id is not in the input signature
				+    break;
				+  default:
				+    assert(false); // guaranteed by runOnModule
				+  }
				+
				+  return SVIndices;
				+}
			
 
				+
			
 
				+// Emits the compute-shader selection test: reads the three thread-id components and returns
				+// a boolean value that is true only when all of them equal the thread id configured in
				+// m_Parameters.ComputeShader.
				+Value * DxilDebugInstrumentation::addComputeShaderProlog(BuilderContext &BC) {
				+  OP * Ops = BC.HlslOP;
				+  IRBuilder<> & B = BC.Builder;
				+
				+  // Component selectors passed to the ThreadId operation
				+  Constant * ComponentX = Ops->GetU32Const(0);
				+  Constant * ComponentY = Ops->GetU32Const(1);
				+  Constant * ComponentZ = Ops->GetU32Const(2);
				+
				+  Function * ThreadIdOp = Ops->GetOpFunc(DXIL::OpCode::ThreadId, Type::getInt32Ty(BC.Ctx));
				+  Constant * ThreadIdOpcode = Ops->GetU32Const((unsigned)DXIL::OpCode::ThreadId);
				+
				+  Value * ActualX = B.CreateCall(ThreadIdOp, { ThreadIdOpcode, ComponentX }, "ThreadIdX");
				+  Value * ActualY = B.CreateCall(ThreadIdOp, { ThreadIdOpcode, ComponentY }, "ThreadIdY");
				+  Value * ActualZ = B.CreateCall(ThreadIdOp, { ThreadIdOpcode, ComponentZ }, "ThreadIdZ");
				+
				+  // One equality test per component against the requested thread id
				+  Constant * WantedX = Ops->GetU32Const(m_Parameters.ComputeShader.ThreadIdX);
				+  Value * MatchesX = B.CreateICmpEQ(ActualX, WantedX, "CompareToThreadIdX");
				+  Constant * WantedY = Ops->GetU32Const(m_Parameters.ComputeShader.ThreadIdY);
				+  Value * MatchesY = B.CreateICmpEQ(ActualY, WantedY, "CompareToThreadIdY");
				+  Constant * WantedZ = Ops->GetU32Const(m_Parameters.ComputeShader.ThreadIdZ);
				+  Value * MatchesZ = B.CreateICmpEQ(ActualZ, WantedZ, "CompareToThreadIdZ");
				+
				+  // Fold the three tests together: (X && Y) && Z
				+  Value * MatchesXY = B.CreateAnd(MatchesX, MatchesY, "CompareXAndY");
				+  return B.CreateAnd(MatchesXY, MatchesZ, "CompareAll");
				+}
			
 
				+
			
 
				+// Emits the vertex-shader selection test: loads the vertex id and instance id from the input
				+// signature elements named by SVIndices and returns a boolean value that is true only when
				+// both equal the ids configured in m_Parameters.VertexShader.
				+Value * DxilDebugInstrumentation::addVertexShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices) {
				+  OP * Ops = BC.HlslOP;
				+  IRBuilder<> & B = BC.Builder;
				+
				+  // Arguments shared by both loads: row 0, column 0, and an undef final operand
				+  Constant * RowZero = Ops->GetU32Const(0);
				+  Constant * ColumnZero = Ops->GetI8Const(0);
				+  UndefValue * UndefI32 = UndefValue::get(Type::getInt32Ty(BC.Ctx));
				+
				+  Function * LoadInput = Ops->GetOpFunc(DXIL::OpCode::LoadInput, Type::getInt32Ty(BC.Ctx));
				+  Constant * LoadInputOp = Ops->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
				+
				+  Constant * VertexIdElement = Ops->GetU32Const(SVIndices.VertexShader.VertexId);
				+  Value * LoadedVertexId = B.CreateCall(LoadInput,
				+    { LoadInputOp, VertexIdElement, RowZero, ColumnZero, UndefI32 }, "VertId");
				+
				+  Constant * InstanceIdElement = Ops->GetU32Const(SVIndices.VertexShader.InstanceId);
				+  Value * LoadedInstanceId = B.CreateCall(LoadInput,
				+    { LoadInputOp, InstanceIdElement, RowZero, ColumnZero, UndefI32 }, "InstanceId");
				+
				+  // Test each loaded id against the requested one, then combine
				+  Constant * WantedVertexId = Ops->GetU32Const(m_Parameters.VertexShader.VertexId);
				+  Value * VertexMatches = B.CreateICmpEQ(LoadedVertexId, WantedVertexId, "CompareToVertId");
				+  Constant * WantedInstanceId = Ops->GetU32Const(m_Parameters.VertexShader.InstanceId);
				+  Value * InstanceMatches = B.CreateICmpEQ(LoadedInstanceId, WantedInstanceId, "CompareToInstanceId");
				+
				+  return B.CreateAnd(VertexMatches, InstanceMatches, "CompareBoth");
				+}
			
 
				+
			
 
				+// Emits the pixel-shader selection test: loads the x and y components of SV_Position from
				+// the input element named by SVIndices, converts them to unsigned integers, and returns a
				+// boolean value that is true only when both equal the pixel configured in
				+// m_Parameters.PixelShader.
				+Value * DxilDebugInstrumentation::addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices) {
				+  OP * Ops = BC.HlslOP;
				+  IRBuilder<> & B = BC.Builder;
				+
				+  Constant * RowZero = Ops->GetU32Const(0);
				+  Constant * ColumnX = Ops->GetI8Const(0);
				+  Constant * ColumnY = Ops->GetI8Const(1);
				+  UndefValue * UndefI32 = UndefValue::get(Type::getInt32Ty(BC.Ctx));
				+
				+  // Load SV_Position.x and SV_Position.y as floats
				+  Function * LoadInput = Ops->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(BC.Ctx));
				+  Constant * LoadInputOp = Ops->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
				+  Constant * PositionElement = Ops->GetU32Const(SVIndices.PixelShader.Position);
				+  Value * PositionX = B.CreateCall(LoadInput,
				+    { LoadInputOp, PositionElement, RowZero, ColumnX, UndefI32 }, "XPos");
				+  Value * PositionY = B.CreateCall(LoadInput,
				+    { LoadInputOp, PositionElement, RowZero, ColumnY, UndefI32 }, "YPos");
				+
				+  // Convert the float coordinates to unsigned 32-bit pixel indices
				+  Value * PixelX = B.CreateFPToUI(PositionX, Type::getInt32Ty(BC.Ctx), "XIndex");
				+  Value * PixelY = B.CreateFPToUI(PositionY, Type::getInt32Ty(BC.Ctx), "YIndex");
				+
				+  // Compare against the requested pixel position
				+  Constant * WantedX = Ops->GetU32Const(m_Parameters.PixelShader.X);
				+  Value * MatchesX = B.CreateICmpEQ(PixelX, WantedX, "CompareToX");
				+  Constant * WantedY = Ops->GetU32Const(m_Parameters.PixelShader.Y);
				+  Value * MatchesY = B.CreateICmpEQ(PixelY, WantedY, "CompareToY");
				+
				+  return B.CreateAnd(MatchesX, MatchesY, "ComparePos");
				+}
			
 
				+
			
 
				+// Adds the debug-output UAV to the module and emits the CreateHandle call for it, storing
				+// the resulting handle in m_HandleForUAV.
				+//
				+// The resource is a read/write raw buffer named "PIX_DebugUAVName", bound at lower bound 0
				+// with range size 1 in the register space reserved for tools. The module's shader flags
				+// are updated to enable raw and structured buffers.
				+void DxilDebugInstrumentation::addUAV(BuilderContext &BC)
				+{
				+  // The new resource takes the next free position in the module's UAV list
				+  const unsigned int ExpectedId = static_cast<unsigned int>(BC.DM.GetUAVs().size());
				+
				+  // The UAV's element type is a struct holding a single 32-bit integer
				+  SmallVector<llvm::Type*, 1> Fields{ Type::getInt32Ty(BC.Ctx) };
				+  llvm::StructType * DebugUAVTy = llvm::StructType::create(Fields, "PIX_DebugUAV_Type");
				+
				+  std::unique_ptr<DxilResource> DebugUAV = llvm::make_unique<DxilResource>();
				+  DebugUAV->SetGlobalName("PIX_DebugUAVName");
				+  DebugUAV->SetGlobalSymbol(UndefValue::get(DebugUAVTy->getPointerTo()));
				+  DebugUAV->SetID(ExpectedId);
				+  DebugUAV->SetSpaceID((unsigned int)-2); // This is the reserved-for-tools register space
				+  DebugUAV->SetSampleCount(1);
				+  DebugUAV->SetGloballyCoherent(false);
				+  DebugUAV->SetHasCounter(false);
				+  DebugUAV->SetCompType(CompType::getI32());
				+  DebugUAV->SetLowerBound(0);
				+  DebugUAV->SetRangeSize(1);
				+  DebugUAV->SetKind(DXIL::ResourceKind::RawBuffer);
				+  DebugUAV->SetRW(true);
				+
				+  auto ID = BC.DM.AddUAV(std::move(DebugUAV));
				+  assert(ID == ExpectedId);
				+
				+  BC.DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true);
				+
				+  // Emit the CreateHandle call that gives the instrumentation access to the new UAV
				+  OP * Ops = BC.HlslOP;
				+  Function * CreateHandle = Ops->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx));
				+  Constant * OpcodeArg = Ops->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
				+  Constant * ResourceClassArg = Ops->GetI8Const(
				+    static_cast<std::underlying_type<DxilResourceBase::Class>::type>(DXIL::ResourceClass::UAV));
				+  Constant * MetadataIndexArg = Ops->GetU32Const(ID); // position of the metadata record in the corresponding metadata list
				+  Constant * ResourceIndexArg = Ops->GetU32Const(0);
				+  Constant * NonUniformArg = Ops->GetI1Const(0);      // non-uniform resource index: false
				+
				+  m_HandleForUAV = BC.Builder.CreateCall(CreateHandle,
				+    { OpcodeArg, ResourceClassArg, MetadataIndexArg, ResourceIndexArg, NonUniformArg }, "PIX_DebugUAV_Handle");
				+}
			
 
				+
			
 
				+/// Emits the per-stage "is this the invocation of interest?" test and derives
				+/// the values that later steer every debug write.
				+///
				+/// \param BC         Builder context; all IR is emitted at BC.Builder's insertion point.
				+/// \param SVIndices  System-value indices forwarded to the pixel/vertex prolog helpers.
				+///
				+/// Sets:
				+///  - m_SelectionCriterion: the i1 test result.
				+///  - m_OffsetMultiplicand: that result zero-extended to i32 (1 = of interest, 0 = not).
				+///  - m_OffsetAddend:       UAVDumpingGroundOffset() * (1 - m_OffsetMultiplicand), i.e. the
				+///                          dumping-ground offset for uninteresting invocations and 0 otherwise.
				+///  - m_OffsetMask:         UAVDumpingGroundOffset() - 1.
				+void DxilDebugInstrumentation::addInvocationSelectionProlog(BuilderContext &BC, SystemValueIndices SVIndices) {
				+  auto ShaderModel = BC.DM.GetShaderModel();
				+
				+  // Initialized so that a build without asserts can never read an
				+  // indeterminate pointer if an unexpected shader kind gets here.
				+  Value * ParameterTestResult = nullptr;
				+  switch (ShaderModel->GetKind()) {
				+  case DXIL::ShaderKind::Pixel:
				+    ParameterTestResult = addPixelShaderProlog(BC, SVIndices);
				+    break;
				+  case DXIL::ShaderKind::Vertex:
				+    ParameterTestResult = addVertexShaderProlog(BC, SVIndices);
				+    break;
				+  case DXIL::ShaderKind::Compute:
				+    ParameterTestResult = addComputeShaderProlog(BC);
				+    break;
				+  default:
				+    assert(false); // guaranteed by runOnModule
				+    // Defensive fallback for assert-free builds: select no invocation at all.
				+    ParameterTestResult = BC.HlslOP->GetI1Const(0);
				+    break;
				+  }
				+
				+  // This is a convenient place to calculate the values that modify the UAV offset for invocations of interest and for
				+  // UAV size.
				+  m_OffsetMultiplicand = BC.Builder.CreateCast(Instruction::CastOps::ZExt, ParameterTestResult, Type::getInt32Ty(BC.Ctx), "OffsetMultiplicand");
				+  auto InverseOffsetMultiplicand = BC.Builder.CreateSub(BC.HlslOP->GetU32Const(1), m_OffsetMultiplicand, "ComplementOfMultiplicand");
				+  m_OffsetAddend = BC.Builder.CreateMul(BC.HlslOP->GetU32Const(UAVDumpingGroundOffset()), InverseOffsetMultiplicand, "OffsetAddend");
				+  // NOTE(review): AND-ing with (offset - 1) is an exact wrap-around only when the
				+  // offset is a power of two -- confirm against the CPU-side reader.
				+  m_OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);
				+
				+  m_SelectionCriterion = ParameterTestResult;
				+}
			
 
				+
			
 
				+/// Reserves SpaceInBytes bytes in the debug UAV for one record and positions
				+/// m_CurrentIndex at the first byte of that reservation.
				+///
				+/// \param BC            Builder context; the atomic add and offset arithmetic are
				+///                      emitted at BC.Builder's insertion point.
				+/// \param SpaceInBytes  Size of the record about to be written.
				+///
				+/// The reservation is an atomic add on byte offset 0 of the UAV. The amount
				+/// added is SpaceInBytes * m_OffsetMultiplicand, so invocations that are not
				+/// of interest add zero. The value returned by the first such atomic in the
				+/// shader is remembered as m_InvocationId.
				+///
				+/// Must not be called while a previous reservation is still partly unwritten
				+/// (both asserts below enforce this in debug builds).
				+void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes) {
				+  assert(m_CurrentIndex == nullptr);
				+  assert(m_RemainingReservedSpaceInBytes == 0);
				+
				+  m_RemainingReservedSpaceInBytes = SpaceInBytes;
				+
				+  // Insert the UAV increment instruction:
				+  Function* AtomicOpFunc = BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(BC.Ctx));
				+  Constant* AtomicBinOpcode = BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
				+  Constant* AtomicAdd = BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
				+  Constant* Zero32Arg = BC.HlslOP->GetU32Const(0);
				+  UndefValue* UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
				+
				+  // so inc will be zero for uninteresting invocations:
				+  Value * IncrementForThisInvocation;
				+  auto findIncrementInstruction = m_IncrementInstructionBySize.find(SpaceInBytes);
				+  if (findIncrementInstruction == m_IncrementInstructionBySize.end()) {
				+    Constant* Increment = BC.HlslOP->GetU32Const(SpaceInBytes);
				+
				+    // The multiply is cached and reused by every later reservation of the
				+    // same size, in any basic block. It therefore has to dominate all of
				+    // them: emit it directly after the prolog (which lives in the entry
				+    // block) instead of at the current insertion point, which may sit in a
				+    // block that does not dominate later users.
				+    Value * NewIncrement;
				+    if (auto *PrologEnd = dyn_cast_or_null<Instruction>(m_OffsetAddend)) {
				+      IRBuilder<> PrologBuilder(PrologEnd->getNextNode());
				+      NewIncrement = PrologBuilder.CreateMul(Increment, m_OffsetMultiplicand, "IncrementForThisInvocation");
				+    }
				+    else {
				+      // The prolog folded to constants, so this multiply folds to a constant
				+      // too and no dominance question arises.
				+      NewIncrement = BC.Builder.CreateMul(Increment, m_OffsetMultiplicand, "IncrementForThisInvocation");
				+    }
				+
				+    auto it = m_IncrementInstructionBySize.emplace(SpaceInBytes, NewIncrement);
				+    findIncrementInstruction = it.first;
				+  }
				+  IncrementForThisInvocation = findIncrementInstruction->second;
				+
				+  auto PreviousValue = BC.Builder.CreateCall(AtomicOpFunc, {
				+    AtomicBinOpcode,// i32, ; opcode
				+    m_HandleForUAV, // %dx.types.Handle, ; resource handle
				+    AtomicAdd,      // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
				+    Zero32Arg,      // i32, ; coordinate c0: index in bytes
				+    UndefArg,       // i32, ; coordinate c1 (unused)
				+    UndefArg,       // i32, ; coordinate c2 (unused)
				+    IncrementForThisInvocation,      // i32); increment value
				+  }, "UAVIncResult");
				+
				+  // The very first reservation's result doubles as the invocation's ID.
				+  if (m_InvocationId == nullptr)
				+  {
				+      m_InvocationId = PreviousValue;
				+  }
				+
				+  auto MaskedForLimit = BC.Builder.CreateAnd(PreviousValue, m_OffsetMask, "MaskedForUAVLimit");
				+  // The return value will either end up being itself (multiplied by one and added with zero)
				+  // or the "dump uninteresting things here" value of (UAVSize - a bit).
				+  auto MultipliedForInterest = BC.Builder.CreateMul(MaskedForLimit, m_OffsetMultiplicand, "MultipliedForInterest");
				+  auto AddedForInterest = BC.Builder.CreateAdd(MultipliedForInterest, m_OffsetAddend, "AddedForInterest");
				+  m_CurrentIndex = AddedForInterest;
				+}
			
 
				+
			
 
				+/// Writes TheValue into the space previously reserved by reserveDebugEntrySpace,
				+/// as one or more 32-bit UAV stores at m_CurrentIndex.
				+///
				+/// \param BC        Builder context; stores and conversions are emitted at
				+///                  BC.Builder's insertion point.
				+/// \param TheValue  Value to record.
				+///
				+/// Type handling:
				+///  - double:               split with dx.op.splitDouble, low dword stored first.
				+///  - 64-bit integer:       truncated/shifted into low and high dwords, low first.
				+///  - integer below 32 bit: zero-extended to i32.
				+///  - half:                 converted to float.
				+///  - i32 / float:          stored directly with a BufferStore (x component only).
				+///
				+/// Each 32-bit store consumes 4 bytes of the reservation. When the reservation
				+/// is exhausted m_CurrentIndex is reset to nullptr; otherwise it is advanced by 4.
				+void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC, Value * TheValue) {
				+  assert(m_RemainingReservedSpaceInBytes > 0);
				+
				+  llvm::Type *TheValueType = TheValue->getType();
				+  auto TheValueTypeID = TheValueType->getTypeID();
				+  const bool IsInteger = TheValueTypeID == Type::TypeID::IntegerTyID;
				+
				+  if (TheValueTypeID == Type::TypeID::DoubleTyID) {
				+    Function* SplitDouble = BC.HlslOP->GetOpFunc(OP::OpCode::SplitDouble, TheValueType);
				+    Constant* SplitDoubleOpcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::SplitDouble);
				+    auto SplitDoubleInstruction = BC.Builder.CreateCall(SplitDouble, { SplitDoubleOpcode, TheValue }, "SplitDouble");
				+    auto LowBits = BC.Builder.CreateExtractValue(SplitDoubleInstruction, 0, "LowBits");
				+    auto HighBits = BC.Builder.CreateExtractValue(SplitDoubleInstruction, 1, "HighBits");
				+    addDebugEntryValue(BC, LowBits);
				+    addDebugEntryValue(BC, HighBits);
				+  }
				+  else if (IsInteger && TheValueType->getIntegerBitWidth() == 64) {
				+    auto LowBits = BC.Builder.CreateTrunc(TheValue, Type::getInt32Ty(BC.Ctx), "LowBits");
				+    auto ShiftedBits = BC.Builder.CreateLShr(TheValue, 32, "ShiftedBits");
				+    auto HighBits = BC.Builder.CreateTrunc(ShiftedBits, Type::getInt32Ty(BC.Ctx), "HighBits");
				+    addDebugEntryValue(BC, LowBits);
				+    addDebugEntryValue(BC, HighBits);
				+  }
				+  else if (IsInteger && TheValueType->getIntegerBitWidth() < 32) {
				+    // Every integer narrower than a dword (i1, i8, i16, ...) is widened, so
				+    // the store below always sees an i32 whose padding operands match its type.
				+    auto As32 = BC.Builder.CreateZExt(TheValue, Type::getInt32Ty(BC.Ctx), "As32");
				+    addDebugEntryValue(BC, As32);
				+  }
				+  else if (TheValueTypeID == Type::TypeID::HalfTyID) {
				+    auto AsFloat = BC.Builder.CreateFPCast(TheValue, Type::getFloatTy(BC.Ctx), "AsFloat");
				+    addDebugEntryValue(BC, AsFloat);
				+  }
				+  else {
				+    Function* StoreValue = BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, TheValueType);
				+    Constant* StoreValueOpcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore);
				+    Constant* Zero32Arg = BC.HlslOP->GetU32Const(0);
				+
				+    // Padding for the three unused value operands; it must have the same type
				+    // as the stored value.
				+    Constant* ZeroArg = nullptr;
				+    if (IsInteger) {
				+      ZeroArg = BC.HlslOP->GetU32Const(0);
				+    }
				+    else if (TheValueTypeID == Type::TypeID::FloatTyID) {
				+      ZeroArg = BC.HlslOP->GetFloatConst(0.f);
				+    }
				+    else {
				+      // The above are the only two valid types for a UAV store
				+      assert(false);
				+      // Assert-free builds: never pass an indeterminate pointer to CreateCall.
				+      ZeroArg = Constant::getNullValue(TheValueType);
				+    }
				+
				+    Constant* WriteMask_X = BC.HlslOP->GetI8Const(1);
				+    (void)BC.Builder.CreateCall(StoreValue, {
				+      StoreValueOpcode, // i32 opcode
				+      m_HandleForUAV,   // %dx.types.Handle, ; resource handle
				+      m_CurrentIndex,   // i32 c0: index in bytes into UAV
				+      Zero32Arg,        // i32 c1: unused
				+      TheValue,
				+      ZeroArg,          // unused values
				+      ZeroArg,          // unused values
				+      ZeroArg,          // unused values
				+      WriteMask_X
				+    });
				+
				+    m_RemainingReservedSpaceInBytes -= 4;
				+    assert(m_RemainingReservedSpaceInBytes < 1024);  // check for underflow
				+
				+    if (m_RemainingReservedSpaceInBytes != 0) {
				+      m_CurrentIndex = BC.Builder.CreateAdd(m_CurrentIndex, BC.HlslOP->GetU32Const(4));
				+    }
				+    else {
				+      m_CurrentIndex = nullptr;
				+    }
				+  }
				+}
			
 
				+
			
 
				+/// Emits the record that opens an invocation's debug stream: a packed header
				+/// dword of type DebugShaderModifierRecordTypeInvocationStartMarker followed by
				+/// the invocation ID.
				+///
				+/// \param BC  Builder context; the record is emitted at BC.Builder's insertion point.
				+void DxilDebugInstrumentation::addInvocationStartMarker(BuilderContext &BC) {
				+  // Fill in the header on the CPU side; only its packed 32-bit form reaches the shader.
				+  DebugShaderModifierRecordHeader StartRecord{ 0 };
				+  StartRecord.Header.Details.SizeDwords = DebugShaderModifierRecordPayloadSizeDwords(sizeof(StartRecord));
				+  StartRecord.Header.Details.Flags = 0;
				+  StartRecord.Header.Details.Type = DebugShaderModifierRecordTypeInvocationStartMarker;
				+
				+  // Reserving first matters: the first reservation is what defines m_InvocationId.
				+  reserveDebugEntrySpace(BC, sizeof(StartRecord));
				+
				+  Constant* PackedHeader = BC.HlslOP->GetU32Const(StartRecord.Header.u32Header);
				+  addDebugEntryValue(BC, PackedHeader);
				+  addDebugEntryValue(BC, m_InvocationId);
				+}
			
 
				+
			
 
				+/// Emits one "DXIL step" record for Inst: header, invocation ID, running
				+/// instruction index and, unless the record type is the void variant, the
				+/// instruction's result value.
				+///
				+/// \tparam ReturnType  Payload type; determines the record size via
				+///                     DebugShaderModifierRecordDXILStep<ReturnType>.
				+/// \param RecordType   Record type tag stored in the header.
				+/// \param BC           Builder context; the record is emitted at BC.Builder's insertion point.
				+/// \param Inst         Instruction being recorded.
				+///
				+/// Post-increments m_InstructionIndex once per call.
				+template<typename ReturnType>
				+void DxilDebugInstrumentation::addStepEntryForType(DebugShaderModifierRecordType RecordType, BuilderContext &BC, Instruction *Inst) {
				+  DebugShaderModifierRecordDXILStep<ReturnType> StepRecord = {};
				+  StepRecord.Header.Details.Type = static_cast<uint8_t>(RecordType);
				+  StepRecord.Header.Details.SizeDwords = DebugShaderModifierRecordPayloadSizeDwords(sizeof(StepRecord));
				+
				+  reserveDebugEntrySpace(BC, sizeof(StepRecord));
				+
				+  // Fixed part of every step record.
				+  addDebugEntryValue(BC, BC.HlslOP->GetU32Const(StepRecord.Header.u32Header));
				+  addDebugEntryValue(BC, m_InvocationId);
				+  const auto IndexOfThisStep = m_InstructionIndex++;
				+  addDebugEntryValue(BC, BC.HlslOP->GetU32Const(IndexOfThisStep));
				+
				+  // Void steps carry no payload.
				+  const bool CarriesValue = RecordType != DebugShaderModifierRecordTypeDXILStepVoid;
				+  if (CarriesValue) {
				+    addDebugEntryValue(BC, Inst);
				+  }
				+}
			
 
				+
			
 
				+/// Chooses the step-record flavour that matches Inst's result type and emits it.
				+///
				+/// \param BC    Builder context; the record is emitted at BC.Builder's insertion point.
				+/// \param Inst  Instruction to record. PHI nodes are ignored.
				+///
				+/// Mapping: void and struct results -> void record; float and half -> float
				+/// record; 64-bit integers -> uint64 record; all other integers -> uint32
				+/// record; double -> double record. FP128, label, metadata, function, array,
				+/// pointer and vector results assert in debug builds and emit nothing.
				+void DxilDebugInstrumentation::addStepDebugEntry(BuilderContext &BC, Instruction *Inst) {
				+  if (Inst->getOpcode() == Instruction::OtherOps::PHI) {
				+    return;
				+  }
				+
				+  llvm::Type *ResultTy = Inst->getType();
				+
				+  if (ResultTy->isStructTy() || ResultTy->isVoidTy()) {
				+    addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepVoid, BC, Inst);
				+    return;
				+  }
				+
				+  if (ResultTy->isFloatTy() || ResultTy->isHalfTy()) {
				+    addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC, Inst);
				+    return;
				+  }
				+
				+  if (ResultTy->isIntegerTy()) {
				+    if (ResultTy->getIntegerBitWidth() == 64) {
				+      addStepEntryForType<uint64_t>(DebugShaderModifierRecordTypeDXILStepUint64, BC, Inst);
				+    }
				+    else {
				+      addStepEntryForType<uint32_t>(DebugShaderModifierRecordTypeDXILStepUint32, BC, Inst);
				+    }
				+    return;
				+  }
				+
				+  if (ResultTy->isDoubleTy()) {
				+    addStepEntryForType<double>(DebugShaderModifierRecordTypeDXILStepDouble, BC, Inst);
				+    return;
				+  }
				+
				+  // Result types that are not expected here. Any type not named below is
				+  // silently ignored.
				+  assert(!(ResultTy->isFP128Ty() ||
				+           ResultTy->isLabelTy() ||
				+           ResultTy->isMetadataTy() ||
				+           ResultTy->isFunctionTy() ||
				+           ResultTy->isArrayTy() ||
				+           ResultTy->isPointerTy() ||
				+           ResultTy->isVectorTy()));
				+}
			
 
				+
			
 
				+/// Pass entry point: instruments the module's entry function so that every
				+/// original instruction reports a step record into the PIX debug UAV.
				+///
				+/// \param M  Module to instrument.
				+/// \returns  false (module untouched) unless the shader is a pixel, vertex or
				+///           compute shader; true once instrumentation has been added.
				+bool DxilDebugInstrumentation::runOnModule(Module &M) {
				+  DxilModule &DM = M.GetOrCreateDxilModule();
				+  LLVMContext & Ctx = M.getContext();
				+  OP *HlslOP = DM.GetOP();
				+
				+  // Only these three stages have an invocation-selection prolog.
				+  const auto Kind = DM.GetShaderModel()->GetKind();
				+  const bool IsSupportedStage = Kind == DXIL::ShaderKind::Pixel ||
				+                                Kind == DXIL::ShaderKind::Vertex ||
				+                                Kind == DXIL::ShaderKind::Compute;
				+  if (!IsSupportedStage) {
				+    return false;
				+  }
				+
				+  Function *EntryFn = DM.GetEntryFunction();
				+
				+  // Snapshot the original instructions before any new ones are inserted, so
				+  // that the instrumentation itself is never instrumented.
				+  std::vector<Instruction*> OriginalInstructions;
				+  for (auto &Block : *EntryFn) {
				+    for (auto &I : Block) {
				+      OriginalInstructions.push_back(&I);
				+    }
				+  }
				+
				+  // The instrumentation is branchless, which has two consequences:
				+  //  * Every invocation is either of interest or not. If it is, its UAV
				+  //    offset is the expected one; if not, the offset is forced to
				+  //    (UAVsize) - (Small Amount) and the CPU-side code ignores that output.
				+  //  * The invocation of interest may overflow the UAV. The output index is
				+  //    wrapped to handle that, and the CPU side detects overflow by looking
				+  //    for a canary value at (UAVSize) - (Small Amount) * 2 (a conservative
				+  //    definition of overflow).
				+
				+  Instruction* PrologInsertPt = EntryFn->getEntryBlock().getFirstInsertionPt();
				+  IRBuilder<> Builder(PrologInsertPt);
				+
				+  BuilderContext BC{ M, DM, Ctx, HlslOP, Builder };
				+
				+  addUAV(BC);
				+  auto SystemValues = addRequiredSystemValues(BC);
				+  addInvocationSelectionProlog(BC, SystemValues);
				+  addInvocationStartMarker(BC);
				+
				+  // Instrument original instructions:
				+  for (Instruction *Inst : OriginalInstructions) {
				+    // An instruction that produces a value cannot be a terminator, so its
				+    // record goes right after it (where the value is available). A void
				+    // instruction might be a terminator, so its record HAS to go before it.
				+    const bool ProducesValue = Inst->getType()->getTypeID() != Type::TypeID::VoidTyID;
				+    Instruction *InsertBefore = ProducesValue ? Inst->getNextNode() : Inst;
				+
				+    IRBuilder<> StepBuilder(InsertBefore);
				+    BuilderContext StepBC{ BC.M, BC.DM, BC.Ctx, BC.HlslOP, StepBuilder };
				+    addStepDebugEntry(StepBC, Inst);
				+  }
				+
				+  // Re-emit resource metadata so that it includes the UAV added above.
				+  DM.ReEmitDxilResources();
				+
				+  return true;
				+}
			
 
				+
			
 
				+char DxilDebugInstrumentation::ID = 0; // definition of the pass's static ID member
			
 
				+// Factory declared in DxilGenerationPass.h: returns a newly allocated instance of the pass.
			
 
				+ModulePass *llvm::createDxilDebugInstrumentationPass() {
			
 
				+  return new DxilDebugInstrumentation();
			
 
				+}
			
 
				+// Registers the pass under the option name "hlsl-dxil-debug-instrumentation", the name the pix tests pass to %opt.
			
 
				+INITIALIZE_PASS(DxilDebugInstrumentation, "hlsl-dxil-debug-instrumentation", "HLSL DXIL debug instrumentation for PIX", false, false)
			
--- a/tools/clang/test/HLSL/pix/DebugBasic.hlsl
+++ b/tools/clang/test/HLSL/pix/DebugBasic.hlsl
@@ -0,0 +1,29 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation | %FileCheck %s
			
 
				+
			
 
				+// Check that the basic starting header is present:
			
 
				+
			
 
				+// CHECK: %PIX_DebugUAV_Handle = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
			
 
				+// CHECK: %XPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
			
 
				+// CHECK: %YPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
			
 
				+// CHECK: %XIndex = fptoui float %XPos to i32
			
 
				+// CHECK: %YIndex = fptoui float %YPos to i32
			
 
				+// CHECK: %CompareToX = icmp eq i32 %XIndex, 0
			
 
				+// CHECK: %CompareToY = icmp eq i32 %YIndex, 0
			
 
				+// CHECK: %ComparePos = and i1 %CompareToX, %CompareToY
			
 
				+// CHECK: %OffsetMultiplicand = zext i1 %ComparePos to i32
			
 
				+// CHECK: %ComplementOfMultiplicand = sub i32 1, %OffsetMultiplicand
			
 
				+// CHECK: %OffsetAddend = mul i32 983040, %ComplementOfMultiplicand
			
 
				+// CHECK: %IncrementForThisInvocation = mul i32 8, %OffsetMultiplicand
			
 
				+
			
 
				+// Check the first instruction was instrumented:
			
 
				+// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0, i32 0, i32 undef, i32 undef, i32 %IncrementForThisInvocation)
			
 
				+// CHECK: %MaskedForUAVLimit = and i32 %UAVIncResult, 983039
			
 
				+// CHECK: %MultipliedForInterest = mul i32 %MaskedForUAVLimit, %OffsetMultiplicand
			
 
				+// CHECK: %AddedForInterest = add i32 %MultipliedForInterest, %OffsetAddend
			
 
				+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, i32 %AddedForInterest, i32 0, i32 0, i32 0, i32 0, i32 0, i8 1)
			
 
				+
			
 
				+
			
 
				+[RootSignature("")] // empty root signature string
			
 
				+float4 main() : SV_Target { // minimal pixel shader: no inputs, constant output, so the checked IR is almost entirely instrumentation
			
 
				+    return float4(0,0,0,0);
			
 
				+}
			
--- a/tools/clang/test/HLSL/pix/DebugCSParameters.hlsl
+++ b/tools/clang/test/HLSL/pix/DebugCSParameters.hlsl
@@ -0,0 +1,18 @@
 
				+// RUN: %dxc -Emain -Tcs_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation,parameter0=10,parameter1=20,parameter2=30 | %FileCheck %s
			
 
				+
			
 
				+// Check that the CS thread IDs are added properly
			
 
				+
			
 
				+// CHECK: %PIX_DebugUAV_Handle = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
			
 
				+// CHECK: %ThreadIdX = call i32 @dx.op.threadId.i32(i32 93, i32 0)
			
 
				+// CHECK: %ThreadIdY = call i32 @dx.op.threadId.i32(i32 93, i32 1)
			
 
				+// CHECK: %ThreadIdZ = call i32 @dx.op.threadId.i32(i32 93, i32 2)
			
 
				+// CHECK: %CompareToThreadIdX = icmp eq i32 %ThreadIdX, 10
			
 
				+// CHECK: %CompareToThreadIdY = icmp eq i32 %ThreadIdY, 20
			
 
				+// CHECK: %CompareToThreadIdZ = icmp eq i32 %ThreadIdZ, 30
			
 
				+// CHECK: %CompareXAndY = and i1 %CompareToThreadIdX, %CompareToThreadIdY
			
 
				+// CHECK: %CompareAll = and i1 %CompareXAndY, %CompareToThreadIdZ
			
 
				+
			
 
				+[RootSignature("")] // empty root signature string
			
 
				+[numthreads(4, 4, 4)] // 4x4x4 thread group
			
 
				+void main() { // empty compute shader: the test only checks the thread-ID selection prolog added by the pass
			
 
				+}
			
--- a/tools/clang/test/HLSL/pix/DebugFlowControl.hlsl
+++ b/tools/clang/test/HLSL/pix/DebugFlowControl.hlsl
@@ -0,0 +1,53 @@
 
				+// RUN: %dxc -EFlowControlPS -Tps_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation | %FileCheck %s
			
 
				+
			
 
				+// Check that flow control constructs don't break the instrumentation.
			
 
				+
			
 
				+// check instrumentation for one branch. 
			
 
				+
			
 
				+// CHECK:  %UAVIncResult15 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0, i32 0, i32 undef, i32 undef, i32 %IncrementForThisInvocation1)
			
 
				+// CHECK:  %MaskedForUAVLimit16 = and i32 %UAVIncResult15, 983039
			
 
				+// CHECK:  %MultipliedForInterest17 = mul i32 %MaskedForUAVLimit16, %OffsetMultiplicand
			
 
				+// CHECK:  %AddedForInterest18 = add i32 %MultipliedForInterest17, %OffsetAddend
			
 
				+// CHECK:  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, i32 %AddedForInterest18, i32 0, i32 64257, i32 0, i32 0, i32 0, i8 1)
			
 
				+// CHECK:  switch i32
			
 
				+// CHECK:    i32 0, label 
			
 
				+// CHECK:    i32 32, label
			
 
				+// CHECK:  ]
			
 
				+
			
 
				+int i32; // global read by FlowControlPS: drives its switch, its if/else and its loop body
			
 
				+float f32; // global passed to Vectorize in the `case 32` path of FlowControlPS
			
 
				+
			
 
				+float4 Vectorize(float f) // helper: spreads f / 128 across x, y and z, with w fixed at 1
			
 
				+{
			
 
				+  return float4((float)f / 128.f, (float)f / 128.f, (float)f / 128.f, 1.f);
			
 
				+}
			
 
				+
			
 
				+float4 FlowControlPS() : SV_Target // pixel shader entry point (-EFlowControlPS) combining a switch, an if/else and a loop
			
 
				+{
			
 
				+  float4 ret = { 0,0,0,1 };
			
 
				+  switch (i32) // the test's directives look for this switch with its 0 and 32 cases after instrumentation
			
 
				+  {
			
 
				+  case 0:
			
 
				+    ret = float4(1, 0, 1, 1);
			
 
				+    break;
			
 
				+  case 32:
			
 
				+    ret = Vectorize(f32);
			
 
				+    break;
			
 
				+  }
			
 
				+  // Two-way branch on the same global.
			
 
				+  if (i32 > 10)
			
 
				+  {
			
 
				+    ret.r += 0.1f;
			
 
				+  }
			
 
				+  else
			
 
				+  {
			
 
				+    ret.g += 0.1f;
			
 
				+  }
			
 
				+  // Fixed three-iteration loop.
			
 
				+  for (uint i = 0; i < 3; ++i)
			
 
				+  {
			
 
				+    ret.b += (float)i32 / 10.f;
			
 
				+  }
			
 
				+
			
 
				+  return ret;
			
 
				+}
			
--- a/tools/clang/test/HLSL/pix/DebugPSParameters.hlsl
+++ b/tools/clang/test/HLSL/pix/DebugPSParameters.hlsl
@@ -0,0 +1,17 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation,parameter0=1,parameter1=2 | %FileCheck %s
			
 
				+
			
 
				+// Check that the pixel coordinates passed as parameter0/parameter1 appear in the position comparisons:
			
 
				+
			
 
				+// CHECK: %PIX_DebugUAV_Handle = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
			
 
				+// CHECK: %XPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
			
 
				+// CHECK: %YPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
			
 
				+// CHECK: %XIndex = fptoui float %XPos to i32
			
 
				+// CHECK: %YIndex = fptoui float %YPos to i32
			
 
				+// CHECK: %CompareToX = icmp eq i32 %XIndex, 1
			
 
				+// CHECK: %CompareToY = icmp eq i32 %YIndex, 2
			
 
				+
			
 
				+
			
 
				+[RootSignature("")] // empty root signature string
			
 
				+float4 main() : SV_Target{ // minimal pixel shader: no inputs, constant output; the test is about the pass parameters only
			
 
				+  return float4(0,0,0,0);
			
 
				+}
			
--- a/tools/clang/test/HLSL/pix/DebugPreexistingSVInstance.hlsl
+++ b/tools/clang/test/HLSL/pix/DebugPreexistingSVInstance.hlsl
@@ -0,0 +1,15 @@
 
				+// RUN: %dxc -Emain -Tvs_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation | %FileCheck %s
			
 
				+
			
 
				+// Check that the SV_InstanceId check is present:
			
 
				+
			
 
				+// CHECK: %PIX_DebugUAV_Handle = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
			
 
				+// CHECK: %VertId = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
			
 
				+// CHECK: %InstanceId = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
			
 
				+// CHECK: %CompareToVertId = icmp eq i32 %VertId, 0
			
 
				+// CHECK: %CompareToInstanceId = icmp eq i32 %InstanceId, 0
			
 
				+// CHECK: %CompareBoth = and i1 %CompareToVertId, %CompareToInstanceId
			
 
				+
			
 
				+[RootSignature("")] // empty root signature string
			
 
				+float4 main(uint id : SV_InstanceId) : SV_Position{ // vertex shader that already declares SV_InstanceId before the pass runs
			
 
				+    return float4(id,0,0,0);
			
 
				+}
			
--- a/tools/clang/test/HLSL/pix/DebugPreexistingSVPosition.hlsl
+++ b/tools/clang/test/HLSL/pix/DebugPreexistingSVPosition.hlsl
@@ -0,0 +1,18 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation | %FileCheck %s
			
 
				+
			
 
				+// Check that the basic SV_Position check is present:
			
 
				+
			
 
				+// CHECK: %PIX_DebugUAV_Handle = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
			
 
				+// CHECK: %XPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
			
 
				+// CHECK: %YPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
			
 
				+// CHECK: %XIndex = fptoui float %XPos to i32
			
 
				+// CHECK: %YIndex = fptoui float %YPos to i32
			
 
				+// CHECK: %CompareToX = icmp eq i32 %XIndex, 0
			
 
				+// CHECK: %CompareToY = icmp eq i32 %YIndex, 0
			
 
				+// CHECK: %ComparePos = and i1 %CompareToX, %CompareToY
			
 
				+
			
 
				+
			
 
				+[RootSignature("")] // empty root signature string
			
 
				+float4 main(float4 pos : SV_Position) : SV_Target { // pixel shader that already declares SV_Position before the pass runs
			
 
				+    return pos;
			
 
				+}
			
--- a/tools/clang/test/HLSL/pix/DebugPreexistingSVVertex.hlsl
+++ b/tools/clang/test/HLSL/pix/DebugPreexistingSVVertex.hlsl
@@ -0,0 +1,15 @@
 
				+// RUN: %dxc -Emain -Tvs_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation | %FileCheck %s
			
 
				+
			
 
				+// Check that the vertex id check is present:
			
 
				+
			
 
				+// CHECK:  %PIX_DebugUAV_Handle = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
			
 
				+// CHECK:  %VertId = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
			
 
				+// CHECK:  %InstanceId = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
			
 
				+// CHECK:  %CompareToVertId = icmp eq i32 %VertId, 0
			
 
				+// CHECK:  %CompareToInstanceId = icmp eq i32 %InstanceId, 0
			
 
				+// CHECK:  %CompareBoth = and i1 %CompareToVertId, %CompareToInstanceId
			
 
				+
			
 
				+[RootSignature("")] // empty root signature string
			
 
				+float4 main(uint id : SV_VertexId) : SV_Position{ // vertex shader that already declares SV_VertexId before the pass runs
			
 
				+    return float4(id,0,0,0);
			
 
				+}
			
--- a/tools/clang/test/HLSL/pix/DebugUAVSize.hlsl
+++ b/tools/clang/test/HLSL/pix/DebugUAVSize.hlsl
@@ -0,0 +1,13 @@
 
				+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation,UAVSize=100000 | %FileCheck %s
			
 
				+
			
 
				+// Check that the UAV size is reflected in the instrumentation. (Should be passed-in size - 64k)
			
 
				+// (The offset here is the "dumping ground" for non-interesting invocations)
			
 
				+// 100,000 - 65,536 = 34,464
			
 
				+
			
 
				+// CHECK: %OffsetAddend = mul i32 34464, %ComplementOfMultiplicand
			
 
				+
			
 
				+
			
 
				+[RootSignature("")] // empty root signature string
			
 
				+float4 main() : SV_Target { // minimal pixel shader: no inputs, constant output; the test is about the UAVSize option only
			
 
				+    return float4(0,0,0,0);
			
 
				+}
			
--- a/tools/clang/test/HLSL/pix/DebugVSParameters.hlsl
+++ b/tools/clang/test/HLSL/pix/DebugVSParameters.hlsl
@@ -0,0 +1,17 @@
 
				+// RUN: %dxc -Emain -Tvs_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation,parameter0=1,parameter1=2 | %FileCheck %s
			
 
				+
			
 
				+// Check that the instance and vertex id are parsed and present:
			
 
				+
			
 
				+// CHECK: %PIX_DebugUAV_Handle = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)
			
 
				+// CHECK: %VertId = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
			
 
				+// CHECK: %InstanceId = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
			
 
				+// CHECK: %CompareToVertId = icmp eq i32 %VertId, 1
			
 
				+// CHECK: %CompareToInstanceId = icmp eq i32 %InstanceId, 2
			
 
				+// CHECK: %CompareBoth = and i1 %CompareToVertId, %CompareToInstanceId
			
 
				+// CHECK: %OffsetMultiplicand = zext i1 %CompareBoth to i32
			
 
				+
			
 
				+
			
 
				+[RootSignature("")] // empty root signature string
			
 
				+float4 main() : SV_Position{ // vertex shader with no declared inputs, so any vertex/instance ID inputs in the output come from the pass
			
 
				+  return float4(0,0,0,0);
			
 
				+}
			
--- a/tools/clang/unittests/HLSL/CompilerTest.cpp
+++ b/tools/clang/unittests/HLSL/CompilerTest.cpp
@@ -430,6 +430,15 @@ public:
 
				   TEST_METHOD(PixConstantColorFromCB)
			
 
				   TEST_METHOD(PixConstantColorFromCBint)
			
 
				   TEST_METHOD(PixForceEarlyZ)
			
 
				+  TEST_METHOD(PixDebugBasic) // PIX debug-instrumentation tests; each has a matching TEST_F further down in this file
			
 
				+  TEST_METHOD(PixDebugUAVSize)
			
 
				+  TEST_METHOD(PixDebugPSParameters)
			
 
				+  TEST_METHOD(PixDebugVSParameters)
			
 
				+  TEST_METHOD(PixDebugCSParameters)
			
 
				+  TEST_METHOD(PixDebugFlowControl)
			
 
				+  TEST_METHOD(PixDebugPreexistingSVPosition)
			
 
				+  TEST_METHOD(PixDebugPreexistingSVVertex)
			
 
				+  TEST_METHOD(PixDebugPreexistingSVInstance)
			
 
				 
			
 
				   TEST_METHOD(CodeGenAbs1)
			
 
				   TEST_METHOD(CodeGenAbs2)
			
@@ -2866,6 +2875,42 @@ TEST_F(CompilerTest, PixForceEarlyZ) {
 
				   CodeGenTestCheck(L"pix\\forceEarlyZ.hlsl");
			
 
				 }
			
 
				 
			
 
				+TEST_F(CompilerTest, PixDebugBasic) { // each PixDebug* test hands one Debug*.hlsl file from the pix test folder to CodeGenTestCheck
			
 
				+  CodeGenTestCheck(L"pix\\DebugBasic.hlsl");
			
 
				+}
			
 
				+
			
 
				+TEST_F(CompilerTest, PixDebugUAVSize) {
			
 
				+  CodeGenTestCheck(L"pix\\DebugUAVSize.hlsl");
			
 
				+}
			
 
				+
			
 
				+TEST_F(CompilerTest, PixDebugPSParameters) {
			
 
				+  CodeGenTestCheck(L"pix\\DebugPSParameters.hlsl");
			
 
				+}
			
 
				+
			
 
				+TEST_F(CompilerTest, PixDebugVSParameters) {
			
 
				+  CodeGenTestCheck(L"pix\\DebugVSParameters.hlsl");
			
 
				+}
			
 
				+
			
 
				+TEST_F(CompilerTest, PixDebugCSParameters) {
			
 
				+  CodeGenTestCheck(L"pix\\DebugCSParameters.hlsl");
			
 
				+}
			
 
				+
			
 
				+TEST_F(CompilerTest, PixDebugFlowControl) {
			
 
				+  CodeGenTestCheck(L"pix\\DebugFlowControl.hlsl");
			
 
				+}
			
 
				+
			
 
				+TEST_F(CompilerTest, PixDebugPreexistingSVPosition) {
			
 
				+  CodeGenTestCheck(L"pix\\DebugPreexistingSVPosition.hlsl");
			
 
				+}
			
 
				+
			
 
				+TEST_F(CompilerTest, PixDebugPreexistingSVVertex) {
			
 
				+  CodeGenTestCheck(L"pix\\DebugPreexistingSVVertex.hlsl");
			
 
				+}
			
 
				+
			
 
				+TEST_F(CompilerTest, PixDebugPreexistingSVInstance) {
			
 
				+  CodeGenTestCheck(L"pix\\DebugPreexistingSVInstance.hlsl");
			
 
				+}
			
 
				+
			
 
				 TEST_F(CompilerTest, CodeGenAbs1) {
			
 
				   CodeGenTestCheck(L"..\\CodeGenHLSL\\abs1.hlsl");
			
 
				 }
			
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@@ -1282,6 +1282,11 @@ class db_dxil(object):
 
				             {'n':'constant-alpha','t':'float','c':1}])
			
 
				         add_pass('hlsl-dxil-remove-discards', 'DxilRemoveDiscards', 'HLSL DXIL Remove all discard instructions', [])
			
 
				         add_pass('hlsl-dxil-force-early-z', 'DxilForceEarlyZ', 'HLSL DXIL Force the early Z global flag, if shader has no discard calls', [])
			
 
				+        add_pass('hlsl-dxil-debug-instrumentation', 'DxilDebugInstrumentation', 'HLSL DXIL debug instrumentation for PIX', [  # PIX debug pass and the options it accepts
			
 
				+            {'n':'UAVSize','t':'int','c':1},  # size of the debug UAV; the pix tests expect the dumping-ground offset to be this value minus 64K
			
 
				+            {'n':'parameter0','t':'int','c':1},  # invocation selector 0: pixel X (PS), vertex id (VS) or thread id X (CS) in the pix tests
			
 
				+            {'n':'parameter1','t':'int','c':1},  # invocation selector 1: pixel Y (PS), instance id (VS) or thread id Y (CS) in the pix tests
			
 
				+            {'n':'parameter2','t':'int','c':1}])  # invocation selector 2: thread id Z (CS) in the pix tests
			
 
				         add_pass('hlsl-dxil-reduce-msaa-to-single', 'DxilReduceMSAAToSingleSample', 'HLSL DXIL Reduce all MSAA reads to single-sample reads', [])
			
 
				         add_pass('hlsl-dxilfinalize', 'DxilFinalizeModule', 'HLSL DXIL Finalize Module', [])
			
 
				         add_pass('hlsl-dxilemit', 'DxilEmitMetadata', 'HLSL DXIL Metadata Emit', [])