Bladeren bron

PIX: Pixel count/cost pass (#457)

The pass implements functionality for PIX for the "pixel cost", "depth complexity" and "overdraw" visualizers. You can probably infer what the pass does from the names "overdraw" and "depth complexity": For each pixel rendered it increments a corresponding counter in a UAV of a buffer that is the same size as the render target. The "pixel cost" pass does the same thing, only the increment is a weight value calculated from the total cost of the draw call, as derived from PIX's GPU-side profiling system.
Jeff Noyle 8 jaren geleden
bovenliggende
commit
47226d0422

+ 2 - 0
include/dxc/HLSL/DxilGenerationPass.h

@@ -52,6 +52,7 @@ ModulePass *createDxilLegalizeStaticResourceUsePass();
 ModulePass *createDxilLegalizeEvalOperationsPass();
 FunctionPass *createDxilLegalizeSampleOffsetPass();
 FunctionPass *createSimplifyInstPass();
+ModulePass *createDxilAddPixelHitInstrumentationPass();
 ModulePass *createDxilOutputColorBecomesConstantPass();
 ModulePass *createDxilRemoveDiscardsPass();
 ModulePass *createDxilReduceMSAAToSingleSamplePass();
@@ -71,6 +72,7 @@ void initializeDxilLegalizeStaticResourceUsePassPass(llvm::PassRegistry&);
 void initializeDxilLegalizeEvalOperationsPass(llvm::PassRegistry&);
 void initializeDxilLegalizeSampleOffsetPassPass(llvm::PassRegistry&);
 void initializeSimplifyInstPass(llvm::PassRegistry&);
+void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry&);
 void initializeDxilOutputColorBecomesConstantPass(llvm::PassRegistry&);
 void initializeDxilRemoveDiscardsPass(llvm::PassRegistry&);
 void initializeDxilReduceMSAAToSingleSamplePass(llvm::PassRegistry&);

+ 1 - 0
lib/HLSL/CMakeLists.txt

@@ -3,6 +3,7 @@
 add_llvm_library(LLVMHLSL
   ComputeViewIdState.cpp
   ControlDependence.cpp
+  DxilAddPixelHitInstrumentation.cpp
   DxilCBuffer.cpp
   DxilCompType.cpp
   DxilCondenseResources.cpp

+ 9 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -82,6 +82,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDCEPass(Registry);
     initializeDSEPass(Registry);
     initializeDeadInstEliminationPass(Registry);
+    initializeDxilAddPixelHitInstrumentationPass(Registry);
     initializeDxilCondenseResourcesPass(Registry);
     initializeDxilEliminateOutputDynamicIndexingPass(Registry);
     initializeDxilEmitMetadataPass(Registry);
@@ -170,6 +171,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
   static const LPCSTR AlwaysInlinerArgs[] = { "InsertLifetime", "InlineThreshold" };
   static const LPCSTR ArgPromotionArgs[] = { "maxElements" };
   static const LPCSTR CFGSimplifyPassArgs[] = { "Threshold", "Ftor", "bonus-inst-threshold" };
+  static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "force-early-z", "add-pixel-cost", "rt-width", "num-pixels" };
   static const LPCSTR DxilGenerationPassArgs[] = { "NotOptimized" };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "mod-mode", "constant-red", "constant-green", "constant-blue", "constant-alpha" };
   static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "ReplaceAllVectors" };
@@ -200,6 +202,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
   if (strcmp(passName, "always-inline") == 0) return ArrayRef<LPCSTR>(AlwaysInlinerArgs, _countof(AlwaysInlinerArgs));
   if (strcmp(passName, "argpromotion") == 0) return ArrayRef<LPCSTR>(ArgPromotionArgs, _countof(ArgPromotionArgs));
   if (strcmp(passName, "simplifycfg") == 0) return ArrayRef<LPCSTR>(CFGSimplifyPassArgs, _countof(CFGSimplifyPassArgs));
+  if (strcmp(passName, "hlsl-dxil-add-pixel-hit-instrmentation") == 0) return ArrayRef<LPCSTR>(DxilAddPixelHitInstrumentationArgs, _countof(DxilAddPixelHitInstrumentationArgs));
   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
   if (strcmp(passName, "dynamic-vector-to-array") == 0) return ArrayRef<LPCSTR>(DynamicIndexingVectorToArrayArgs, _countof(DynamicIndexingVectorToArrayArgs));
@@ -237,6 +240,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
   static const LPCSTR AlwaysInlinerArgs[] = { "Insert @llvm.lifetime intrinsics", "Insert @llvm.lifetime intrinsics" };
   static const LPCSTR ArgPromotionArgs[] = { "None" };
   static const LPCSTR CFGSimplifyPassArgs[] = { "None", "None", "Control the number of bonus instructions (default = 1)" };
+  static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "None", "None", "None", "None" };
   static const LPCSTR DxilGenerationPassArgs[] = { "None" };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "None", "None", "None", "None", "None" };
   static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "None" };
@@ -267,6 +271,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
   if (strcmp(passName, "always-inline") == 0) return ArrayRef<LPCSTR>(AlwaysInlinerArgs, _countof(AlwaysInlinerArgs));
   if (strcmp(passName, "argpromotion") == 0) return ArrayRef<LPCSTR>(ArgPromotionArgs, _countof(ArgPromotionArgs));
   if (strcmp(passName, "simplifycfg") == 0) return ArrayRef<LPCSTR>(CFGSimplifyPassArgs, _countof(CFGSimplifyPassArgs));
+  if (strcmp(passName, "hlsl-dxil-add-pixel-hit-instrmentation") == 0) return ArrayRef<LPCSTR>(DxilAddPixelHitInstrumentationArgs, _countof(DxilAddPixelHitInstrumentationArgs));
   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
   if (strcmp(passName, "dynamic-vector-to-array") == 0) return ArrayRef<LPCSTR>(DynamicIndexingVectorToArrayArgs, _countof(DynamicIndexingVectorToArrayArgs));
@@ -319,6 +324,7 @@ static bool IsPassOptionName(StringRef S) {
     ||  S.equals("TIRA")
     ||  S.equals("TLIImpl")
     ||  S.equals("Threshold")
+    ||  S.equals("add-pixel-cost")
     ||  S.equals("bonus-inst-threshold")
     ||  S.equals("constant-alpha")
     ||  S.equals("constant-blue")
@@ -330,6 +336,7 @@ static bool IsPassOptionName(StringRef S) {
     ||  S.equals("enable-scoped-noalias")
     ||  S.equals("enable-tbaa")
     ||  S.equals("float2int-max-integer-bw")
+    ||  S.equals("force-early-z")
     ||  S.equals("force-ssa-updater")
     ||  S.equals("jump-threading-threshold")
     ||  S.equals("likely-branch-weight")
@@ -344,10 +351,12 @@ static bool IsPassOptionName(StringRef S) {
     ||  S.equals("mod-mode")
     ||  S.equals("no-discriminators")
     ||  S.equals("noloads")
+    ||  S.equals("num-pixels")
     ||  S.equals("pragma-unroll-threshold")
     ||  S.equals("reroll-num-tolerated-failed-matches")
     ||  S.equals("rewrite-map-file")
     ||  S.equals("rotation-max-header-size")
+    ||  S.equals("rt-width")
     ||  S.equals("sample-profile-file")
     ||  S.equals("sample-profile-max-propagate-iterations")
     ||  S.equals("sroa-random-shuffle-slices")

+ 283 - 0
lib/HLSL/DxilAddPixelHitInstrumentation.cpp

@@ -0,0 +1,283 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilAddPixelHitInstrumentation.cpp                                        //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides a pass to add instrumentation to determine pixel hit count and   //
+// cost. Used by PIX.                                                        //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/DxilOperations.h"
+#include "dxc/HLSL/DxilSignatureElement.h"
+#include "dxc/HLSL/DxilModule.h"
+#include "dxc/Support/Global.h"
+#include "dxc/HLSL/DxilTypeSystem.h"
+#include "dxc/HLSL/DxilConstants.h"
+#include "dxc/HLSL/DxilInstructions.h"
+#include "dxc/HLSL/DxilSpanAllocator.h"
+
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <memory>
+#include <unordered_set>
+#include <array>
+
+using namespace llvm;
+using namespace hlsl;
+
+// Module pass used by PIX to instrument a pixel shader: for every completed
+// invocation it atomically increments a per-pixel counter in a tool-owned UAV
+// and, optionally, a second per-pixel counter by a cost weight read from the
+// same UAV. Behavior is configured through applyOptions().
+class DxilAddPixelHitInstrumentation : public ModulePass {
+
+  // "force-early-z": request the force-early-depth-stencil shader flag
+  // (only honored when the shader does not use discard).
+  bool ForceEarlyZ = false;
+  // "add-pixel-cost": also accumulate a weighted cost per pixel.
+  bool AddPixelCost = false;
+  // "rt-width": multiplier applied to the pixel's y coordinate when computing
+  // the linear pixel index.
+  int RTWidth = 1024;
+  // "num-pixels": number of counters in each half of the UAV; computed
+  // indices are clamped to NumPixels - 1.
+  int NumPixels = 128;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilAddPixelHitInstrumentation() : ModulePass(ID) {}
+  // Human-readable name; kept identical to the description given to
+  // INITIALIZE_PASS for this pass.
+  const char *getPassName() const override { return "DXIL Count completed PS invocations and costs"; }
+  void applyOptions(PassOptions O) override;
+  bool runOnModule(Module &M) override;
+};
+
+// Applies the name/value pass options understood by this pass:
+//   force-early-z  - non-zero enables ForceEarlyZ
+//   rt-width       - integer stored in RTWidth
+//   num-pixels     - integer stored in NumPixels
+//   add-pixel-cost - non-zero enables AddPixelCost
+// Unrecognized option names are ignored. Values are parsed with atoi, so
+// text that does not start with a number yields 0.
+void DxilAddPixelHitInstrumentation::applyOptions(PassOptions O)
+{
+  for (const auto & option : O)
+  {
+    // The option value is a (pointer, length) view that is not guaranteed to
+    // be NUL-terminated, so copy exactly its characters into a std::string
+    // before handing it to atoi; otherwise atoi may read past the value.
+    const std::string ValueText(option.second.data(), option.second.size());
+    const int Value = atoi(ValueText.c_str());
+
+    if (0 == option.first.compare("force-early-z"))
+    {
+      ForceEarlyZ = Value != 0;
+    }
+    else if (0 == option.first.compare("rt-width"))
+    {
+      RTWidth = Value;
+    }
+    else if (0 == option.first.compare("num-pixels"))
+    {
+      NumPixels = Value;
+    }
+    else if (0 == option.first.compare("add-pixel-cost"))
+    {
+      AddPixelCost = Value != 0;
+    }
+  }
+}
+
+// Instruments the entry function of the module for PIX pixel hit counting and
+// (optionally) pixel cost:
+//  - optionally sets the force-early-depth-stencil flag (skipped when the
+//    shader uses discard),
+//  - makes sure SV_Position is part of the input signature,
+//  - before every Ret found in the entry block, emits an atomic add of 1 to
+//    the UAV element for the current pixel, and with AddPixelCost an atomic
+//    add of a weight (loaded from the UAV) to the matching element in the
+//    second half of the UAV.
+// Returns true when the module was changed.
+bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
+{
+  // This pass adds instrumentation for pixel hit counting and pixel cost.
+
+  DxilModule &DM = M.GetOrCreateDxilModule();
+  LLVMContext & Ctx = M.getContext();
+  OP *HlslOP = DM.GetOP();
+
+  // Records whether any change was made so the result reported to the pass
+  // manager is accurate.
+  bool Modified = false;
+
+  // ForceEarlyZ is incompatible with the discard function (the Z has to be tested/written, and may be written before the shader even runs)
+  if (ForceEarlyZ)
+  {
+    if (HlslOP->GetOpFunc(DXIL::OpCode::Discard, Type::getVoidTy(Ctx))->user_empty())
+    {
+      DM.m_ShaderFlags.SetForceEarlyDepthStencil(true);
+      Modified = true;
+    }
+  }
+
+  hlsl::DxilSignature & InputSignature = DM.GetInputSignature();
+
+  auto & InputElements = InputSignature.GetElements();
+
+  unsigned SV_Position_ID = 0;
+
+  auto SV_Position = std::find_if(InputElements.begin(), InputElements.end(), [](const std::unique_ptr<DxilSignatureElement> & Element) {
+    return Element->GetSemantic()->GetKind() == hlsl::DXIL::SemanticKind::Position; });
+
+  // SV_Position, if present, has to have full mask, so we needn't worry
+  // about the shader having selected components that don't include x or y.
+  // If not present, we add it.
+  if ( SV_Position == InputElements.end() ) {
+    auto SVPosition = std::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
+    SVPosition->Initialize("Position", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Linear, 1, 4, 0, 0);
+    SVPosition->AppendSemanticIndex(0);
+    SVPosition->SetSigPointKind(DXIL::SigPointKind::PSIn);
+    SVPosition->SetKind(hlsl::DXIL::SemanticKind::Position);
+
+    auto index = InputSignature.AppendElement(std::move(SVPosition));
+    SV_Position_ID = InputElements[index]->GetID();
+    Modified = true;
+  }
+  else {
+    SV_Position_ID = SV_Position->get()->GetID();
+  }
+
+  auto EntryPointFunction = DM.GetEntryFunction();
+
+  auto & EntryBlock = EntryPointFunction->getEntryBlock();
+  bool HaveInsertedUAV = false;
+
+  // Handle for the counter UAV; created once, at the first instrumented Ret,
+  // and reused for any further Rets.
+  CallInst *HandleForUAV = nullptr;
+
+  // todo: is it a reasonable assumption that there will be a "Ret" in the entry block, and that these are the only
+  // points from which the shader can exit (except for a pixel-kill?)
+  auto & Instructions = EntryBlock.getInstList();
+  auto It = Instructions.begin();
+  while(It != Instructions.end()) {
+    // Advance before inserting so the newly created instructions (which are
+    // placed in front of the Ret) are not revisited.
+    auto ThisInstruction = It++;
+    LlvmInst_Ret Ret(ThisInstruction);
+    if (Ret) {
+      // Check that there is at least one instruction preceding the Ret (no need to instrument it if there isn't)
+      if (ThisInstruction->getPrevNode() != nullptr) {
+
+        // Start adding instructions right before the Ret:
+        IRBuilder<> Builder(ThisInstruction);
+
+        if (!HaveInsertedUAV) {
+
+          // Set up a UAV with structure of a single int
+          SmallVector<llvm::Type*, 1> Elements{ Type::getInt32Ty(Ctx) };
+          llvm::StructType *UAVStructTy = llvm::StructType::create(Elements, "PIX_CountUAV_Type");
+          std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
+          pUAV->SetGlobalName("PIX_CountUAVName");
+          pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
+          pUAV->SetID(0);
+          pUAV->SetSpaceID((unsigned int)-2); // This is the reserved-for-tools register space
+          pUAV->SetSampleCount(1);
+          pUAV->SetGloballyCoherent(false);
+          pUAV->SetHasCounter(false);
+          pUAV->SetCompType(CompType::getI32());
+          pUAV->SetLowerBound(0);
+          pUAV->SetRangeSize(1);
+          pUAV->SetKind(DXIL::ResourceKind::StructuredBuffer);
+          pUAV->SetElementStride(4);
+
+          // Keep the UAV's index in a local of its own: storing it in the
+          // pass's static `ID` member would overwrite the pass identifier and
+          // truncate the index to a char.
+          const unsigned UAVIndex = DM.AddUAV(std::move(pUAV));
+
+          // Create handle for the newly-added UAV
+          Function* CreateHandleOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
+          Constant* CreateHandleOpcodeArg = HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
+          Constant* UAVVArg = HlslOP->GetI8Const(static_cast<std::underlying_type<DxilResourceBase::Class>::type>(DXIL::ResourceClass::UAV));
+          Constant* MetaDataArg = HlslOP->GetU32Const(UAVIndex); // position of the metadata record in the corresponding metadata list
+          Constant* IndexArg = HlslOP->GetU32Const(0); //
+          Constant* FalseArg = HlslOP->GetI1Const(0); // non-uniform resource index: false
+          HandleForUAV = Builder.CreateCall(CreateHandleOpFunc,
+            { CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg }, "PIX_CountUAV_Handle");
+
+          DM.ReEmitDxilResources();
+
+          HaveInsertedUAV = true;
+        }
+
+        Modified = true;
+
+        // ------------------------------------------------------------------------------------------------------------
+        // Generate instructions to increment (by one) a UAV value corresponding to the pixel currently being rendered
+        // ------------------------------------------------------------------------------------------------------------
+
+        // Useful constants
+        Constant* Zero32Arg = HlslOP->GetU32Const(0);
+        Constant* Zero8Arg = HlslOP->GetI8Const(0);
+        Constant* One32Arg = HlslOP->GetU32Const(1);
+        Constant* One8Arg = HlslOP->GetI8Const(1);
+        UndefValue* UndefArg = UndefValue::get(Type::getInt32Ty(Ctx));
+        Constant* NumPixelsArg = HlslOP->GetU32Const(NumPixels);
+        Constant* NumPixelsMinusOneArg = HlslOP->GetU32Const(NumPixels-1);
+
+        // Step 1: Convert SV_POSITION to UINT
+        Value * XAsInt;
+        Value * YAsInt;
+        {
+          auto LoadInputOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(Ctx));
+          Constant* LoadInputOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
+          Constant*  SV_Pos_ID = HlslOP->GetU32Const(SV_Position_ID);
+          auto XPos = Builder.CreateCall(LoadInputOpFunc,
+          { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, Zero8Arg /*column*/, UndefArg }, "XPos");
+          auto YPos = Builder.CreateCall(LoadInputOpFunc,
+          { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, One8Arg /*column*/, UndefArg }, "YPos");
+
+          XAsInt = Builder.CreateCast(Instruction::CastOps::FPToUI, XPos, Type::getInt32Ty(Ctx), "XIndex");
+          YAsInt = Builder.CreateCast(Instruction::CastOps::FPToUI, YPos, Type::getInt32Ty(Ctx), "YIndex");
+        }
+
+        // Step 2: Calculate pixel index (x + y * RTWidth)
+        Value * ClampedIndex;
+        {
+          Constant* RTWidthArg = HlslOP->GetI32Const(RTWidth);
+          auto YOffset = Builder.CreateMul(YAsInt, RTWidthArg);
+          auto Index = Builder.CreateAdd(XAsInt, YOffset);
+
+          // Step 3: Clamp to size of UAV to prevent TDR if something goes wrong
+          auto CompareToLimit = Builder.CreateICmpUGT(Index, NumPixelsMinusOneArg);
+          ClampedIndex = Builder.CreateSelect(CompareToLimit, NumPixelsMinusOneArg, Index, "Clamped");
+        }
+
+        // Insert the UAV increment instruction:
+        Function* AtomicOpFunc = HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx));
+        Constant* AtomicBinOpcode = HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
+        Constant* AtomicAdd = HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
+        {
+          (void)Builder.CreateCall(AtomicOpFunc, {
+            AtomicBinOpcode,// i32, ; opcode
+            HandleForUAV,   // %dx.types.Handle, ; resource handle
+            AtomicAdd,      // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
+            ClampedIndex,   // i32, ; coordinate c0: index in elements
+            Zero32Arg,      // i32, ; coordinate c1: byte offset into element
+            Zero32Arg,      // i32, ; coordinate c2 (unused)
+            One32Arg        // i32); increment value
+          }, "UAVIncResult");
+        }
+
+        if (AddPixelCost) {
+          // ------------------------------------------------------------------------------------------------------------
+          // Generate instructions to increment a value corresponding to the current pixel in the second half of the UAV,
+          // by an amount proportional to the estimated average cost of each pixel in the current draw call.
+          // ------------------------------------------------------------------------------------------------------------
+
+          // Step 1: Retrieve weight value from UAV; it will be placed after the range we're writing to
+          // (element index NumPixels * 2, i.e. just past both per-pixel halves)
+          Value * Weight;
+          {
+            Function* LoadWeight = HlslOP->GetOpFunc(OP::OpCode::BufferLoad, Type::getInt32Ty(Ctx));
+            Constant* LoadWeightOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferLoad);
+            Constant* OffsetIntoUAV = HlslOP->GetU32Const(NumPixels * 2);
+            auto WeightStruct = Builder.CreateCall(LoadWeight, {
+              LoadWeightOpcode, // i32 opcode
+              HandleForUAV,     // %dx.types.Handle, ; resource handle
+              OffsetIntoUAV,    // i32 c0: index in elements into UAV
+              Zero32Arg         // i32 c1: byte offset into struct
+            }, "WeightStruct");
+            Weight = Builder.CreateExtractValue(WeightStruct, static_cast<uint64_t>(0LL), "Weight");
+          }
+
+          // Step 2: Update write position ("Index") to second half of the UAV
+          auto OffsetIndex = Builder.CreateAdd(ClampedIndex, NumPixelsArg);
+
+          // Step 3: Increment UAV value by the weight
+          (void)Builder.CreateCall(AtomicOpFunc,{
+            AtomicBinOpcode,          // i32, ; opcode
+            HandleForUAV,   // %dx.types.Handle, ; resource handle
+            AtomicAdd,      // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
+            OffsetIndex,    // i32, ; coordinate c0: index in elements
+            Zero32Arg,      // i32, ; coordinate c1: byte offset into element
+            Zero32Arg,      // i32, ; coordinate c2 (unused)
+            Weight          // i32); increment value
+          }, "UAVIncResult2");
+        }
+      }
+    }
+  }
+
+  return Modified;
+}
+
+// Definition of the pass identifier declared in the class.
+char DxilAddPixelHitInstrumentation::ID = 0;
+
+// Factory declared in DxilGenerationPass.h: hands back a freshly allocated
+// instance of the pixel hit instrumentation pass.
+ModulePass *llvm::createDxilAddPixelHitInstrumentationPass() {
+  ModulePass *NewPass = new DxilAddPixelHitInstrumentation();
+  return NewPass;
+}
+
+// Registers the pass under its command-line name and description.
+INITIALIZE_PASS(DxilAddPixelHitInstrumentation, "hlsl-dxil-add-pixel-hit-instrmentation", "DXIL Count completed PS invocations and costs", false, false)

+ 3 - 3
lib/HLSL/DxilModule.cpp

@@ -1364,9 +1364,9 @@ void DxilModule::ReEmitDxilResources() {
   const llvm::MDOperand *pSignatures, *pResources, *pProperties;
   m_pMDHelper->GetDxilEntryPoint(pEntries->getOperand(0), pEntryFunc, EntryName, pSignatures, pResources, pProperties);
 
-  MDTuple *pMDSig = pSignatures? (MDTuple*)pSignatures->get():nullptr;
-  MDTuple *pMDProperties = pProperties ? (MDTuple*)pProperties->get():nullptr;
-  MDTuple *pEntry = m_pMDHelper->EmitDxilEntryPointTuple(pEntryFunc, EntryName, pMDSig, pNewResource, pMDProperties);
+  MDTuple *pMDSignatures = m_pMDHelper->EmitDxilSignatures(*m_EntrySignature);
+  MDTuple *pMDProperties = EmitDxilShaderProperties();
+  MDTuple *pEntry = m_pMDHelper->EmitDxilEntryPointTuple(pEntryFunc, EntryName, pMDSignatures, pNewResource, pMDProperties);
   vector<MDNode *> Entries;
   Entries.emplace_back(pEntry);
   m_pMDHelper->UpdateDxilEntryPoints(Entries);

+ 24 - 0
tools/clang/test/HLSL/pix/pixelCounter.hlsl

@@ -0,0 +1,24 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64 | %FileCheck %s
+
+// Check that the input semantic was read correctly:
+// CHECK: %XPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+// CHECK: %YPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+
+// The cast-to-int:
+// CHECK: %XIndex = fptoui float %XPos to i32
+// CHECK: %YIndex = fptoui float %YPos to i32
+
+// Calculation of offset:
+// CHECK: %4 = mul i32 %YIndex, 16
+// CHECK: %5 = add i32 %XIndex, %4
+// CHECK: %6 = icmp ugt i32 %5, 63
+
+// Clamp to UAV size:
+// CHECK: %Clamped = select i1 %6, i32 63, i32 %5
+
+// Check the write to the UAV was emitted:
+// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %Clamped, i32 0, i32 0, i32 1)
+
+// Minimal pixel shader that already takes SV_Position as an input and
+// returns it unchanged.
+float4 main(float4 pos : SV_Position) : SV_Target {
+  return pos;
+}

+ 16 - 0
tools/clang/test/HLSL/pix/pixelCounterAddPixelCost.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64,add-pixel-cost=1 | %FileCheck %s
+
+// Check the write to the UAV was emitted:
+// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %Clamped, i32 0, i32 0, i32 1)
+
+// Check for pixel cost instructions:
+// CHECK: %WeightStruct = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %PIX_CountUAV_Handle, i32 128, i32 0)
+// CHECK: %Weight = extractvalue %dx.types.ResRet.i32 %WeightStruct, 0
+// CHECK: %7 = add i32 %Clamped, 64
+// CHECK: %UAVIncResult2 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %7, i32 0, i32 0, i32 %Weight)
+
+
+
+// Minimal pixel shader used as the input for the add-pixel-cost=1 run:
+// returns its SV_Position input unchanged.
+float4 main(float4 pos : SV_Position) : SV_Target {
+  return pos;
+}

+ 18 - 0
tools/clang/test/HLSL/pix/pixelCounterEarlyZ.hlsl

@@ -0,0 +1,18 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64,force-early-z=1 | %FileCheck %s
+
+// Check the write to the UAV was emitted:
+// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %Clamped, i32 0, i32 0, i32 1)
+
+// Early z flag value is 8. The flags are stored in the last entry in the entry function description record. See:
+// https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-properties-and-capabilities
+// CHECK: !{i32 0, i64 8}
+// Make sure it's the last entry:
+// CHECK-NOT: !{
+
+// Minimal pixel shader without discard, so the force-early-z=1 request in
+// the run line above is expected to take effect.
+float4 main(float4 pos : SV_Position) : SV_Target {
+  return pos;
+}
+
+
+
+

+ 13 - 0
tools/clang/test/HLSL/pix/pixelCounterInappropriateEarlyZ.hlsl

@@ -0,0 +1,13 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64,force-early-z=1 | %FileCheck %s
+
+// Check the write to the UAV was emitted:
+// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %Clamped, i32 0, i32 0, i32 1)
+
+// Early z should NOT be present even though we asked for it, due to the discard instruction. That 8 (with its key value of 0) at the end should be a 0
+// CHECK-NOT: !{i32 0, i64 8}
+
+// Pixel shader that uses discard, which is what makes the force-early-z=1
+// request in the run line above inappropriate.
+float4 main(float4 pos : SV_Position) : SV_Target {
+  discard;
+  return pos;
+}
+

+ 14 - 0
tools/clang/test/HLSL/pix/pixelCounterNoSvPosition.hlsl

@@ -0,0 +1,14 @@
+// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64 | %FileCheck %s
+
+// Check the read from SV_Position was added:
+// CHECK: %XPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
+// CHECK: %YPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
+
+// Check the SV_Position meta-data was added:
+// CHECK: !{i32 0, !"SV_Position", i8 9, i8 3,
+
+// Pixel shader with no SV_Position input, so the pass has to add the
+// signature element itself.
+float4 main() : SV_Target {
+  discard;
+  return float4(0,0,0,0);
+}
+

+ 26 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -407,6 +407,7 @@ public:
   TEST_METHOD(CompileBadHlslThenFail)
   TEST_METHOD(CompileLegacyShaderModelThenFail)
   TEST_METHOD(CompileWhenRecursiveAlbeitStaticTermThenFail)
+
   TEST_METHOD(CompileWhenRecursiveThenFail)
 
   TEST_METHOD(CompileHlsl2015ThenFail)
@@ -420,6 +421,11 @@ public:
 
   TEST_METHOD(PixMSAAToSample0)
   TEST_METHOD(PixRemoveDiscards)
+  TEST_METHOD(PixPixelCounter)
+  TEST_METHOD(PixPixelCounterEarlyZ)
+  TEST_METHOD(PixPixelCounterNoSvPosition)
+  TEST_METHOD(PixPixelCounterInappropriateEarlyZ)
+  TEST_METHOD(PixPixelCounterAddPixelCost)
   TEST_METHOD(PixConstantColor)
   TEST_METHOD(PixConstantColorInt)
   TEST_METHOD(PixConstantColorMRT)
@@ -2723,6 +2729,26 @@ TEST_F(CompilerTest, PixRemoveDiscards) {
   CodeGenTestCheck(L"pix\\removeDiscards.hlsl");
 }
 
+// Pixel counter instrumentation: basic per-pixel increment.
+TEST_F(CompilerTest, PixPixelCounter) {
+  CodeGenTestCheck(L"pix\\pixelCounter.hlsl");
+}
+
+// Pixel counter instrumentation with force-early-z=1 on a shader without discard.
+TEST_F(CompilerTest, PixPixelCounterEarlyZ) {
+  CodeGenTestCheck(L"pix\\pixelCounterEarlyZ.hlsl");
+}
+
+// Pixel counter instrumentation on a shader that does not declare SV_Position.
+TEST_F(CompilerTest, PixPixelCounterNoSvPosition) {
+  CodeGenTestCheck(L"pix\\pixelCounterNoSvPosition.hlsl");
+}
+
+// Pixel counter instrumentation with force-early-z=1 on a shader that uses discard.
+TEST_F(CompilerTest, PixPixelCounterInappropriateEarlyZ) {
+  CodeGenTestCheck(L"pix\\pixelCounterInappropriateEarlyZ.hlsl");
+}
+
+// Pixel counter instrumentation with add-pixel-cost=1.
+TEST_F(CompilerTest, PixPixelCounterAddPixelCost) {
+  CodeGenTestCheck(L"pix\\pixelCounterAddPixelCost.hlsl");
+}
+
 TEST_F(CompilerTest, PixConstantColor) {
   CodeGenTestCheck(L"pix\\constantcolor.hlsl");
 }

+ 5 - 0
utils/hct/hctdb.py

@@ -1268,6 +1268,11 @@ class db_dxil(object):
         add_pass('resource-handle', 'ResourceToHandle', 'Lower resource into handle', [])
         add_pass('hlsl-dxil-condense', 'DxilCondenseResources', 'DXIL Condense Resources', [])
         add_pass('hlsl-dxil-eliminate-output-dynamic', 'DxilEliminateOutputDynamicIndexing', 'DXIL eliminate ouptut dynamic indexing', [])
+        add_pass('hlsl-dxil-add-pixel-hit-instrmentation', 'DxilAddPixelHitInstrumentation', 'DXIL Count completed PS invocations and costs', [
+            {'n':'force-early-z','t':'int','c':1},
+            {'n':'add-pixel-cost','t':'int','c':1},
+            {'n':'rt-width','t':'int','c':1},
+            {'n':'num-pixels','t':'int','c':1}])
         add_pass('hlsl-dxil-constantColor', 'DxilOutputColorBecomesConstant', 'DXIL Constant Color Mod', [
             {'n':'mod-mode','t':'int','c':1},
             {'n':'constant-red','t':'float','c':1},