DxilAddPixelHitInstrumentation.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // DxilAddPixelHitInstrumentation.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Provides a pass to add instrumentation to determine pixel hit count and //
  9. // cost. Used by PIX. //
  10. // //
  11. ///////////////////////////////////////////////////////////////////////////////
  #include "dxc/HLSL/DxilGenerationPass.h"
  #include "dxc/HLSL/DxilOperations.h"
  #include "dxc/HLSL/DxilSignatureElement.h"
  #include "dxc/HLSL/DxilModule.h"
  #include "dxc/Support/Global.h"
  #include "dxc/HLSL/DxilTypeSystem.h"
  #include "dxc/HLSL/DxilConstants.h"
  #include "dxc/HLSL/DxilInstructions.h"
  #include "dxc/HLSL/DxilSpanAllocator.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/InstIterator.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/ADT/BitVector.h"
  #include "llvm/Pass.h"
  #include "llvm/Transforms/Utils/Local.h"
  #include <algorithm>
  #include <array>
  #include <cstdlib>
  #include <memory>
  #include <string>
  #include <unordered_set>
  32. using namespace llvm;
  33. using namespace hlsl;
  34. class DxilAddPixelHitInstrumentation : public ModulePass {
  35. bool ForceEarlyZ = false;
  36. bool AddPixelCost = false;
  37. int RTWidth = 1024;
  38. int NumPixels = 128;
  39. public:
  40. static char ID; // Pass identification, replacement for typeid
  41. explicit DxilAddPixelHitInstrumentation() : ModulePass(ID) {}
  42. const char *getPassName() const override { return "DXIL Constant Color Mod"; }
  43. void applyOptions(PassOptions O) override;
  44. bool runOnModule(Module &M) override;
  45. };
  46. void DxilAddPixelHitInstrumentation::applyOptions(PassOptions O)
  47. {
  48. for (const auto & option : O)
  49. {
  50. if (0 == option.first.compare("force-early-z"))
  51. {
  52. ForceEarlyZ = atoi(option.second.data()) != 0;
  53. }
  54. else if (0 == option.first.compare("rt-width"))
  55. {
  56. RTWidth = atoi(option.second.data());
  57. }
  58. else if (0 == option.first.compare("num-pixels"))
  59. {
  60. NumPixels = atoi(option.second.data());
  61. }
  62. else if (0 == option.first.compare("add-pixel-cost"))
  63. {
  64. AddPixelCost = atoi(option.second.data()) != 0;
  65. }
  66. }
  67. }
  68. bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
  69. {
  70. // This pass adds instrumentation for pixel hit counting and pixel cost.
  71. DxilModule &DM = M.GetOrCreateDxilModule();
  72. LLVMContext & Ctx = M.getContext();
  73. OP *HlslOP = DM.GetOP();
  74. // ForceEarlyZ is incompatible with the discard function (the Z has to be tested/written, and may be written before the shader even runs)
  75. if (ForceEarlyZ)
  76. {
  77. DM.m_ShaderFlags.SetForceEarlyDepthStencil(true);
  78. }
  79. hlsl::DxilSignature & InputSignature = DM.GetInputSignature();
  80. auto & InputElements = InputSignature.GetElements();
  81. unsigned SV_Position_ID;
  82. auto SV_Position = std::find_if(InputElements.begin(), InputElements.end(), [](const std::unique_ptr<DxilSignatureElement> & Element) {
  83. return Element->GetSemantic()->GetKind() == hlsl::DXIL::SemanticKind::Position; });
  84. // SV_Position, if present, has to have full mask, so we needn't worry
  85. // about the shader having selected components that don't include x or y.
  86. // If not present, we add it.
  87. if ( SV_Position == InputElements.end() ) {
  88. auto SVPosition = std::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
  89. SVPosition->Initialize("Position", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Linear, 1, 4, 0, 0);
  90. SVPosition->AppendSemanticIndex(0);
  91. SVPosition->SetSigPointKind(DXIL::SigPointKind::PSIn);
  92. SVPosition->SetKind(hlsl::DXIL::SemanticKind::Position);
  93. auto index = InputSignature.AppendElement(std::move(SVPosition));
  94. SV_Position_ID = InputElements[index]->GetID();
  95. }
  96. else {
  97. SV_Position_ID = SV_Position->get()->GetID();
  98. }
  99. auto EntryPointFunction = DM.GetEntryFunction();
  100. auto & EntryBlock = EntryPointFunction->getEntryBlock();
  101. bool HaveInsertedUAV = false;
  102. CallInst *HandleForUAV;
  103. // todo: is it a reasonable assumption that there will be a "Ret" in the entry block, and that these are the only
  104. // points from which the shader can exit (except for a pixel-kill?)
  105. auto & Instructions = EntryBlock.getInstList();
  106. auto It = Instructions.begin();
  107. while(It != Instructions.end()) {
  108. auto ThisInstruction = It++;
  109. LlvmInst_Ret Ret(ThisInstruction);
  110. if (Ret) {
  111. // Check that there is at least one instruction preceding the Ret (no need to instrument it if there isn't)
  112. if (ThisInstruction->getPrevNode() != nullptr) {
  113. // Start adding instructions right before the Ret:
  114. IRBuilder<> Builder(ThisInstruction);
  115. if (!HaveInsertedUAV) {
  116. // Set up a UAV with structure of a single int
  117. SmallVector<llvm::Type*, 1> Elements{ Type::getInt32Ty(Ctx) };
  118. llvm::StructType *UAVStructTy = llvm::StructType::create(Elements, "PIX_CountUAV_Type");
  119. std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
  120. pUAV->SetGlobalName("PIX_CountUAVName");
  121. pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
  122. pUAV->SetID(0);
  123. pUAV->SetSpaceID((unsigned int)-2); // This is the reserved-for-tools register space
  124. pUAV->SetSampleCount(1);
  125. pUAV->SetGloballyCoherent(false);
  126. pUAV->SetHasCounter(false);
  127. pUAV->SetCompType(CompType::getI32());
  128. pUAV->SetLowerBound(0);
  129. pUAV->SetRangeSize(1);
  130. pUAV->SetKind(DXIL::ResourceKind::StructuredBuffer);
  131. pUAV->SetElementStride(4);
  132. ID = DM.AddUAV(std::move(pUAV));
  133. // Create handle for the newly-added UAV
  134. Function* CreateHandleOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
  135. Constant* CreateHandleOpcodeArg = HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
  136. Constant* UAVVArg = HlslOP->GetI8Const(static_cast<std::underlying_type<DxilResourceBase::Class>::type>(DXIL::ResourceClass::UAV));
  137. Constant* MetaDataArg = HlslOP->GetU32Const(ID); // position of the metadata record in the corresponding metadata list
  138. Constant* IndexArg = HlslOP->GetU32Const(0); //
  139. Constant* FalseArg = HlslOP->GetI1Const(0); // non-uniform resource index: false
  140. HandleForUAV = Builder.CreateCall(CreateHandleOpFunc,
  141. { CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg }, "PIX_CountUAV_Handle");
  142. DM.ReEmitDxilResources();
  143. HaveInsertedUAV = true;
  144. }
  145. // ------------------------------------------------------------------------------------------------------------
  146. // Generate instructions to increment (by one) a UAV value corresponding to the pixel currently being rendered
  147. // ------------------------------------------------------------------------------------------------------------
  148. // Useful constants
  149. Constant* Zero32Arg = HlslOP->GetU32Const(0);
  150. Constant* Zero8Arg = HlslOP->GetI8Const(0);
  151. Constant* One32Arg = HlslOP->GetU32Const(1);
  152. Constant* One8Arg = HlslOP->GetI8Const(1);
  153. UndefValue* UndefArg = UndefValue::get(Type::getInt32Ty(Ctx));
  154. Constant* NumPixelsArg = HlslOP->GetU32Const(NumPixels);
  155. Constant* NumPixelsMinusOneArg = HlslOP->GetU32Const(NumPixels-1);
  156. // Step 1: Convert SV_POSITION to UINT
  157. Value * XAsInt;
  158. Value * YAsInt;
  159. {
  160. auto LoadInputOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(Ctx));
  161. Constant* LoadInputOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
  162. Constant* SV_Pos_ID = HlslOP->GetU32Const(SV_Position_ID);
  163. auto XPos = Builder.CreateCall(LoadInputOpFunc,
  164. { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, Zero8Arg /*column*/, UndefArg }, "XPos");
  165. auto YPos = Builder.CreateCall(LoadInputOpFunc,
  166. { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, One8Arg /*column*/, UndefArg }, "YPos");
  167. XAsInt = Builder.CreateCast(Instruction::CastOps::FPToUI, XPos, Type::getInt32Ty(Ctx), "XIndex");
  168. YAsInt = Builder.CreateCast(Instruction::CastOps::FPToUI, YPos, Type::getInt32Ty(Ctx), "YIndex");
  169. }
  170. // Step 2: Calculate pixel index
  171. Value * ClampedIndex;
  172. {
  173. Constant* RTWidthArg = HlslOP->GetI32Const(RTWidth);
  174. auto YOffset = Builder.CreateMul(YAsInt, RTWidthArg);
  175. auto Index = Builder.CreateAdd(XAsInt, YOffset);
  176. // Step 3: Clamp to size of UAV to prevent TDR if something goes wrong
  177. auto CompareToLimit = Builder.CreateICmpUGT(Index, NumPixelsMinusOneArg);
  178. ClampedIndex = Builder.CreateSelect(CompareToLimit, NumPixelsMinusOneArg, Index, "Clamped");
  179. }
  180. // Insert the UAV increment instruction:
  181. Function* AtomicOpFunc = HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx));
  182. Constant* AtomicBinOpcode = HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
  183. Constant* AtomicAdd = HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
  184. {
  185. (void)Builder.CreateCall(AtomicOpFunc, {
  186. AtomicBinOpcode,// i32, ; opcode
  187. HandleForUAV, // %dx.types.Handle, ; resource handle
  188. AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
  189. ClampedIndex, // i32, ; coordinate c0: index in elements
  190. Zero32Arg, // i32, ; coordinate c1: byte offset into element
  191. Zero32Arg, // i32, ; coordinate c2 (unused)
  192. One32Arg // i32); increment value
  193. }, "UAVIncResult");
  194. }
  195. if (AddPixelCost) {
  196. // ------------------------------------------------------------------------------------------------------------
  197. // Generate instructions to increment a value corresponding to the current pixel in the second half of the UAV,
  198. // by an amount proportional to the estimated average cost of each pixel in the current draw call.
  199. // ------------------------------------------------------------------------------------------------------------
  200. // Step 1: Retrieve weight value from UAV; it will be placed after the range we're writing to
  201. Value * Weight;
  202. {
  203. Function* LoadWeight = HlslOP->GetOpFunc(OP::OpCode::BufferLoad, Type::getInt32Ty(Ctx));
  204. Constant* LoadWeightOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferLoad);
  205. Constant* OffsetIntoUAV = HlslOP->GetU32Const(NumPixels * 2);
  206. auto WeightStruct = Builder.CreateCall(LoadWeight, {
  207. LoadWeightOpcode, // i32 opcode
  208. HandleForUAV, // %dx.types.Handle, ; resource handle
  209. OffsetIntoUAV, // i32 c0: index in elements into UAV
  210. Zero32Arg // i32 c1: byte offset into struct
  211. }, "WeightStruct");
  212. Weight = Builder.CreateExtractValue(WeightStruct, static_cast<uint64_t>(0LL), "Weight");
  213. }
  214. // Step 2: Update write position ("Index") to second half of the UAV
  215. auto OffsetIndex = Builder.CreateAdd(ClampedIndex, NumPixelsArg);
  216. // Step 3: Increment UAV value by the weight
  217. (void)Builder.CreateCall(AtomicOpFunc,{
  218. AtomicBinOpcode, // i32, ; opcode
  219. HandleForUAV, // %dx.types.Handle, ; resource handle
  220. AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
  221. OffsetIndex, // i32, ; coordinate c0: index in elements
  222. Zero32Arg, // i32, ; coordinate c1: byte offset into element
  223. Zero32Arg, // i32, ; coordinate c2 (unused)
  224. Weight // i32); increment value
  225. }, "UAVIncResult2");
  226. }
  227. }
  228. }
  229. }
  230. bool Modified = false;
  231. return Modified;
  232. }
  233. char DxilAddPixelHitInstrumentation::ID = 0;
  234. ModulePass *llvm::createDxilAddPixelHitInstrumentationPass() {
  235. return new DxilAddPixelHitInstrumentation();
  236. }
  237. INITIALIZE_PASS(DxilAddPixelHitInstrumentation, "hlsl-dxil-add-pixel-hit-instrmentation", "DXIL Count completed PS invocations and costs", false, false)