DxilAddPixelHitInstrumentation.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // DxilAddPixelHitInstrumentation.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Provides a pass to add instrumentation to determine pixel hit count and //
  9. // cost. Used by PIX. //
  10. // //
  11. ///////////////////////////////////////////////////////////////////////////////
  12. #include "dxc/HLSL/DxilGenerationPass.h"
  13. #include "dxc/HLSL/DxilOperations.h"
  14. #include "dxc/HLSL/DxilSignatureElement.h"
  15. #include "dxc/HLSL/DxilModule.h"
  16. #include "dxc/Support/Global.h"
  17. #include "dxc/HLSL/DxilTypeSystem.h"
  18. #include "dxc/HLSL/DxilConstants.h"
  19. #include "dxc/HLSL/DxilInstructions.h"
  20. #include "dxc/HLSL/DxilSpanAllocator.h"
  21. #include "llvm/IR/Instructions.h"
  22. #include "llvm/IR/IntrinsicInst.h"
  23. #include "llvm/IR/InstIterator.h"
  24. #include "llvm/IR/Module.h"
  25. #include "llvm/IR/PassManager.h"
  26. #include "llvm/ADT/BitVector.h"
  27. #include "llvm/Pass.h"
  28. #include "llvm/Transforms/Utils/Local.h"
  29. #include <memory>
  30. #include <unordered_set>
  31. #include <array>
  32. using namespace llvm;
  33. using namespace hlsl;
  34. class DxilAddPixelHitInstrumentation : public ModulePass {
  35. bool ForceEarlyZ = false;
  36. bool AddPixelCost = false;
  37. int RTWidth = 1024;
  38. int NumPixels = 128;
  39. public:
  40. static char ID; // Pass identification, replacement for typeid
  41. explicit DxilAddPixelHitInstrumentation() : ModulePass(ID) {}
  42. const char *getPassName() const override { return "DXIL Constant Color Mod"; }
  43. void applyOptions(PassOptions O) override;
  44. bool runOnModule(Module &M) override;
  45. };
  46. void DxilAddPixelHitInstrumentation::applyOptions(PassOptions O)
  47. {
  48. for (const auto & option : O)
  49. {
  50. if (0 == option.first.compare("force-early-z"))
  51. {
  52. ForceEarlyZ = atoi(option.second.data()) != 0;
  53. }
  54. else if (0 == option.first.compare("rt-width"))
  55. {
  56. RTWidth = atoi(option.second.data());
  57. }
  58. else if (0 == option.first.compare("num-pixels"))
  59. {
  60. NumPixels = atoi(option.second.data());
  61. }
  62. else if (0 == option.first.compare("add-pixel-cost"))
  63. {
  64. AddPixelCost = atoi(option.second.data()) != 0;
  65. }
  66. }
  67. }
  68. bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
  69. {
  70. // This pass adds instrumentation for pixel hit counting and pixel cost.
  71. DxilModule &DM = M.GetOrCreateDxilModule();
  72. LLVMContext & Ctx = M.getContext();
  73. OP *HlslOP = DM.GetOP();
  74. // ForceEarlyZ is incompatible with the discard function (the Z has to be tested/written, and may be written before the shader even runs)
  75. if (ForceEarlyZ)
  76. {
  77. if (HlslOP->GetOpFunc(DXIL::OpCode::Discard, Type::getVoidTy(Ctx))->user_empty())
  78. {
  79. DM.m_ShaderFlags.SetForceEarlyDepthStencil(true);
  80. }
  81. }
  82. hlsl::DxilSignature & InputSignature = DM.GetInputSignature();
  83. auto & InputElements = InputSignature.GetElements();
  84. unsigned SV_Position_ID;
  85. auto SV_Position = std::find_if(InputElements.begin(), InputElements.end(), [](const std::unique_ptr<DxilSignatureElement> & Element) {
  86. return Element->GetSemantic()->GetKind() == hlsl::DXIL::SemanticKind::Position; });
  87. // SV_Position, if present, has to have full mask, so we needn't worry
  88. // about the shader having selected components that don't include x or y.
  89. // If not present, we add it.
  90. if ( SV_Position == InputElements.end() ) {
  91. auto SVPosition = std::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
  92. SVPosition->Initialize("Position", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Linear, 1, 4, 0, 0);
  93. SVPosition->AppendSemanticIndex(0);
  94. SVPosition->SetSigPointKind(DXIL::SigPointKind::PSIn);
  95. SVPosition->SetKind(hlsl::DXIL::SemanticKind::Position);
  96. auto index = InputSignature.AppendElement(std::move(SVPosition));
  97. SV_Position_ID = InputElements[index]->GetID();
  98. }
  99. else {
  100. SV_Position_ID = SV_Position->get()->GetID();
  101. }
  102. auto EntryPointFunction = DM.GetEntryFunction();
  103. auto & EntryBlock = EntryPointFunction->getEntryBlock();
  104. bool HaveInsertedUAV = false;
  105. CallInst *HandleForUAV;
  106. // todo: is it a reasonable assumption that there will be a "Ret" in the entry block, and that these are the only
  107. // points from which the shader can exit (except for a pixel-kill?)
  108. auto & Instructions = EntryBlock.getInstList();
  109. auto It = Instructions.begin();
  110. while(It != Instructions.end()) {
  111. auto ThisInstruction = It++;
  112. LlvmInst_Ret Ret(ThisInstruction);
  113. if (Ret) {
  114. // Check that there is at least one instruction preceding the Ret (no need to instrument it if there isn't)
  115. if (ThisInstruction->getPrevNode() != nullptr) {
  116. // Start adding instructions right before the Ret:
  117. IRBuilder<> Builder(ThisInstruction);
  118. if (!HaveInsertedUAV) {
  119. // Set up a UAV with structure of a single int
  120. SmallVector<llvm::Type*, 1> Elements{ Type::getInt32Ty(Ctx) };
  121. llvm::StructType *UAVStructTy = llvm::StructType::create(Elements, "PIX_CountUAV_Type");
  122. std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
  123. pUAV->SetGlobalName("PIX_CountUAVName");
  124. pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
  125. pUAV->SetID(0);
  126. pUAV->SetSpaceID((unsigned int)-2); // This is the reserved-for-tools register space
  127. pUAV->SetSampleCount(1);
  128. pUAV->SetGloballyCoherent(false);
  129. pUAV->SetHasCounter(false);
  130. pUAV->SetCompType(CompType::getI32());
  131. pUAV->SetLowerBound(0);
  132. pUAV->SetRangeSize(1);
  133. pUAV->SetKind(DXIL::ResourceKind::StructuredBuffer);
  134. pUAV->SetElementStride(4);
  135. ID = DM.AddUAV(std::move(pUAV));
  136. // Create handle for the newly-added UAV
  137. Function* CreateHandleOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
  138. Constant* CreateHandleOpcodeArg = HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
  139. Constant* UAVVArg = HlslOP->GetI8Const(static_cast<std::underlying_type<DxilResourceBase::Class>::type>(DXIL::ResourceClass::UAV));
  140. Constant* MetaDataArg = HlslOP->GetU32Const(ID); // position of the metadata record in the corresponding metadata list
  141. Constant* IndexArg = HlslOP->GetU32Const(0); //
  142. Constant* FalseArg = HlslOP->GetI1Const(0); // non-uniform resource index: false
  143. HandleForUAV = Builder.CreateCall(CreateHandleOpFunc,
  144. { CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg }, "PIX_CountUAV_Handle");
  145. DM.ReEmitDxilResources();
  146. HaveInsertedUAV = true;
  147. }
  148. // ------------------------------------------------------------------------------------------------------------
  149. // Generate instructions to increment (by one) a UAV value corresponding to the pixel currently being rendered
  150. // ------------------------------------------------------------------------------------------------------------
  151. // Useful constants
  152. Constant* Zero32Arg = HlslOP->GetU32Const(0);
  153. Constant* Zero8Arg = HlslOP->GetI8Const(0);
  154. Constant* One32Arg = HlslOP->GetU32Const(1);
  155. Constant* One8Arg = HlslOP->GetI8Const(1);
  156. UndefValue* UndefArg = UndefValue::get(Type::getInt32Ty(Ctx));
  157. Constant* NumPixelsArg = HlslOP->GetU32Const(NumPixels);
  158. Constant* NumPixelsMinusOneArg = HlslOP->GetU32Const(NumPixels-1);
  159. // Step 1: Convert SV_POSITION to UINT
  160. Value * XAsInt;
  161. Value * YAsInt;
  162. {
  163. auto LoadInputOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(Ctx));
  164. Constant* LoadInputOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
  165. Constant* SV_Pos_ID = HlslOP->GetU32Const(SV_Position_ID);
  166. auto XPos = Builder.CreateCall(LoadInputOpFunc,
  167. { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, Zero8Arg /*column*/, UndefArg }, "XPos");
  168. auto YPos = Builder.CreateCall(LoadInputOpFunc,
  169. { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, One8Arg /*column*/, UndefArg }, "YPos");
  170. XAsInt = Builder.CreateCast(Instruction::CastOps::FPToUI, XPos, Type::getInt32Ty(Ctx), "XIndex");
  171. YAsInt = Builder.CreateCast(Instruction::CastOps::FPToUI, YPos, Type::getInt32Ty(Ctx), "YIndex");
  172. }
  173. // Step 2: Calculate pixel index
  174. Value * ClampedIndex;
  175. {
  176. Constant* RTWidthArg = HlslOP->GetI32Const(RTWidth);
  177. auto YOffset = Builder.CreateMul(YAsInt, RTWidthArg);
  178. auto Index = Builder.CreateAdd(XAsInt, YOffset);
  179. // Step 3: Clamp to size of UAV to prevent TDR if something goes wrong
  180. auto CompareToLimit = Builder.CreateICmpUGT(Index, NumPixelsMinusOneArg);
  181. ClampedIndex = Builder.CreateSelect(CompareToLimit, NumPixelsMinusOneArg, Index, "Clamped");
  182. }
  183. // Insert the UAV increment instruction:
  184. Function* AtomicOpFunc = HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx));
  185. Constant* AtomicBinOpcode = HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
  186. Constant* AtomicAdd = HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
  187. {
  188. (void)Builder.CreateCall(AtomicOpFunc, {
  189. AtomicBinOpcode,// i32, ; opcode
  190. HandleForUAV, // %dx.types.Handle, ; resource handle
  191. AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
  192. ClampedIndex, // i32, ; coordinate c0: index in elements
  193. Zero32Arg, // i32, ; coordinate c1: byte offset into element
  194. Zero32Arg, // i32, ; coordinate c2 (unused)
  195. One32Arg // i32); increment value
  196. }, "UAVIncResult");
  197. }
  198. if (AddPixelCost) {
  199. // ------------------------------------------------------------------------------------------------------------
  200. // Generate instructions to increment a value corresponding to the current pixel in the second half of the UAV,
  201. // by an amount proportional to the estimated average cost of each pixel in the current draw call.
  202. // ------------------------------------------------------------------------------------------------------------
  203. // Step 1: Retrieve weight value from UAV; it will be placed after the range we're writing to
  204. Value * Weight;
  205. {
  206. Function* LoadWeight = HlslOP->GetOpFunc(OP::OpCode::BufferLoad, Type::getInt32Ty(Ctx));
  207. Constant* LoadWeightOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferLoad);
  208. Constant* OffsetIntoUAV = HlslOP->GetU32Const(NumPixels * 2);
  209. auto WeightStruct = Builder.CreateCall(LoadWeight, {
  210. LoadWeightOpcode, // i32 opcode
  211. HandleForUAV, // %dx.types.Handle, ; resource handle
  212. OffsetIntoUAV, // i32 c0: index in elements into UAV
  213. Zero32Arg // i32 c1: byte offset into struct
  214. }, "WeightStruct");
  215. Weight = Builder.CreateExtractValue(WeightStruct, static_cast<uint64_t>(0LL), "Weight");
  216. }
  217. // Step 2: Update write position ("Index") to second half of the UAV
  218. auto OffsetIndex = Builder.CreateAdd(ClampedIndex, NumPixelsArg);
  219. // Step 3: Increment UAV value by the weight
  220. (void)Builder.CreateCall(AtomicOpFunc,{
  221. AtomicBinOpcode, // i32, ; opcode
  222. HandleForUAV, // %dx.types.Handle, ; resource handle
  223. AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
  224. OffsetIndex, // i32, ; coordinate c0: index in elements
  225. Zero32Arg, // i32, ; coordinate c1: byte offset into element
  226. Zero32Arg, // i32, ; coordinate c2 (unused)
  227. Weight // i32); increment value
  228. }, "UAVIncResult2");
  229. }
  230. }
  231. }
  232. }
  233. bool Modified = false;
  234. return Modified;
  235. }
// Storage for the pass identifier declared in the class ("Pass
// identification, replacement for typeid").
char DxilAddPixelHitInstrumentation::ID = 0;
// Factory: returns a newly allocated instance of the pass. The caller
// receives a raw pointer to the new object.
ModulePass *llvm::createDxilAddPixelHitInstrumentationPass() {
return new DxilAddPixelHitInstrumentation();
}
// Registers the pass under the argument string below. NOTE(review): the
// string is spelled "instrmentation" (sic); it is left as-is here because it
// is presumably what existing tools pass to select this pass -- confirm before
// renaming.
INITIALIZE_PASS(DxilAddPixelHitInstrumentation, "hlsl-dxil-add-pixel-hit-instrmentation", "DXIL Count completed PS invocations and costs", false, false)