DxilAddPixelHitInstrumentation.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // DxilAddPixelHitInstrumentation.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Provides a pass to add instrumentation to determine pixel hit count and //
  9. // cost. Used by PIX. //
  10. // //
  11. ///////////////////////////////////////////////////////////////////////////////
  #include "dxc/HLSL/DxilGenerationPass.h"
  #include "dxc/HLSL/DxilOperations.h"
  #include "dxc/HLSL/DxilSignatureElement.h"
  #include "dxc/HLSL/DxilModule.h"
  #include "dxc/Support/Global.h"
  #include "dxc/HLSL/DxilTypeSystem.h"
  #include "dxc/HLSL/DxilConstants.h"
  #include "dxc/HLSL/DxilInstructions.h"
  #include "dxc/HLSL/DxilSpanAllocator.h"

  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/InstIterator.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/ADT/BitVector.h"
  #include "llvm/Pass.h"
  #include "llvm/Transforms/Utils/Local.h"

  #include <array>
  #include <cstdlib>
  #include <memory>
  #include <string>
  #include <unordered_set>
  32. using namespace llvm;
  33. using namespace hlsl;
  34. class DxilAddPixelHitInstrumentation : public ModulePass {
  35. bool ForceEarlyZ = false;
  36. bool AddPixelCost = false;
  37. int RTWidth = 1024;
  38. int NumPixels = 128;
  39. int SVPositionIndex = -1;
  40. public:
  41. static char ID; // Pass identification, replacement for typeid
  42. explicit DxilAddPixelHitInstrumentation() : ModulePass(ID) {}
  43. const char *getPassName() const override { return "DXIL Constant Color Mod"; }
  44. void applyOptions(PassOptions O) override;
  45. bool runOnModule(Module &M) override;
  46. };
  47. void DxilAddPixelHitInstrumentation::applyOptions(PassOptions O)
  48. {
  49. for (const auto & option : O)
  50. {
  51. if (0 == option.first.compare("force-early-z"))
  52. {
  53. ForceEarlyZ = atoi(option.second.data()) != 0;
  54. }
  55. else if (0 == option.first.compare("rt-width"))
  56. {
  57. RTWidth = atoi(option.second.data());
  58. }
  59. else if (0 == option.first.compare("num-pixels"))
  60. {
  61. NumPixels = atoi(option.second.data());
  62. }
  63. else if (0 == option.first.compare("add-pixel-cost"))
  64. {
  65. AddPixelCost = atoi(option.second.data()) != 0;
  66. }
  67. else if (0 == option.first.compare("sv-position-index"))
  68. {
  69. SVPositionIndex = atoi(option.second.data());
  70. }
  71. }
  72. }
  73. bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
  74. {
  75. // This pass adds instrumentation for pixel hit counting and pixel cost.
  76. DxilModule &DM = M.GetOrCreateDxilModule();
  77. LLVMContext & Ctx = M.getContext();
  78. OP *HlslOP = DM.GetOP();
  79. // ForceEarlyZ is incompatible with the discard function (the Z has to be tested/written, and may be written before the shader even runs)
  80. if (ForceEarlyZ)
  81. {
  82. DM.m_ShaderFlags.SetForceEarlyDepthStencil(true);
  83. }
  84. hlsl::DxilSignature & InputSignature = DM.GetInputSignature();
  85. auto & InputElements = InputSignature.GetElements();
  86. unsigned SV_Position_ID;
  87. auto SV_Position = std::find_if(InputElements.begin(), InputElements.end(), [](const std::unique_ptr<DxilSignatureElement> & Element) {
  88. return Element->GetSemantic()->GetKind() == hlsl::DXIL::SemanticKind::Position; });
  89. // SV_Position, if present, has to have full mask, so we needn't worry
  90. // about the shader having selected components that don't include x or y.
  91. // If not present, we add it.
  92. if ( SV_Position == InputElements.end() ) {
  93. auto SVPosition = std::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
  94. SVPosition->Initialize("Position", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Linear, 1, 4, SVPositionIndex == -1 ? 0 : SVPositionIndex, 0);
  95. SVPosition->AppendSemanticIndex(0);
  96. SVPosition->SetSigPointKind(DXIL::SigPointKind::PSIn);
  97. SVPosition->SetKind(hlsl::DXIL::SemanticKind::Position);
  98. auto index = InputSignature.AppendElement(std::move(SVPosition));
  99. SV_Position_ID = InputElements[index]->GetID();
  100. }
  101. else {
  102. SV_Position_ID = SV_Position->get()->GetID();
  103. }
  104. auto EntryPointFunction = DM.GetEntryFunction();
  105. auto & EntryBlock = EntryPointFunction->getEntryBlock();
  106. CallInst *HandleForUAV;
  107. {
  108. IRBuilder<> Builder(DM.GetEntryFunction()->getEntryBlock().getFirstInsertionPt());
  109. unsigned int UAVResourceHandle = static_cast<unsigned int>(DM.GetUAVs().size());
  110. // Set up a UAV with structure of a single int
  111. SmallVector<llvm::Type*, 1> Elements{ Type::getInt32Ty(Ctx) };
  112. llvm::StructType *UAVStructTy = llvm::StructType::create(Elements, "class.RWStructuredBuffer");
  113. std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
  114. pUAV->SetGlobalName("PIX_CountUAVName");
  115. pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
  116. pUAV->SetID(UAVResourceHandle);
  117. pUAV->SetSpaceID((unsigned int)-2); // This is the reserved-for-tools register space
  118. pUAV->SetSampleCount(1);
  119. pUAV->SetGloballyCoherent(false);
  120. pUAV->SetHasCounter(false);
  121. pUAV->SetCompType(CompType::getI32());
  122. pUAV->SetLowerBound(0);
  123. pUAV->SetRangeSize(1);
  124. pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
  125. pUAV->SetRW(true);
  126. auto pAnnotation = DM.GetTypeSystem().GetStructAnnotation(UAVStructTy);
  127. if (pAnnotation == nullptr)
  128. {
  129. pAnnotation = DM.GetTypeSystem().AddStructAnnotation(UAVStructTy);
  130. pAnnotation->GetFieldAnnotation(0).SetCBufferOffset(0);
  131. pAnnotation->GetFieldAnnotation(0).SetCompType(hlsl::DXIL::ComponentType::I32);
  132. pAnnotation->GetFieldAnnotation(0).SetFieldName("count");
  133. }
  134. ID = DM.AddUAV(std::move(pUAV));
  135. assert(ID == UAVResourceHandle);
  136. // Create handle for the newly-added UAV
  137. Function* CreateHandleOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
  138. Constant* CreateHandleOpcodeArg = HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
  139. Constant* UAVArg = HlslOP->GetI8Const(static_cast<std::underlying_type<DxilResourceBase::Class>::type>(DXIL::ResourceClass::UAV));
  140. Constant* MetaDataArg = HlslOP->GetU32Const(ID); // position of the metadata record in the corresponding metadata list
  141. Constant* IndexArg = HlslOP->GetU32Const(0); //
  142. Constant* FalseArg = HlslOP->GetI1Const(0); // non-uniform resource index: false
  143. HandleForUAV = Builder.CreateCall(CreateHandleOpFunc,
  144. { CreateHandleOpcodeArg, UAVArg, MetaDataArg, IndexArg, FalseArg }, "PIX_CountUAV_Handle");
  145. DM.ReEmitDxilResources();
  146. }
  147. // todo: is it a reasonable assumption that there will be a "Ret" in the entry block, and that these are the only
  148. // points from which the shader can exit (except for a pixel-kill?)
  149. auto & Instructions = EntryBlock.getInstList();
  150. auto It = Instructions.begin();
  151. while(It != Instructions.end()) {
  152. auto ThisInstruction = It++;
  153. LlvmInst_Ret Ret(ThisInstruction);
  154. if (Ret) {
  155. // Check that there is at least one instruction preceding the Ret (no need to instrument it if there isn't)
  156. if (ThisInstruction->getPrevNode() != nullptr) {
  157. // Start adding instructions right before the Ret:
  158. IRBuilder<> Builder(ThisInstruction);
  159. // ------------------------------------------------------------------------------------------------------------
  160. // Generate instructions to increment (by one) a UAV value corresponding to the pixel currently being rendered
  161. // ------------------------------------------------------------------------------------------------------------
  162. // Useful constants
  163. Constant* Zero32Arg = HlslOP->GetU32Const(0);
  164. Constant* Zero8Arg = HlslOP->GetI8Const(0);
  165. Constant* One32Arg = HlslOP->GetU32Const(1);
  166. Constant* One8Arg = HlslOP->GetI8Const(1);
  167. UndefValue* UndefArg = UndefValue::get(Type::getInt32Ty(Ctx));
  168. Constant* NumPixelsByteOffsetArg = HlslOP->GetU32Const(NumPixels * 4);
  169. // Step 1: Convert SV_POSITION to UINT
  170. Value * XAsInt;
  171. Value * YAsInt;
  172. {
  173. auto LoadInputOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(Ctx));
  174. Constant* LoadInputOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
  175. Constant* SV_Pos_ID = HlslOP->GetU32Const(SV_Position_ID);
  176. auto XPos = Builder.CreateCall(LoadInputOpFunc,
  177. { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, Zero8Arg /*column*/, UndefArg }, "XPos");
  178. auto YPos = Builder.CreateCall(LoadInputOpFunc,
  179. { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, One8Arg /*column*/, UndefArg }, "YPos");
  180. XAsInt = Builder.CreateCast(Instruction::CastOps::FPToUI, XPos, Type::getInt32Ty(Ctx), "XIndex");
  181. YAsInt = Builder.CreateCast(Instruction::CastOps::FPToUI, YPos, Type::getInt32Ty(Ctx), "YIndex");
  182. }
  183. // Step 2: Calculate pixel index
  184. Value * Index;
  185. {
  186. Constant* RTWidthArg = HlslOP->GetI32Const(RTWidth);
  187. auto YOffset = Builder.CreateMul(YAsInt, RTWidthArg, "YOffset");
  188. auto Elementoffset = Builder.CreateAdd(XAsInt, YOffset, "ElementOffset");
  189. Index = Builder.CreateMul(Elementoffset, HlslOP->GetU32Const(4), "ByteIndex");
  190. }
  191. // Insert the UAV increment instruction:
  192. Function* AtomicOpFunc = HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx));
  193. Constant* AtomicBinOpcode = HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
  194. Constant* AtomicAdd = HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
  195. {
  196. (void)Builder.CreateCall(AtomicOpFunc, {
  197. AtomicBinOpcode,// i32, ; opcode
  198. HandleForUAV, // %dx.types.Handle, ; resource handle
  199. AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
  200. Index, // i32, ; coordinate c0: byte offset
  201. UndefArg, // i32, ; coordinate c1 (unused)
  202. UndefArg, // i32, ; coordinate c2 (unused)
  203. One32Arg // i32); increment value
  204. }, "UAVIncResult");
  205. }
  206. if (AddPixelCost) {
  207. // ------------------------------------------------------------------------------------------------------------
  208. // Generate instructions to increment a value corresponding to the current pixel in the second half of the UAV,
  209. // by an amount proportional to the estimated average cost of each pixel in the current draw call.
  210. // ------------------------------------------------------------------------------------------------------------
  211. // Step 1: Retrieve weight value from UAV; it will be placed after the range we're writing to
  212. Value * Weight;
  213. {
  214. Function* LoadWeight = HlslOP->GetOpFunc(OP::OpCode::BufferLoad, Type::getInt32Ty(Ctx));
  215. Constant* LoadWeightOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferLoad);
  216. Constant* OffsetIntoUAV = HlslOP->GetU32Const(NumPixels * 2 * 4);
  217. auto WeightStruct = Builder.CreateCall(LoadWeight, {
  218. LoadWeightOpcode, // i32 opcode
  219. HandleForUAV, // %dx.types.Handle, ; resource handle
  220. OffsetIntoUAV, // i32 c0: byte offset
  221. UndefArg // i32 c1: unused
  222. }, "WeightStruct");
  223. Weight = Builder.CreateExtractValue(WeightStruct, static_cast<uint64_t>(0LL), "Weight");
  224. }
  225. // Step 2: Update write position ("Index") to second half of the UAV
  226. auto OffsetIndex = Builder.CreateAdd(Index, NumPixelsByteOffsetArg, "OffsetByteIndex");
  227. // Step 3: Increment UAV value by the weight
  228. (void)Builder.CreateCall(AtomicOpFunc,{
  229. AtomicBinOpcode, // i32, ; opcode
  230. HandleForUAV, // %dx.types.Handle, ; resource handle
  231. AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
  232. OffsetIndex, // i32, ; coordinate c0: byte offset
  233. UndefArg, // i32, ; coordinate c1 (unused)
  234. UndefArg, // i32, ; coordinate c2 (unused)
  235. Weight // i32); increment value
  236. }, "UAVIncResult2");
  237. }
  238. }
  239. }
  240. }
  241. bool Modified = false;
  242. return Modified;
  243. }
  244. char DxilAddPixelHitInstrumentation::ID = 0;
  245. ModulePass *llvm::createDxilAddPixelHitInstrumentationPass() {
  246. return new DxilAddPixelHitInstrumentation();
  247. }
  248. INITIALIZE_PASS(DxilAddPixelHitInstrumentation, "hlsl-dxil-add-pixel-hit-instrmentation", "DXIL Count completed PS invocations and costs", false, false)