DxilDebugInstrumentation.cpp 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // DxilDebugInstrumentation.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Adds instrumentation that enables shader debugging in PIX //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #include "dxc/HLSL/DxilGenerationPass.h"
  12. #include "dxc/DXIL/DxilModule.h"
  13. #include "dxc/DXIL/DxilOperations.h"
  14. #include "dxc/DxilPIXPasses/DxilPIXPasses.h"
  15. #include "dxc/DXIL/DxilUtil.h"
  16. #include "llvm/IR/Module.h"
  17. #include "llvm/IR/Constants.h"
  18. #include "llvm/IR/InstIterator.h"
  19. #include "llvm/IR/IRBuilder.h"
  20. #include "llvm/ADT/STLExtras.h"
  21. #include "DxilPIXVirtualRegisters.h"
  22. using namespace llvm;
  23. using namespace hlsl;
  24. // Overview of instrumentation:
  25. //
  26. // In summary, instructions are added that cause a "trace" of the execution of the shader to be written
  27. // out to a UAV. This trace is then used by a debugger application to provide a post-mortem debugging
  28. // experience that reconstructs the execution history of the shader.
  29. //
  30. // The trace is only required for a particular shader instance of interest, and a branchless mechanism
  31. // is used to write the trace either to an incrementing location within the UAV, or to a "dumping ground"
  32. // area at the top of the UAV if the instance is not of interest.
  33. //
  34. // The following modifications are made:
  35. //
  36. // First, instructions are added to the top of the entry point function that implement the following:
  37. // - Examine the input variables that define the instance of the shader that is running. This will
  38. // be SV_Position for pixel shaders, SV_Vertex+SV_Instance for vertex shaders, thread id for compute
  39. // shaders etc. If these system values need to be added to the shader, then they are also added to the
  40. // input signature, if appropriate.
  41. // - Compare the above variables with the instance of interest defined by the invoker of this pass.
  42. // Deduce two values: a multiplicand and an addend that together allow a branchless calculation of
  43. // the offset into the UAV at which to write via "offset = offset * multiplicand + addend."
  44. // If the instance is NOT of interest, the multiplicand is zero and the addend is
  45. // sizeof(UAV)-(a little bit), causing writes for uninteresting invocations to end up at the top of
  46. // the UAV. Otherwise the multiplicand is 1 and the addend is 0.
  47. // - Calculate an "instance identifier". Even with the above instance identification, several invocations may
  48. // end up matching the selection criteria. Specifically, this happens during a draw call in which many
  49. // triangles overlap the pixel of interest. More on this below.
  50. //
  51. // During execution, the instrumentation for most instructions cause data to be emitted to the UAV.
  52. // The index at which data is written is identified by treating the first uint32 of the UAV as an index
  53. // which is atomically incremented by the instrumentation. The very first value of this counter that is
  54. // encountered by each invocation is used as the "instance identifier" mentioned above. That instance
  55. // identifier is written out with each packet, since many pixel shaders executing in parallel will emit
  56. // interleaved packets, and the debugger application uses the identifiers to group packets from each separate
  57. // invocation together.
  58. //
  59. // If an instruction has a non-void and primitive return type, i.e. isn't a struct, then the instrumentation
  60. // will write that value out to the UAV as well as part of the "step" data packet.
  61. //
  62. // The limiting size of the UAV is enforced in a branchless way by ANDing the offset with a precomputed
  63. // value that is sizeof(UAV)-64. The actual size of the UAV allocated by the caller is required to be
  64. // a power of two plus 64 for this reason. The caller detects UAV overrun by examining a canary value
  65. // close to the end of the power-of-two size of the UAV. If this value has been overwritten, the debug session
  66. // is deemed to have overflowed the UAV. The caller will then allocate a UAV that is twice the size and
  67. // try again, up to a predefined maximum.
  // Size of the "dumping ground" region that is subtracted from the UAV size to
  // find where uninteresting invocations write (see UAVDumpingGroundOffset).
  // Must stay in sync with the same-named value in the debugger application's
  // WinPixShaderUtils.h.
  constexpr uint64_t DebugBufferDumpingGroundSize = uint64_t{64} * 1024;
  // Record type tags written into the "Type" field of each debug record header.
  // These definitions echo those in the debugger application's
  // debugshaderrecord.h file, so the numeric values must not change.
  enum DebugShaderModifierRecordType {
    DebugShaderModifierRecordTypeInvocationStartMarker = 0,
    DebugShaderModifierRecordTypeStep = 1,
    DebugShaderModifierRecordTypeEvent = 2,
    DebugShaderModifierRecordTypeInputRegister = 3,
    DebugShaderModifierRecordTypeReadRegister = 4,
    DebugShaderModifierRecordTypeWrittenRegister = 5,
    DebugShaderModifierRecordTypeRegisterRelativeIndex0 = 6,
    DebugShaderModifierRecordTypeRegisterRelativeIndex1 = 7,
    DebugShaderModifierRecordTypeRegisterRelativeIndex2 = 8,

    // DXIL step records, one per kind of recorded return value.
    DebugShaderModifierRecordTypeDXILStepVoid = 251,
    DebugShaderModifierRecordTypeDXILStepFloat = 252,
    DebugShaderModifierRecordTypeDXILStepUint32 = 253,
    DebugShaderModifierRecordTypeDXILStepUint64 = 254,
    DebugShaderModifierRecordTypeDXILStepDouble = 255,
  };
  // Record layouts shared with the debugger application. They echo the structs
  // in that application's debugshaderrecord.h, but are recapitulated here
  // because the originals use unnamed unions, which DXCompiler's build
  // disallows. All of them are declared under 4-byte packing.
  #pragma pack(push,4)

  // Common two-dword header: a packed descriptor word followed by a UID.
  struct DebugShaderModifierRecordHeader {
    union {
      struct {
        uint32_t SizeDwords : 4;
        uint32_t Flags : 4;
        uint32_t Type : 8;
        uint32_t HeaderPayload : 16;
      } Details;
      uint32_t u32Header; // the same 32 bits viewed as one word
    } Header;
    uint32_t UID;
  };

  // Header used by DXIL "step" records: the 16-bit payload slot carries an
  // opcode, and an instruction offset follows the UID.
  struct DebugShaderModifierRecordDXILStepBase {
    union {
      struct {
        uint32_t SizeDwords : 4;
        uint32_t Flags : 4;
        uint32_t Type : 8;
        uint32_t Opcode : 16;
      } Details;
      uint32_t u32Header; // the same 32 bits viewed as one word
    } Header;
    uint32_t UID;
    uint32_t InstructionOffset;
  };

  // Step record that additionally carries a return value of type ReturnType
  // and a packed value ordinal (base + index).
  template <typename ReturnType>
  struct DebugShaderModifierRecordDXILStep : DebugShaderModifierRecordDXILStepBase {
    ReturnType ReturnValue;
    union {
      struct {
        uint32_t ValueOrdinalBase : 16;
        uint32_t ValueOrdinalIndex : 16;
      } Details;
      uint32_t u32ValueOrdinal; // the same 32 bits viewed as one word
    } ValueOrdinal;
  };

  // Step record for instructions without a return value: base fields only.
  template <>
  struct DebugShaderModifierRecordDXILStep<void> : DebugShaderModifierRecordDXILStepBase {
  };

  #pragma pack(pop)
  131. uint32_t DebugShaderModifierRecordPayloadSizeDwords(size_t recordTotalSizeBytes) {
  132. return ((recordTotalSizeBytes - sizeof(DebugShaderModifierRecordHeader)) / sizeof(uint32_t));
  133. }
  134. class DxilDebugInstrumentation : public ModulePass {
  135. private:
  136. union ParametersAllTogether {
  137. unsigned Parameters[3];
  138. struct PixelShaderParameters {
  139. unsigned X;
  140. unsigned Y;
  141. } PixelShader;
  142. struct VertexShaderParameters {
  143. unsigned VertexId;
  144. unsigned InstanceId;
  145. } VertexShader;
  146. struct ComputeShaderParameters {
  147. unsigned ThreadIdX;
  148. unsigned ThreadIdY;
  149. unsigned ThreadIdZ;
  150. } ComputeShader;
  151. struct GeometryShaderParameters {
  152. unsigned PrimitiveId;
  153. unsigned InstanceId;
  154. } GeometryShader;
  155. } m_Parameters = { {0,0,0} };
  156. union SystemValueIndices {
  157. struct PixelShaderParameters {
  158. unsigned Position;
  159. } PixelShader;
  160. struct VertexShaderParameters {
  161. unsigned VertexId;
  162. unsigned InstanceId;
  163. } VertexShader;
  164. struct GeometryShaderParameters {
  165. unsigned PrimitiveId;
  166. unsigned InstanceId;
  167. } GeometryShader;
  168. };
  169. uint64_t m_UAVSize = 1024*1024;
  170. Value * m_SelectionCriterion = nullptr;
  171. CallInst * m_HandleForUAV = nullptr;
  172. Value * m_InvocationId = nullptr;
  173. // Together these two values allow branchless writing to the UAV. An invocation of the shader
  174. // is either of interest or not (e.g. it writes to the pixel the user selected for debugging
  175. // or it doesn't). If not of interest, debugging output will still occur, but it will be
  176. // relegated to the very top few bytes of the UAV. Invocations of interest, by contrast, will
  177. // be written to the UAV at sequentially increasing offsets.
  178. // This value will either be one or zero (one if the invocation is of interest, zero otherwise)
  179. Value * m_OffsetMultiplicand = nullptr;
  180. // This will either be zero (if the invocation is of interest) or (UAVSize)-(SmallValue) if not.
  181. Value * m_OffsetAddend = nullptr;
  182. Constant * m_OffsetMask = nullptr;
  183. std::map<uint32_t, Value *> m_IncrementInstructionBySize;
  184. struct BuilderContext {
  185. Module &M;
  186. DxilModule &DM;
  187. LLVMContext & Ctx;
  188. OP * HlslOP;
  189. IRBuilder<> & Builder;
  190. };
  191. uint32_t m_RemainingReservedSpaceInBytes = 0;
  192. Value * m_CurrentIndex = nullptr;
  193. public:
  194. static char ID; // Pass identification, replacement for typeid
  195. explicit DxilDebugInstrumentation() : ModulePass(ID) {}
  196. const char *getPassName() const override { return "Add PIX debug instrumentation"; }
  197. void applyOptions(PassOptions O) override;
  198. bool runOnModule(Module &M) override;
  199. private:
  200. SystemValueIndices addRequiredSystemValues(BuilderContext &BC);
  201. void addUAV(BuilderContext &BC);
  202. void addInvocationSelectionProlog(BuilderContext &BC, SystemValueIndices SVIndices);
  203. Value * addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
  204. Value * addGeometryShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
  205. Value * addComputeShaderProlog(BuilderContext &BC);
  206. Value * addVertexShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
  207. void addDebugEntryValue(BuilderContext &BC, Value * TheValue);
  208. void addInvocationStartMarker(BuilderContext &BC);
  209. void reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInDwords);
  210. void addStoreStepDebugEntry(BuilderContext &BC, StoreInst *Inst);
  211. void addStepDebugEntry(BuilderContext &BC, Instruction *Inst);
  212. void addStepDebugEntryValue(BuilderContext &BC, std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex);
  213. uint32_t UAVDumpingGroundOffset();
  214. template<typename ReturnType>
  215. void addStepEntryForType(DebugShaderModifierRecordType RecordType, BuilderContext &BC, std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex);
  216. };
  217. void DxilDebugInstrumentation::applyOptions(PassOptions O) {
  218. GetPassOptionUnsigned(O, "parameter0", &m_Parameters.Parameters[0], 0);
  219. GetPassOptionUnsigned(O, "parameter1", &m_Parameters.Parameters[1], 0);
  220. GetPassOptionUnsigned(O, "parameter2", &m_Parameters.Parameters[2], 0);
  221. GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024);
  222. }
  223. uint32_t DxilDebugInstrumentation::UAVDumpingGroundOffset() {
  224. return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
  225. }
  226. DxilDebugInstrumentation::SystemValueIndices DxilDebugInstrumentation::addRequiredSystemValues(BuilderContext &BC) {
  227. SystemValueIndices SVIndices{};
  228. hlsl::DxilSignature & InputSignature = BC.DM.GetInputSignature();
  229. auto & InputElements = InputSignature.GetElements();
  230. auto ShaderModel = BC.DM.GetShaderModel();
  231. switch (ShaderModel->GetKind()) {
  232. case DXIL::ShaderKind::Pixel: {
  233. auto Existing_SV_Position = std::find_if(
  234. InputElements.begin(), InputElements.end(),
  235. [](const std::unique_ptr<DxilSignatureElement> & Element) {
  236. return Element->GetSemantic()->GetKind() == hlsl::DXIL::SemanticKind::Position; });
  237. // SV_Position, if present, has to have full mask, so we needn't worry
  238. // about the shader having selected components that don't include x or y.
  239. // If not present, we add it.
  240. if (Existing_SV_Position == InputElements.end()) {
  241. auto Added_SV_Position = llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
  242. Added_SV_Position->Initialize("Position", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Linear, 1, 4);
  243. Added_SV_Position->AppendSemanticIndex(0);
  244. Added_SV_Position->SetSigPointKind(DXIL::SigPointKind::PSIn);
  245. Added_SV_Position->SetKind(hlsl::DXIL::SemanticKind::Position);
  246. auto index = InputSignature.AppendElement(std::move(Added_SV_Position));
  247. SVIndices.PixelShader.Position = InputElements[index]->GetID();
  248. }
  249. else {
  250. SVIndices.PixelShader.Position = Existing_SV_Position->get()->GetID();
  251. }
  252. }
  253. break;
  254. case DXIL::ShaderKind::Vertex: {
  255. {
  256. auto Existing_SV_VertexId = std::find_if(
  257. InputElements.begin(), InputElements.end(),
  258. [](const std::unique_ptr<DxilSignatureElement> & Element) {
  259. return Element->GetSemantic()->GetKind() == hlsl::DXIL::SemanticKind::VertexID; });
  260. if (Existing_SV_VertexId == InputElements.end()) {
  261. auto Added_SV_VertexId = llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
  262. Added_SV_VertexId->Initialize("VertexId", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Undefined, 1, 1);
  263. Added_SV_VertexId->AppendSemanticIndex(0);
  264. Added_SV_VertexId->SetSigPointKind(DXIL::SigPointKind::VSIn);
  265. Added_SV_VertexId->SetKind(hlsl::DXIL::SemanticKind::VertexID);
  266. auto index = InputSignature.AppendElement(std::move(Added_SV_VertexId));
  267. SVIndices.VertexShader.VertexId = InputElements[index]->GetID();
  268. }
  269. else {
  270. SVIndices.VertexShader.VertexId = Existing_SV_VertexId->get()->GetID();
  271. }
  272. }
  273. {
  274. auto Existing_SV_InstanceId = std::find_if(
  275. InputElements.begin(), InputElements.end(),
  276. [](const std::unique_ptr<DxilSignatureElement> & Element) {
  277. return Element->GetSemantic()->GetKind() == hlsl::DXIL::SemanticKind::InstanceID; });
  278. if (Existing_SV_InstanceId == InputElements.end()) {
  279. auto Added_SV_InstanceId = llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
  280. Added_SV_InstanceId->Initialize("InstanceId", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Undefined, 1, 1);
  281. Added_SV_InstanceId->AppendSemanticIndex(0);
  282. Added_SV_InstanceId->SetSigPointKind(DXIL::SigPointKind::VSIn);
  283. Added_SV_InstanceId->SetKind(hlsl::DXIL::SemanticKind::InstanceID);
  284. auto index = InputSignature.AppendElement(std::move(Added_SV_InstanceId));
  285. SVIndices.VertexShader.InstanceId = InputElements[index]->GetID();
  286. }
  287. else {
  288. SVIndices.VertexShader.InstanceId = Existing_SV_InstanceId->get()->GetID();
  289. }
  290. }
  291. }
  292. break;
  293. case DXIL::ShaderKind::Geometry:
  294. // GS Instance Id and Primitive Id are not in the input signature
  295. break;
  296. case DXIL::ShaderKind::Compute:
  297. // Compute thread Id is not in the input signature
  298. break;
  299. default:
  300. assert(false); // guaranteed by runOnModule
  301. }
  302. return SVIndices;
  303. }
  304. Value * DxilDebugInstrumentation::addComputeShaderProlog(BuilderContext &BC) {
  305. Constant* Zero32Arg = BC.HlslOP->GetU32Const(0);
  306. Constant* One32Arg = BC.HlslOP->GetU32Const(1);
  307. Constant* Two32Arg = BC.HlslOP->GetU32Const(2);
  308. auto ThreadIdFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::ThreadId, Type::getInt32Ty(BC.Ctx));
  309. Constant* Opcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::ThreadId);
  310. auto ThreadIdX = BC.Builder.CreateCall(ThreadIdFunc, { Opcode, Zero32Arg }, "ThreadIdX");
  311. auto ThreadIdY = BC.Builder.CreateCall(ThreadIdFunc, { Opcode, One32Arg }, "ThreadIdY");
  312. auto ThreadIdZ = BC.Builder.CreateCall(ThreadIdFunc, { Opcode, Two32Arg }, "ThreadIdZ");
  313. // Compare to expected thread ID
  314. auto CompareToX = BC.Builder.CreateICmpEQ(ThreadIdX, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdX), "CompareToThreadIdX");
  315. auto CompareToY = BC.Builder.CreateICmpEQ(ThreadIdY, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdY), "CompareToThreadIdY");
  316. auto CompareToZ = BC.Builder.CreateICmpEQ(ThreadIdZ, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdZ), "CompareToThreadIdZ");
  317. auto CompareXAndY = BC.Builder.CreateAnd(CompareToX, CompareToY, "CompareXAndY");
  318. auto CompareAll = BC.Builder.CreateAnd(CompareXAndY, CompareToZ, "CompareAll");
  319. return CompareAll;
  320. }
  321. Value * DxilDebugInstrumentation::addVertexShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices) {
  322. Constant* Zero32Arg = BC.HlslOP->GetU32Const(0);
  323. Constant* Zero8Arg = BC.HlslOP->GetI8Const(0);
  324. UndefValue* UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  325. auto LoadInputOpFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getInt32Ty(BC.Ctx));
  326. Constant* LoadInputOpcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
  327. Constant* SV_Vert_ID = BC.HlslOP->GetU32Const(SVIndices.VertexShader.VertexId);
  328. auto VertId = BC.Builder.CreateCall(LoadInputOpFunc,
  329. { LoadInputOpcode, SV_Vert_ID, Zero32Arg /*row*/, Zero8Arg /*column*/, UndefArg }, "VertId");
  330. Constant* SV_Instance_ID = BC.HlslOP->GetU32Const(SVIndices.VertexShader.InstanceId);
  331. auto InstanceId = BC.Builder.CreateCall(LoadInputOpFunc,
  332. { LoadInputOpcode, SV_Instance_ID, Zero32Arg /*row*/, Zero8Arg /*column*/, UndefArg }, "InstanceId");
  333. // Compare to expected vertex ID and instance ID
  334. auto CompareToVert = BC.Builder.CreateICmpEQ(VertId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.VertexId), "CompareToVertId");
  335. auto CompareToInstance = BC.Builder.CreateICmpEQ(InstanceId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.InstanceId), "CompareToInstanceId");
  336. auto CompareBoth = BC.Builder.CreateAnd(CompareToVert, CompareToInstance, "CompareBoth");
  337. return CompareBoth;
  338. }
  339. Value * DxilDebugInstrumentation::addGeometryShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices) {
  340. auto PrimitiveIdOpFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::PrimitiveID, Type::getInt32Ty(BC.Ctx));
  341. Constant* PrimitiveIdOpcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::PrimitiveID);
  342. auto PrimId = BC.Builder.CreateCall(PrimitiveIdOpFunc,
  343. { PrimitiveIdOpcode }, "PrimId");
  344. auto CompareToPrim = BC.Builder.CreateICmpEQ(PrimId, BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.PrimitiveId), "CompareToPrimId");
  345. if (BC.DM.GetGSInstanceCount() <= 1) {
  346. return CompareToPrim;
  347. }
  348. auto GSInstanceIdOpFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::GSInstanceID, Type::getInt32Ty(BC.Ctx));
  349. Constant* GSInstanceIdOpcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::GSInstanceID);
  350. auto GSInstanceId = BC.Builder.CreateCall(GSInstanceIdOpFunc,
  351. { GSInstanceIdOpcode }, "GSInstanceId");
  352. // Compare to expected vertex ID and instance ID
  353. auto CompareToInstance = BC.Builder.CreateICmpEQ(GSInstanceId, BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.InstanceId), "CompareToInstanceId");
  354. auto CompareBoth = BC.Builder.CreateAnd(CompareToPrim, CompareToInstance, "CompareBoth");
  355. return CompareBoth;
  356. }
  357. Value * DxilDebugInstrumentation::addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices) {
  358. Constant* Zero32Arg = BC.HlslOP->GetU32Const(0);
  359. Constant* Zero8Arg = BC.HlslOP->GetI8Const(0);
  360. Constant* One8Arg = BC.HlslOP->GetI8Const(1);
  361. UndefValue* UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  362. // Convert SV_POSITION to UINT
  363. Value * XAsInt;
  364. Value * YAsInt;
  365. {
  366. auto LoadInputOpFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(BC.Ctx));
  367. Constant* LoadInputOpcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
  368. Constant* SV_Pos_ID = BC.HlslOP->GetU32Const(SVIndices.PixelShader.Position);
  369. auto XPos = BC.Builder.CreateCall(LoadInputOpFunc,
  370. { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, Zero8Arg /*column*/, UndefArg }, "XPos");
  371. auto YPos = BC.Builder.CreateCall(LoadInputOpFunc,
  372. { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, One8Arg /*column*/, UndefArg }, "YPos");
  373. XAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, XPos, Type::getInt32Ty(BC.Ctx), "XIndex");
  374. YAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, YPos, Type::getInt32Ty(BC.Ctx), "YIndex");
  375. }
  376. // Compare to expected pixel position and primitive ID
  377. auto CompareToX = BC.Builder.CreateICmpEQ(XAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.X), "CompareToX");
  378. auto CompareToY = BC.Builder.CreateICmpEQ(YAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.Y), "CompareToY");
  379. auto ComparePos = BC.Builder.CreateAnd(CompareToX, CompareToY, "ComparePos");
  380. return ComparePos;
  381. }
  382. void DxilDebugInstrumentation::addUAV(BuilderContext &BC)
  383. {
  384. // Set up a UAV with structure of a single int
  385. unsigned int UAVResourceHandle = static_cast<unsigned int>(BC.DM.GetUAVs().size());
  386. SmallVector<llvm::Type*, 1> Elements{ Type::getInt32Ty(BC.Ctx) };
  387. llvm::StructType *UAVStructTy = llvm::StructType::create(Elements, "PIX_DebugUAV_Type");
  388. std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
  389. pUAV->SetGlobalName("PIX_DebugUAVName");
  390. pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
  391. pUAV->SetID(UAVResourceHandle);
  392. pUAV->SetSpaceID((unsigned int)-2); // This is the reserved-for-tools register space
  393. pUAV->SetSampleCount(1);
  394. pUAV->SetGloballyCoherent(false);
  395. pUAV->SetHasCounter(false);
  396. pUAV->SetCompType(CompType::getI32());
  397. pUAV->SetLowerBound(0);
  398. pUAV->SetRangeSize(1);
  399. pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
  400. pUAV->SetRW(true);
  401. auto ID = BC.DM.AddUAV(std::move(pUAV));
  402. assert(ID == UAVResourceHandle);
  403. BC.DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true);
  404. // Create handle for the newly-added UAV
  405. Function* CreateHandleOpFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx));
  406. Constant* CreateHandleOpcodeArg = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
  407. Constant* UAVVArg = BC.HlslOP->GetI8Const(static_cast<std::underlying_type<DxilResourceBase::Class>::type>(DXIL::ResourceClass::UAV));
  408. Constant* MetaDataArg = BC.HlslOP->GetU32Const(ID); // position of the metadata record in the corresponding metadata list
  409. Constant* IndexArg = BC.HlslOP->GetU32Const(0); //
  410. Constant* FalseArg = BC.HlslOP->GetI1Const(0); // non-uniform resource index: false
  411. m_HandleForUAV = BC.Builder.CreateCall(CreateHandleOpFunc,
  412. { CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg }, "PIX_DebugUAV_Handle");
  413. }
  414. void DxilDebugInstrumentation::addInvocationSelectionProlog(BuilderContext &BC, SystemValueIndices SVIndices) {
  415. auto ShaderModel = BC.DM.GetShaderModel();
  416. Value * ParameterTestResult = nullptr;
  417. switch (ShaderModel->GetKind()) {
  418. case DXIL::ShaderKind::Pixel:
  419. ParameterTestResult = addPixelShaderProlog(BC, SVIndices);
  420. break;
  421. case DXIL::ShaderKind::Geometry:
  422. ParameterTestResult = addGeometryShaderProlog(BC, SVIndices);
  423. break;
  424. case DXIL::ShaderKind::Vertex:
  425. ParameterTestResult = addVertexShaderProlog(BC, SVIndices);
  426. break;
  427. case DXIL::ShaderKind::Compute:
  428. ParameterTestResult = addComputeShaderProlog(BC);
  429. break;
  430. default:
  431. assert(false); // guaranteed by runOnModule
  432. }
  433. // This is a convenient place to calculate the values that modify the UAV offset for invocations of interest and for
  434. // UAV size.
  435. m_OffsetMultiplicand = BC.Builder.CreateCast(Instruction::CastOps::ZExt, ParameterTestResult, Type::getInt32Ty(BC.Ctx), "OffsetMultiplicand");
  436. auto InverseOffsetMultiplicand = BC.Builder.CreateSub(BC.HlslOP->GetU32Const(1), m_OffsetMultiplicand, "ComplementOfMultiplicand");
  437. m_OffsetAddend = BC.Builder.CreateMul(BC.HlslOP->GetU32Const(UAVDumpingGroundOffset()), InverseOffsetMultiplicand, "OffsetAddend");
  438. m_OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);
  439. m_SelectionCriterion = ParameterTestResult;
  440. }
  441. void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes) {
  442. assert(m_CurrentIndex == nullptr);
  443. assert(m_RemainingReservedSpaceInBytes == 0);
  444. m_RemainingReservedSpaceInBytes = SpaceInBytes;
  445. // Insert the UAV increment instruction:
  446. Function* AtomicOpFunc = BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(BC.Ctx));
  447. Constant* AtomicBinOpcode = BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
  448. Constant* AtomicAdd = BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
  449. Constant* Zero32Arg = BC.HlslOP->GetU32Const(0);
  450. UndefValue* UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  451. // so inc will be zero for uninteresting invocations:
  452. Value * IncrementForThisInvocation;
  453. auto findIncrementInstruction = m_IncrementInstructionBySize.find(SpaceInBytes);
  454. if (findIncrementInstruction == m_IncrementInstructionBySize.end()) {
  455. Constant* Increment = BC.HlslOP->GetU32Const(SpaceInBytes);
  456. auto it = m_IncrementInstructionBySize.emplace(
  457. SpaceInBytes, BC.Builder.CreateMul(Increment, m_OffsetMultiplicand, "IncrementForThisInvocation"));
  458. findIncrementInstruction = it.first;
  459. }
  460. IncrementForThisInvocation = findIncrementInstruction->second;
  461. auto PreviousValue = BC.Builder.CreateCall(AtomicOpFunc, {
  462. AtomicBinOpcode,// i32, ; opcode
  463. m_HandleForUAV, // %dx.types.Handle, ; resource handle
  464. AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX
  465. Zero32Arg, // i32, ; coordinate c0: index in bytes
  466. UndefArg, // i32, ; coordinate c1 (unused)
  467. UndefArg, // i32, ; coordinate c2 (unused)
  468. IncrementForThisInvocation, // i32); increment value
  469. }, "UAVIncResult");
  470. if (m_InvocationId == nullptr)
  471. {
  472. m_InvocationId = PreviousValue;
  473. }
  474. auto MaskedForLimit = BC.Builder.CreateAnd(PreviousValue, m_OffsetMask, "MaskedForUAVLimit");
  475. // The return value will either end up being itself (multiplied by one and added with zero)
  476. // or the "dump uninteresting things here" value of (UAVSize - a bit).
  477. auto MultipliedForInterest = BC.Builder.CreateMul(MaskedForLimit, m_OffsetMultiplicand, "MultipliedForInterest");
  478. auto AddedForInterest = BC.Builder.CreateAdd(MultipliedForInterest, m_OffsetAddend, "AddedForInterest");
  479. m_CurrentIndex = AddedForInterest;
  480. }
  481. void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC, Value * TheValue) {
  482. assert(m_RemainingReservedSpaceInBytes > 0);
  483. auto TheValueTypeID = TheValue->getType()->getTypeID();
  484. if (TheValueTypeID == Type::TypeID::DoubleTyID) {
  485. Function* SplitDouble = BC.HlslOP->GetOpFunc(OP::OpCode::SplitDouble, TheValue->getType());
  486. Constant* SplitDoubleOpcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::SplitDouble);
  487. auto SplitDoubleIntruction = BC.Builder.CreateCall(SplitDouble, { SplitDoubleOpcode, TheValue }, "SplitDouble");
  488. auto LowBits = BC.Builder.CreateExtractValue(SplitDoubleIntruction, 0, "LowBits");
  489. auto HighBits = BC.Builder.CreateExtractValue(SplitDoubleIntruction, 1, "HighBits");
  490. //addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding
  491. addDebugEntryValue(BC, LowBits);
  492. addDebugEntryValue(BC, HighBits);
  493. }
  494. else if (TheValueTypeID == Type::TypeID::IntegerTyID && TheValue->getType()->getIntegerBitWidth() == 64) {
  495. auto LowBits = BC.Builder.CreateTrunc(TheValue, Type::getInt32Ty(BC.Ctx), "LowBits");
  496. auto ShiftedBits = BC.Builder.CreateLShr(TheValue, 32, "ShiftedBits");
  497. auto HighBits = BC.Builder.CreateTrunc(ShiftedBits, Type::getInt32Ty(BC.Ctx), "HighBits");
  498. //addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding
  499. addDebugEntryValue(BC, LowBits);
  500. addDebugEntryValue(BC, HighBits);
  501. }
  502. else if (TheValueTypeID == Type::TypeID::IntegerTyID &&
  503. (TheValue->getType()->getIntegerBitWidth() == 16 || TheValue->getType()->getIntegerBitWidth() == 1)) {
  504. auto As32 = BC.Builder.CreateZExt(TheValue, Type::getInt32Ty(BC.Ctx), "As32");
  505. addDebugEntryValue(BC, As32);
  506. }
  507. else if (TheValueTypeID == Type::TypeID::HalfTyID) {
  508. auto AsFloat = BC.Builder.CreateFPCast(TheValue, Type::getFloatTy(BC.Ctx), "AsFloat");
  509. addDebugEntryValue(BC, AsFloat);
  510. }
  511. else {
  512. Function* StoreValue = BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, TheValue->getType()); // Type::getInt32Ty(BC.Ctx));
  513. Constant* StoreValueOpcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore);
  514. UndefValue* Undef32Arg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  515. UndefValue* UndefArg = nullptr;
  516. if (TheValueTypeID == Type::TypeID::IntegerTyID) {
  517. UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  518. }
  519. else if (TheValueTypeID == Type::TypeID::FloatTyID) {
  520. UndefArg = UndefValue::get(Type::getFloatTy(BC.Ctx));
  521. }
  522. else {
  523. // The above are the only two valid types for a UAV store
  524. assert(false);
  525. }
  526. Constant* WriteMask_X = BC.HlslOP->GetI8Const(1);
  527. (void)BC.Builder.CreateCall(StoreValue, {
  528. StoreValueOpcode, // i32 opcode
  529. m_HandleForUAV, // %dx.types.Handle, ; resource handle
  530. m_CurrentIndex, // i32 c0: index in bytes into UAV
  531. Undef32Arg, // i32 c1: unused
  532. TheValue,
  533. UndefArg, // unused values
  534. UndefArg, // unused values
  535. UndefArg, // unused values
  536. WriteMask_X
  537. });
  538. m_RemainingReservedSpaceInBytes -= 4;
  539. assert(m_RemainingReservedSpaceInBytes < 1024); // check for underflow
  540. if (m_RemainingReservedSpaceInBytes != 0) {
  541. m_CurrentIndex = BC.Builder.CreateAdd(m_CurrentIndex, BC.HlslOP->GetU32Const(4));
  542. }
  543. else {
  544. m_CurrentIndex = nullptr;
  545. }
  546. }
  547. }
  548. void DxilDebugInstrumentation::addInvocationStartMarker(BuilderContext &BC) {
  549. DebugShaderModifierRecordHeader marker{ {{0, 0, 0, 0}}, 0 };
  550. reserveDebugEntrySpace(BC, sizeof(marker));
  551. marker.Header.Details.SizeDwords = DebugShaderModifierRecordPayloadSizeDwords(sizeof(marker));;
  552. marker.Header.Details.Flags = 0;
  553. marker.Header.Details.Type = DebugShaderModifierRecordTypeInvocationStartMarker;
  554. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(marker.Header.u32Header));
  555. addDebugEntryValue(BC, m_InvocationId);
  556. }
  557. template<typename ReturnType>
  558. void DxilDebugInstrumentation::addStepEntryForType(DebugShaderModifierRecordType RecordType, BuilderContext &BC, std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex) {
  559. DebugShaderModifierRecordDXILStep<ReturnType> step = {};
  560. reserveDebugEntrySpace(BC, sizeof(step));
  561. step.Header.Details.SizeDwords = DebugShaderModifierRecordPayloadSizeDwords(sizeof(step));
  562. step.Header.Details.Type = static_cast<uint8_t>(RecordType);
  563. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(step.Header.u32Header));
  564. addDebugEntryValue(BC, m_InvocationId);
  565. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(InstNum));
  566. if (RecordType != DebugShaderModifierRecordTypeDXILStepVoid) {
  567. addDebugEntryValue(BC, V);
  568. IRBuilder<> &B = BC.Builder;
  569. Value *VO = BC.HlslOP->GetU32Const(ValueOrdinal << 16);
  570. Value *VOI = B.CreateAnd(ValueOrdinalIndex, BC.HlslOP->GetU32Const(0xFFFF), "ValueOrdinalIndex");
  571. Value *EncodedValueOrdinalAndIndex = BC.Builder.CreateOr(VO, VOI, "ValueOrdinal");
  572. addDebugEntryValue(BC, EncodedValueOrdinalAndIndex);
  573. }
  574. }
  575. void DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext &BC, StoreInst *Inst) {
  576. std::uint32_t ValueOrdinalBase;
  577. std::uint32_t UnusedValueOrdinalSize;
  578. llvm::Value *ValueOrdinalIndex;
  579. if (!pix_dxil::PixAllocaRegWrite::FromInst(Inst, &ValueOrdinalBase, &UnusedValueOrdinalSize, &ValueOrdinalIndex)) {
  580. return;
  581. }
  582. std::uint32_t InstNum;
  583. if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) {
  584. return;
  585. }
  586. addStepDebugEntryValue(BC, InstNum, Inst->getValueOperand(), ValueOrdinalBase, ValueOrdinalIndex);
  587. }
  588. void DxilDebugInstrumentation::addStepDebugEntry(BuilderContext &BC, Instruction *Inst) {
  589. if (Inst->getOpcode() == Instruction::OtherOps::PHI) {
  590. return;
  591. }
  592. if (auto *St = llvm::dyn_cast<llvm::StoreInst>(Inst)) {
  593. addStoreStepDebugEntry(BC, St);
  594. return;
  595. }
  596. std::uint32_t RegNum;
  597. if (!pix_dxil::PixDxilReg::FromInst(Inst, &RegNum)) {
  598. return;
  599. }
  600. std::uint32_t InstNum;
  601. if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) {
  602. return;
  603. }
  604. addStepDebugEntryValue(BC, InstNum, Inst, RegNum, BC.Builder.getInt32(0));
  605. }
  606. void DxilDebugInstrumentation::addStepDebugEntryValue(BuilderContext &BC, std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex) {
  607. const Type::TypeID ID = V->getType()->getTypeID();
  608. switch (ID) {
  609. case Type::TypeID::StructTyID:
  610. case Type::TypeID::VoidTyID:
  611. addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepVoid, BC, InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  612. break;
  613. case Type::TypeID::FloatTyID:
  614. addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC, InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  615. break;
  616. case Type::TypeID::IntegerTyID:
  617. if (V->getType()->getIntegerBitWidth() == 64) {
  618. addStepEntryForType<uint64_t>(DebugShaderModifierRecordTypeDXILStepUint64, BC, InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  619. }
  620. else {
  621. addStepEntryForType<uint32_t>(DebugShaderModifierRecordTypeDXILStepUint32, BC, InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  622. }
  623. break;
  624. case Type::TypeID::DoubleTyID:
  625. addStepEntryForType<double>(DebugShaderModifierRecordTypeDXILStepDouble, BC, InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  626. break;
  627. case Type::TypeID::HalfTyID:
  628. addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC, InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  629. break;
  630. case Type::TypeID::PointerTyID:
  631. // Skip pointer calculation instructions. They aren't particularly meaningful to the user (being a mere
  632. // implementation detail for lookup tables, etc.), and their type is problematic from a UI point of view.
  633. // The subsequent instructions that dereference the pointer will be properly instrumented and show the
  634. // (meaningful) retrieved value.
  635. break;
  636. case Type::TypeID::FP128TyID:
  637. case Type::TypeID::LabelTyID:
  638. case Type::TypeID::MetadataTyID:
  639. case Type::TypeID::FunctionTyID:
  640. case Type::TypeID::ArrayTyID:
  641. case Type::TypeID::VectorTyID:
  642. case Type::TypeID::X86_FP80TyID:
  643. case Type::TypeID::X86_MMXTyID:
  644. case Type::TypeID::PPC_FP128TyID:
  645. assert(false);
  646. }
  647. }
  648. bool DxilDebugInstrumentation::runOnModule(Module &M) {
  649. DxilModule &DM = M.GetOrCreateDxilModule();
  650. LLVMContext & Ctx = M.getContext();
  651. OP *HlslOP = DM.GetOP();
  652. auto ShaderModel = DM.GetShaderModel();
  653. switch (ShaderModel->GetKind()) {
  654. case DXIL::ShaderKind::Pixel:
  655. case DXIL::ShaderKind::Vertex:
  656. case DXIL::ShaderKind::Compute:
  657. case DXIL::ShaderKind::Geometry:
  658. break;
  659. default:
  660. return false;
  661. }
  662. // First record pointers to all instructions in the function:
  663. std::vector<Instruction*> AllInstructions;
  664. for (inst_iterator I = inst_begin(DM.GetEntryFunction()), E = inst_end(DM.GetEntryFunction()); I != E; ++I) {
  665. AllInstructions.push_back(&*I);
  666. }
  667. // Branchless instrumentation requires taking care of a few things:
  668. // -Each invocation of the shader will be either of interest or not of interest
  669. // -If of interest, the offset into the output UAV will be as expected
  670. // -If not, the offset is forced to (UAVsize) - (Small Amount), and that output is ignored by the CPU-side code.
  671. // -The invocation of interest may overflow the UAV. This is handled by taking the modulus of the
  672. // output index. Overflow is then detected on the CPU side by checking for the presence of a canary
  673. // value at (UAVSize) - (Small Amount) * 2 (which is actually a conservative definition of overflow).
  674. //
  675. Instruction* firstInsertionPt = dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
  676. IRBuilder<> Builder(firstInsertionPt);
  677. BuilderContext BC{ M, DM, Ctx, HlslOP, Builder };
  678. addUAV(BC);
  679. auto SystemValues = addRequiredSystemValues(BC);
  680. addInvocationSelectionProlog(BC, SystemValues);
  681. addInvocationStartMarker(BC);
  682. // Instrument original instructions:
  683. for (auto & Inst : AllInstructions) {
  684. // Instrumentation goes after the instruction if it is not a terminator. Otherwise,
  685. // Instrumentation goes prior to the instruction.
  686. if (!Inst->isTerminator()) {
  687. IRBuilder<> Builder(Inst->getNextNode());
  688. BuilderContext BC2{ BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder };
  689. addStepDebugEntry(BC2, Inst);
  690. }
  691. else {
  692. // Insert before this instruction
  693. IRBuilder<> Builder(Inst);
  694. BuilderContext BC2{ BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder };
  695. addStepDebugEntry(BC2, Inst);
  696. }
  697. }
  698. DM.ReEmitDxilResources();
  699. return true;
  700. }
  701. char DxilDebugInstrumentation::ID = 0;
  702. ModulePass *llvm::createDxilDebugInstrumentationPass() {
  703. return new DxilDebugInstrumentation();
  704. }
  705. INITIALIZE_PASS(DxilDebugInstrumentation, "hlsl-dxil-debug-instrumentation", "HLSL DXIL debug instrumentation for PIX", false, false)