DxilDebugInstrumentation.cpp 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // DxilDebugInstrumentation.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Adds instrumentation that enables shader debugging in PIX //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #include "dxc/DXIL/DxilModule.h"
  12. #include "dxc/DXIL/DxilOperations.h"
  13. #include "dxc/DXIL/DxilUtil.h"
  14. #include "dxc/DxilPIXPasses/DxilPIXPasses.h"
  15. #include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h"
  16. #include "dxc/HLSL/DxilGenerationPass.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/IR/Constants.h"
  19. #include "llvm/IR/IRBuilder.h"
  20. #include "llvm/IR/InstIterator.h"
  21. #include "llvm/IR/Module.h"
  22. #include "PixPassHelpers.h"
  23. using namespace llvm;
  24. using namespace hlsl;
  25. // Overview of instrumentation:
  26. //
  27. // In summary, instructions are added that cause a "trace" of the execution of
  28. // the shader to be written out to a UAV. This trace is then used by a debugger
  29. // application to provide a post-mortem debugging experience that reconstructs
  30. // the execution history of the shader.
  31. //
  32. // The trace is only required for a particular shader instance of interest, and
  33. // a branchless mechanism is used to write the trace either to an incrementing
  34. // location within the UAV, or to a "dumping ground" area at the top of the UAV
  35. // if the instance is not of interest.
  36. //
  37. // The following modifications are made:
  38. //
  39. // First, instructions are added to the top of the entry point function that
  40. // implement the following:
  41. // - Examine the input variables that define the instance of the shader that is
  42. // running. This will
  43. // be SV_Position for pixel shaders, SV_Vertex+SV_Instance for vertex
  44. // shaders, thread id for compute shaders etc. If these system values need to
  45. // be added to the shader, then they are also added to the input signature,
  46. // if appropriate.
  47. // - Compare the above variables with the instance of interest defined by the
  48. // invoker of this pass.
  49. // Deduce two values: a multiplicand and an addend that together allow a
  50. // branchless calculation of the offset into the UAV at which to write via
  51. // "offset = offset * multiplicand + addend." If the instance is NOT of
  52. // interest, the multiplicand is zero and the addend is sizeof(UAV)-(a little
  53. // bit), causing writes for uninteresting invocations to end up at the top of
  54. // the UAV. Otherwise the multiplicand is 1 and the addend is 0.
  55. // - Calculate an "instance identifier". Even with the above instance
  56. // identification, several invocations may
  57. // end up matching the selection criteria. Specifically, this happens during
  58. // a draw call in which many triangles overlap the pixel of interest. More on
  59. // this below.
  60. //
  61. // During execution, the instrumentation for most instructions causes data to be
  62. // emitted to the UAV. The index at which data is written is identified by
  63. // treating the first uint32 of the UAV as an index which is atomically
  64. // incremented by the instrumentation. The very first value of this counter that
  65. // is encountered by each invocation is used as the "instance identifier"
  66. // mentioned above. That instance identifier is written out with each packet,
  67. // since many pixel shaders executing in parallel will emit interleaved packets,
  68. // and the debugger application uses the identifiers to group packets from each
  69. // separate invocation together.
  70. //
  71. // If an instruction has a non-void and primitive return type, i.e. isn't a
  72. // struct, then the instrumentation will write that value out to the UAV as well
  73. // as part of the "step" data packet.
  74. //
  75. // The limiting size of the UAV is enforced in a branchless way by ANDing the
  76. // offset with a precomputed value that is sizeof(UAV)-64. The actual size of
  77. // the UAV allocated by the caller is required to be a power of two plus 64 for
  78. // this reason. The caller detects UAV overrun by examining a canary value close
  79. // to the end of the power-of-two size of the UAV. If this value has been
  80. // overwritten, the debug session is deemed to have overflowed the UAV. The
  81. // caller will then allocate a UAV that is twice the size and try again, up to a
  82. // predefined maximum.
  // Size, in bytes, of the region at the top of the UAV that is set aside as
  // the "dumping ground". UAVDumpingGroundOffset() subtracts this from the UAV
  // size to find where that region starts. Must be kept in sync with the
  // same-named value in the debugger application's WinPixShaderUtils.h.
  constexpr uint64_t DebugBufferDumpingGroundSize = uint64_t{64} << 10;

  // Distance, in bytes, from the start of the dumping ground to the atomically
  // incremented counter (see m_CounterOffset). The largest record is far
  // smaller than this, so the margin is deliberately generous. Must also be
  // kept in sync with the debugger application's WinPixShaderUtils.h.
  constexpr size_t CounterOffsetBeyondUsefulData =
      DebugBufferDumpingGroundSize >> 1;
  // Tags stored in the 8-bit Type field of a debug record header. The
  // numbering mirrors the debugger application's debugshaderrecord.h and must
  // not drift from it, so every value is spelled out explicitly.
  enum DebugShaderModifierRecordType {
    DebugShaderModifierRecordTypeInvocationStartMarker = 0,
    DebugShaderModifierRecordTypeStep = 1,
    DebugShaderModifierRecordTypeEvent = 2,
    DebugShaderModifierRecordTypeInputRegister = 3,
    DebugShaderModifierRecordTypeReadRegister = 4,
    DebugShaderModifierRecordTypeWrittenRegister = 5,
    DebugShaderModifierRecordTypeRegisterRelativeIndex0 = 6,
    DebugShaderModifierRecordTypeRegisterRelativeIndex1 = 7,
    DebugShaderModifierRecordTypeRegisterRelativeIndex2 = 8,
    // DXIL step records occupy the top of the 8-bit range.
    DebugShaderModifierRecordTypeDXILStepVoid = 251,
    DebugShaderModifierRecordTypeDXILStepFloat = 252,
    DebugShaderModifierRecordTypeDXILStepUint32 = 253,
    DebugShaderModifierRecordTypeDXILStepUint64 = 254,
    DebugShaderModifierRecordTypeDXILStepDouble = 255,
  };
  // Record layouts shared with the debugger application (debugshaderrecord.h).
  // They are restated here rather than included because the originals rely on
  // unnamed unions, which DXCompiler's build rejects. Everything between the
  // pack(push)/pack(pop) pair is laid out with 4-byte packing.
  #pragma pack(push, 4)

  // Two-dword header common to the records: a packed descriptor word followed
  // by a UID word (addInvocationStartMarker writes the invocation id there).
  struct DebugShaderModifierRecordHeader {
    union {
      struct {
        std::uint32_t SizeDwords : 4;
        std::uint32_t Flags : 4;
        std::uint32_t Type : 8; // a DebugShaderModifierRecordType value
        std::uint32_t HeaderPayload : 16;
      } Details;
      std::uint32_t u32Header; // the same 32 bits viewed as one word
    } Header;
    std::uint32_t UID;
  };

  // Fields shared by every DXIL "step" record. The descriptor word has the
  // same shape as the generic header, with the upper 16 bits named Opcode.
  struct DebugShaderModifierRecordDXILStepBase {
    union {
      struct {
        std::uint32_t SizeDwords : 4;
        std::uint32_t Flags : 4;
        std::uint32_t Type : 8;
        std::uint32_t Opcode : 16;
      } Details;
      std::uint32_t u32Header;
    } Header;
    std::uint32_t UID;
    std::uint32_t InstructionOffset;
  };

  // Step record for an instruction that yields a value of type ReturnType: the
  // base fields, then the value, then a packed 16/16-bit value ordinal.
  template <typename ReturnType>
  struct DebugShaderModifierRecordDXILStep
      : DebugShaderModifierRecordDXILStepBase {
    ReturnType ReturnValue;
    union {
      struct {
        std::uint32_t ValueOrdinalBase : 16;
        std::uint32_t ValueOrdinalIndex : 16;
      } Details;
      std::uint32_t u32ValueOrdinal;
    } ValueOrdinal;
  };

  // Step record for a void instruction: the base fields only.
  template <>
  struct DebugShaderModifierRecordDXILStep<void>
      : DebugShaderModifierRecordDXILStepBase {};

  #pragma pack(pop)
  153. uint32_t
  154. DebugShaderModifierRecordPayloadSizeDwords(size_t recordTotalSizeBytes) {
  155. return ((recordTotalSizeBytes - sizeof(DebugShaderModifierRecordHeader)) /
  156. sizeof(uint32_t));
  157. }
  158. class DxilDebugInstrumentation : public ModulePass {
  159. private:
  160. union ParametersAllTogether {
  161. unsigned Parameters[3];
  162. struct PixelShaderParameters {
  163. unsigned X;
  164. unsigned Y;
  165. } PixelShader;
  166. struct VertexShaderParameters {
  167. unsigned VertexId;
  168. unsigned InstanceId;
  169. } VertexShader;
  170. struct ComputeShaderParameters {
  171. unsigned ThreadIdX;
  172. unsigned ThreadIdY;
  173. unsigned ThreadIdZ;
  174. } ComputeShader;
  175. struct GeometryShaderParameters {
  176. unsigned PrimitiveId;
  177. unsigned InstanceId;
  178. } GeometryShader;
  179. } m_Parameters = {{0, 0, 0}};
  180. union SystemValueIndices {
  181. struct PixelShaderParameters {
  182. unsigned Position;
  183. } PixelShader;
  184. struct VertexShaderParameters {
  185. unsigned VertexId;
  186. unsigned InstanceId;
  187. } VertexShader;
  188. struct GeometryShaderParameters {
  189. unsigned PrimitiveId;
  190. unsigned InstanceId;
  191. } GeometryShader;
  192. };
  193. uint64_t m_UAVSize = 1024 * 1024;
  194. Value *m_SelectionCriterion = nullptr;
  195. CallInst *m_HandleForUAV = nullptr;
  196. Value *m_InvocationId = nullptr;
  197. // Together these two values allow branchless writing to the UAV. An
  198. // invocation of the shader is either of interest or not (e.g. it writes to
  199. // the pixel the user selected for debugging or it doesn't). If not of
  200. // interest, debugging output will still occur, but it will be relegated to
  201. // the very top few bytes of the UAV. Invocations of interest, by contrast,
  202. // will be written to the UAV at sequentially increasing offsets.
  203. // This value will either be one or zero (one if the invocation is of
  204. // interest, zero otherwise)
  205. Value *m_OffsetMultiplicand = nullptr;
  206. // This will either be zero (if the invocation is of interest) or
  207. // (UAVSize)-(SmallValue) if not.
  208. Value *m_OffsetAddend = nullptr;
  209. Constant *m_OffsetMask = nullptr;
  210. Constant *m_CounterOffset = nullptr;
  211. struct BuilderContext {
  212. Module &M;
  213. DxilModule &DM;
  214. LLVMContext &Ctx;
  215. OP *HlslOP;
  216. IRBuilder<> &Builder;
  217. };
  218. uint32_t m_RemainingReservedSpaceInBytes = 0;
  219. Value *m_CurrentIndex = nullptr;
  220. public:
  221. static char ID; // Pass identification, replacement for typeid
  222. explicit DxilDebugInstrumentation() : ModulePass(ID) {}
  223. const char *getPassName() const override {
  224. return "Add PIX debug instrumentation";
  225. }
  226. void applyOptions(PassOptions O) override;
  227. bool runOnModule(Module &M) override;
  228. private:
  229. SystemValueIndices addRequiredSystemValues(BuilderContext &BC);
  230. void addInvocationSelectionProlog(BuilderContext &BC,
  231. SystemValueIndices SVIndices);
  232. Value *addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
  233. Value *addGeometryShaderProlog(BuilderContext &BC,
  234. SystemValueIndices SVIndices);
  235. Value *addDispatchedShaderProlog(BuilderContext &BC);
  236. Value *addVertexShaderProlog(BuilderContext &BC,
  237. SystemValueIndices SVIndices);
  238. void addDebugEntryValue(BuilderContext &BC, Value *TheValue);
  239. void addInvocationStartMarker(BuilderContext &BC);
  240. void reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInDwords);
  241. void addStoreStepDebugEntry(BuilderContext &BC, StoreInst *Inst);
  242. void addStepDebugEntry(BuilderContext& BC, Instruction* Inst);
  243. void addStepDebugEntryValue(BuilderContext &BC, std::uint32_t InstNum,
  244. Value *V, std::uint32_t ValueOrdinal,
  245. Value *ValueOrdinalIndex);
  246. uint32_t UAVDumpingGroundOffset();
  247. template <typename ReturnType>
  248. void addStepEntryForType(DebugShaderModifierRecordType RecordType,
  249. BuilderContext &BC, std::uint32_t InstNum, Value *V,
  250. std::uint32_t ValueOrdinal,
  251. Value *ValueOrdinalIndex);
  252. };
  253. void DxilDebugInstrumentation::applyOptions(PassOptions O) {
  254. GetPassOptionUnsigned(O, "parameter0", &m_Parameters.Parameters[0], 0);
  255. GetPassOptionUnsigned(O, "parameter1", &m_Parameters.Parameters[1], 0);
  256. GetPassOptionUnsigned(O, "parameter2", &m_Parameters.Parameters[2], 0);
  257. GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024);
  258. }
  259. uint32_t DxilDebugInstrumentation::UAVDumpingGroundOffset() {
  260. return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
  261. }
  262. DxilDebugInstrumentation::SystemValueIndices
  263. DxilDebugInstrumentation::addRequiredSystemValues(BuilderContext &BC) {
  264. SystemValueIndices SVIndices{};
  265. hlsl::DxilSignature &InputSignature = BC.DM.GetInputSignature();
  266. auto &InputElements = InputSignature.GetElements();
  267. auto ShaderModel = BC.DM.GetShaderModel();
  268. switch (ShaderModel->GetKind()) {
  269. case DXIL::ShaderKind::Amplification:
  270. case DXIL::ShaderKind::Mesh:
  271. case DXIL::ShaderKind::Compute:
  272. // Dispatch* thread Id is not in the input signature
  273. break;
  274. case DXIL::ShaderKind::Vertex: {
  275. {
  276. auto Existing_SV_VertexId = std::find_if(
  277. InputElements.begin(), InputElements.end(),
  278. [](const std::unique_ptr<DxilSignatureElement> &Element) {
  279. return Element->GetSemantic()->GetKind() ==
  280. hlsl::DXIL::SemanticKind::VertexID;
  281. });
  282. if (Existing_SV_VertexId == InputElements.end()) {
  283. auto Added_SV_VertexId =
  284. llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
  285. Added_SV_VertexId->Initialize("VertexId", hlsl::CompType::getF32(),
  286. hlsl::DXIL::InterpolationMode::Undefined,
  287. 1, 1);
  288. Added_SV_VertexId->AppendSemanticIndex(0);
  289. Added_SV_VertexId->SetSigPointKind(DXIL::SigPointKind::VSIn);
  290. Added_SV_VertexId->SetKind(hlsl::DXIL::SemanticKind::VertexID);
  291. auto index = InputSignature.AppendElement(std::move(Added_SV_VertexId));
  292. SVIndices.VertexShader.VertexId = InputElements[index]->GetID();
  293. } else {
  294. SVIndices.VertexShader.VertexId = Existing_SV_VertexId->get()->GetID();
  295. }
  296. }
  297. {
  298. auto Existing_SV_InstanceId = std::find_if(
  299. InputElements.begin(), InputElements.end(),
  300. [](const std::unique_ptr<DxilSignatureElement> &Element) {
  301. return Element->GetSemantic()->GetKind() ==
  302. hlsl::DXIL::SemanticKind::InstanceID;
  303. });
  304. if (Existing_SV_InstanceId == InputElements.end()) {
  305. auto Added_SV_InstanceId =
  306. llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
  307. Added_SV_InstanceId->Initialize(
  308. "InstanceId", hlsl::CompType::getF32(),
  309. hlsl::DXIL::InterpolationMode::Undefined, 1, 1);
  310. Added_SV_InstanceId->AppendSemanticIndex(0);
  311. Added_SV_InstanceId->SetSigPointKind(DXIL::SigPointKind::VSIn);
  312. Added_SV_InstanceId->SetKind(hlsl::DXIL::SemanticKind::InstanceID);
  313. auto index =
  314. InputSignature.AppendElement(std::move(Added_SV_InstanceId));
  315. SVIndices.VertexShader.InstanceId = InputElements[index]->GetID();
  316. } else {
  317. SVIndices.VertexShader.InstanceId =
  318. Existing_SV_InstanceId->get()->GetID();
  319. }
  320. }
  321. } break;
  322. case DXIL::ShaderKind::Geometry:
  323. // GS Instance Id and Primitive Id are not in the input signature
  324. break;
  325. case DXIL::ShaderKind::Pixel: {
  326. auto Existing_SV_Position =
  327. std::find_if(InputElements.begin(), InputElements.end(),
  328. [](const std::unique_ptr<DxilSignatureElement> &Element) {
  329. return Element->GetSemantic()->GetKind() ==
  330. hlsl::DXIL::SemanticKind::Position;
  331. });
  332. // SV_Position, if present, has to have full mask, so we needn't worry
  333. // about the shader having selected components that don't include x or y.
  334. // If not present, we add it.
  335. if (Existing_SV_Position == InputElements.end()) {
  336. auto Added_SV_Position =
  337. llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
  338. Added_SV_Position->Initialize("Position", hlsl::CompType::getF32(),
  339. hlsl::DXIL::InterpolationMode::Linear, 1,
  340. 4);
  341. Added_SV_Position->AppendSemanticIndex(0);
  342. Added_SV_Position->SetSigPointKind(DXIL::SigPointKind::PSIn);
  343. Added_SV_Position->SetKind(hlsl::DXIL::SemanticKind::Position);
  344. auto index = InputSignature.AppendElement(std::move(Added_SV_Position));
  345. SVIndices.PixelShader.Position = InputElements[index]->GetID();
  346. } else {
  347. SVIndices.PixelShader.Position = Existing_SV_Position->get()->GetID();
  348. }
  349. } break;
  350. default:
  351. assert(false); // guaranteed by runOnModule
  352. }
  353. return SVIndices;
  354. }
  355. Value *DxilDebugInstrumentation::addDispatchedShaderProlog(BuilderContext &BC) {
  356. Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
  357. Constant *One32Arg = BC.HlslOP->GetU32Const(1);
  358. Constant *Two32Arg = BC.HlslOP->GetU32Const(2);
  359. auto ThreadIdFunc =
  360. BC.HlslOP->GetOpFunc(DXIL::OpCode::ThreadId, Type::getInt32Ty(BC.Ctx));
  361. Constant *Opcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::ThreadId);
  362. auto ThreadIdX =
  363. BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Zero32Arg}, "ThreadIdX");
  364. auto ThreadIdY =
  365. BC.Builder.CreateCall(ThreadIdFunc, {Opcode, One32Arg}, "ThreadIdY");
  366. auto ThreadIdZ =
  367. BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Two32Arg}, "ThreadIdZ");
  368. // Compare to expected thread ID
  369. auto CompareToX = BC.Builder.CreateICmpEQ(
  370. ThreadIdX, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdX),
  371. "CompareToThreadIdX");
  372. auto CompareToY = BC.Builder.CreateICmpEQ(
  373. ThreadIdY, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdY),
  374. "CompareToThreadIdY");
  375. auto CompareToZ = BC.Builder.CreateICmpEQ(
  376. ThreadIdZ, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdZ),
  377. "CompareToThreadIdZ");
  378. auto CompareXAndY =
  379. BC.Builder.CreateAnd(CompareToX, CompareToY, "CompareXAndY");
  380. auto CompareAll =
  381. BC.Builder.CreateAnd(CompareXAndY, CompareToZ, "CompareAll");
  382. return CompareAll;
  383. }
  384. Value *
  385. DxilDebugInstrumentation::addVertexShaderProlog(BuilderContext &BC,
  386. SystemValueIndices SVIndices) {
  387. Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
  388. Constant *Zero8Arg = BC.HlslOP->GetI8Const(0);
  389. UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  390. auto LoadInputOpFunc =
  391. BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getInt32Ty(BC.Ctx));
  392. Constant *LoadInputOpcode =
  393. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
  394. Constant *SV_Vert_ID =
  395. BC.HlslOP->GetU32Const(SVIndices.VertexShader.VertexId);
  396. auto VertId =
  397. BC.Builder.CreateCall(LoadInputOpFunc,
  398. {LoadInputOpcode, SV_Vert_ID, Zero32Arg /*row*/,
  399. Zero8Arg /*column*/, UndefArg},
  400. "VertId");
  401. Constant *SV_Instance_ID =
  402. BC.HlslOP->GetU32Const(SVIndices.VertexShader.InstanceId);
  403. auto InstanceId =
  404. BC.Builder.CreateCall(LoadInputOpFunc,
  405. {LoadInputOpcode, SV_Instance_ID, Zero32Arg /*row*/,
  406. Zero8Arg /*column*/, UndefArg},
  407. "InstanceId");
  408. // Compare to expected vertex ID and instance ID
  409. auto CompareToVert = BC.Builder.CreateICmpEQ(
  410. VertId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.VertexId),
  411. "CompareToVertId");
  412. auto CompareToInstance = BC.Builder.CreateICmpEQ(
  413. InstanceId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.InstanceId),
  414. "CompareToInstanceId");
  415. auto CompareBoth =
  416. BC.Builder.CreateAnd(CompareToVert, CompareToInstance, "CompareBoth");
  417. return CompareBoth;
  418. }
  419. Value *DxilDebugInstrumentation::addGeometryShaderProlog(
  420. BuilderContext &BC, SystemValueIndices SVIndices) {
  421. auto PrimitiveIdOpFunc =
  422. BC.HlslOP->GetOpFunc(DXIL::OpCode::PrimitiveID, Type::getInt32Ty(BC.Ctx));
  423. Constant *PrimitiveIdOpcode =
  424. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::PrimitiveID);
  425. auto PrimId =
  426. BC.Builder.CreateCall(PrimitiveIdOpFunc, {PrimitiveIdOpcode}, "PrimId");
  427. auto CompareToPrim = BC.Builder.CreateICmpEQ(
  428. PrimId, BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.PrimitiveId),
  429. "CompareToPrimId");
  430. if (BC.DM.GetGSInstanceCount() <= 1) {
  431. return CompareToPrim;
  432. }
  433. auto GSInstanceIdOpFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::GSInstanceID,
  434. Type::getInt32Ty(BC.Ctx));
  435. Constant *GSInstanceIdOpcode =
  436. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::GSInstanceID);
  437. auto GSInstanceId = BC.Builder.CreateCall(
  438. GSInstanceIdOpFunc, {GSInstanceIdOpcode}, "GSInstanceId");
  439. // Compare to expected vertex ID and instance ID
  440. auto CompareToInstance = BC.Builder.CreateICmpEQ(
  441. GSInstanceId,
  442. BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.InstanceId),
  443. "CompareToInstanceId");
  444. auto CompareBoth =
  445. BC.Builder.CreateAnd(CompareToPrim, CompareToInstance, "CompareBoth");
  446. return CompareBoth;
  447. }
  448. Value *
  449. DxilDebugInstrumentation::addPixelShaderProlog(BuilderContext &BC,
  450. SystemValueIndices SVIndices) {
  451. Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
  452. Constant *Zero8Arg = BC.HlslOP->GetI8Const(0);
  453. Constant *One8Arg = BC.HlslOP->GetI8Const(1);
  454. UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  455. // Convert SV_POSITION to UINT
  456. Value *XAsInt;
  457. Value *YAsInt;
  458. {
  459. auto LoadInputOpFunc =
  460. BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(BC.Ctx));
  461. Constant *LoadInputOpcode =
  462. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
  463. Constant *SV_Pos_ID =
  464. BC.HlslOP->GetU32Const(SVIndices.PixelShader.Position);
  465. auto XPos =
  466. BC.Builder.CreateCall(LoadInputOpFunc,
  467. {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/,
  468. Zero8Arg /*column*/, UndefArg},
  469. "XPos");
  470. auto YPos =
  471. BC.Builder.CreateCall(LoadInputOpFunc,
  472. {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/,
  473. One8Arg /*column*/, UndefArg},
  474. "YPos");
  475. XAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, XPos,
  476. Type::getInt32Ty(BC.Ctx), "XIndex");
  477. YAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, YPos,
  478. Type::getInt32Ty(BC.Ctx), "YIndex");
  479. }
  480. // Compare to expected pixel position and primitive ID
  481. auto CompareToX = BC.Builder.CreateICmpEQ(
  482. XAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.X), "CompareToX");
  483. auto CompareToY = BC.Builder.CreateICmpEQ(
  484. YAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.Y), "CompareToY");
  485. auto ComparePos = BC.Builder.CreateAnd(CompareToX, CompareToY, "ComparePos");
  486. return ComparePos;
  487. }
  488. void DxilDebugInstrumentation::addInvocationSelectionProlog(
  489. BuilderContext &BC, SystemValueIndices SVIndices) {
  490. auto ShaderModel = BC.DM.GetShaderModel();
  491. Value *ParameterTestResult = nullptr;
  492. switch (ShaderModel->GetKind()) {
  493. case DXIL::ShaderKind::Compute:
  494. case DXIL::ShaderKind::Amplification:
  495. case DXIL::ShaderKind::Mesh:
  496. ParameterTestResult = addDispatchedShaderProlog(BC);
  497. break;
  498. case DXIL::ShaderKind::Geometry:
  499. ParameterTestResult = addGeometryShaderProlog(BC, SVIndices);
  500. break;
  501. case DXIL::ShaderKind::Vertex:
  502. ParameterTestResult = addVertexShaderProlog(BC, SVIndices);
  503. break;
  504. case DXIL::ShaderKind::Pixel:
  505. ParameterTestResult = addPixelShaderProlog(BC, SVIndices);
  506. break;
  507. default:
  508. assert(false); // guaranteed by runOnModule
  509. }
  510. // This is a convenient place to calculate the values that modify the UAV
  511. // offset for invocations of interest and for UAV size.
  512. m_OffsetMultiplicand =
  513. BC.Builder.CreateCast(Instruction::CastOps::ZExt, ParameterTestResult,
  514. Type::getInt32Ty(BC.Ctx), "OffsetMultiplicand");
  515. auto InverseOffsetMultiplicand =
  516. BC.Builder.CreateSub(BC.HlslOP->GetU32Const(1), m_OffsetMultiplicand,
  517. "ComplementOfMultiplicand");
  518. m_OffsetAddend =
  519. BC.Builder.CreateMul(BC.HlslOP->GetU32Const(UAVDumpingGroundOffset()),
  520. InverseOffsetMultiplicand, "OffsetAddend");
  521. m_OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);
  522. m_CounterOffset = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() + CounterOffsetBeyondUsefulData);
  523. m_SelectionCriterion = ParameterTestResult;
  524. }
  525. void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC,
  526. uint32_t SpaceInBytes) {
  527. assert(m_CurrentIndex == nullptr);
  528. assert(m_RemainingReservedSpaceInBytes == 0);
  529. m_RemainingReservedSpaceInBytes = SpaceInBytes;
  530. // Insert the UAV increment instruction:
  531. Function *AtomicOpFunc =
  532. BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(BC.Ctx));
  533. Constant *AtomicBinOpcode =
  534. BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
  535. Constant *AtomicAdd =
  536. BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
  537. UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  538. // so inc will be zero for uninteresting invocations:
  539. Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes);
  540. Value *IncrementForThisInvocation = BC.Builder.CreateMul(
  541. Increment, m_OffsetMultiplicand, "IncrementForThisInvocation");
  542. auto PreviousValue = BC.Builder.CreateCall(
  543. AtomicOpFunc,
  544. {
  545. AtomicBinOpcode, // i32, ; opcode
  546. m_HandleForUAV, // %dx.types.Handle, ; resource handle
  547. AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR,
  548. // XOR, IMIN, IMAX, UMIN, UMAX
  549. m_CounterOffset, // i32, ; coordinate c0: index in bytes
  550. UndefArg, // i32, ; coordinate c1 (unused)
  551. UndefArg, // i32, ; coordinate c2 (unused)
  552. IncrementForThisInvocation, // i32); increment value
  553. },
  554. "UAVIncResult");
  555. if (m_InvocationId == nullptr) {
  556. m_InvocationId = PreviousValue;
  557. }
  558. auto MaskedForLimit =
  559. BC.Builder.CreateAnd(PreviousValue, m_OffsetMask, "MaskedForUAVLimit");
  560. // The return value will either end up being itself (multiplied by one and
  561. // added with zero) or the "dump uninteresting things here" value of (UAVSize
  562. // - a bit).
  563. auto MultipliedForInterest = BC.Builder.CreateMul(
  564. MaskedForLimit, m_OffsetMultiplicand, "MultipliedForInterest");
  565. auto AddedForInterest = BC.Builder.CreateAdd(
  566. MultipliedForInterest, m_OffsetAddend, "AddedForInterest");
  567. m_CurrentIndex = AddedForInterest;
  568. }
  569. void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC,
  570. Value *TheValue) {
  571. assert(m_RemainingReservedSpaceInBytes > 0);
  572. auto TheValueTypeID = TheValue->getType()->getTypeID();
  573. if (TheValueTypeID == Type::TypeID::DoubleTyID) {
  574. Function *SplitDouble =
  575. BC.HlslOP->GetOpFunc(OP::OpCode::SplitDouble, TheValue->getType());
  576. Constant *SplitDoubleOpcode =
  577. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::SplitDouble);
  578. auto SplitDoubleIntruction = BC.Builder.CreateCall(
  579. SplitDouble, {SplitDoubleOpcode, TheValue}, "SplitDouble");
  580. auto LowBits =
  581. BC.Builder.CreateExtractValue(SplitDoubleIntruction, 0, "LowBits");
  582. auto HighBits =
  583. BC.Builder.CreateExtractValue(SplitDoubleIntruction, 1, "HighBits");
  584. // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding
  585. addDebugEntryValue(BC, LowBits);
  586. addDebugEntryValue(BC, HighBits);
  587. } else if (TheValueTypeID == Type::TypeID::IntegerTyID &&
  588. TheValue->getType()->getIntegerBitWidth() == 64) {
  589. auto LowBits =
  590. BC.Builder.CreateTrunc(TheValue, Type::getInt32Ty(BC.Ctx), "LowBits");
  591. auto ShiftedBits = BC.Builder.CreateLShr(TheValue, 32, "ShiftedBits");
  592. auto HighBits = BC.Builder.CreateTrunc(
  593. ShiftedBits, Type::getInt32Ty(BC.Ctx), "HighBits");
  594. // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding
  595. addDebugEntryValue(BC, LowBits);
  596. addDebugEntryValue(BC, HighBits);
  597. } else if (TheValueTypeID == Type::TypeID::IntegerTyID &&
  598. (TheValue->getType()->getIntegerBitWidth() == 16 ||
  599. TheValue->getType()->getIntegerBitWidth() == 1)) {
  600. auto As32 =
  601. BC.Builder.CreateZExt(TheValue, Type::getInt32Ty(BC.Ctx), "As32");
  602. addDebugEntryValue(BC, As32);
  603. } else if (TheValueTypeID == Type::TypeID::HalfTyID) {
  604. auto AsFloat =
  605. BC.Builder.CreateFPCast(TheValue, Type::getFloatTy(BC.Ctx), "AsFloat");
  606. addDebugEntryValue(BC, AsFloat);
  607. } else {
  608. Function *StoreValue =
  609. BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore,
  610. TheValue->getType()); // Type::getInt32Ty(BC.Ctx));
  611. Constant *StoreValueOpcode =
  612. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore);
  613. UndefValue *Undef32Arg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  614. UndefValue *UndefArg = nullptr;
  615. if (TheValueTypeID == Type::TypeID::IntegerTyID) {
  616. UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  617. } else if (TheValueTypeID == Type::TypeID::FloatTyID) {
  618. UndefArg = UndefValue::get(Type::getFloatTy(BC.Ctx));
  619. } else {
  620. // The above are the only two valid types for a UAV store
  621. assert(false);
  622. }
  623. Constant *WriteMask_X = BC.HlslOP->GetI8Const(1);
  624. (void)BC.Builder.CreateCall(
  625. StoreValue, {StoreValueOpcode, // i32 opcode
  626. m_HandleForUAV, // %dx.types.Handle, ; resource handle
  627. m_CurrentIndex, // i32 c0: index in bytes into UAV
  628. Undef32Arg, // i32 c1: unused
  629. TheValue,
  630. UndefArg, // unused values
  631. UndefArg, // unused values
  632. UndefArg, // unused values
  633. WriteMask_X});
  634. m_RemainingReservedSpaceInBytes -= 4;
  635. assert(m_RemainingReservedSpaceInBytes < 1024); // check for underflow
  636. if (m_RemainingReservedSpaceInBytes != 0) {
  637. m_CurrentIndex =
  638. BC.Builder.CreateAdd(m_CurrentIndex, BC.HlslOP->GetU32Const(4));
  639. } else {
  640. m_CurrentIndex = nullptr;
  641. }
  642. }
  643. }
  644. void DxilDebugInstrumentation::addInvocationStartMarker(BuilderContext &BC) {
  645. DebugShaderModifierRecordHeader marker{{{0, 0, 0, 0}}, 0};
  646. reserveDebugEntrySpace(BC, sizeof(marker));
  647. marker.Header.Details.SizeDwords =
  648. DebugShaderModifierRecordPayloadSizeDwords(sizeof(marker));
  649. ;
  650. marker.Header.Details.Flags = 0;
  651. marker.Header.Details.Type =
  652. DebugShaderModifierRecordTypeInvocationStartMarker;
  653. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(marker.Header.u32Header));
  654. addDebugEntryValue(BC, m_InvocationId);
  655. }
  656. template <typename ReturnType>
  657. void DxilDebugInstrumentation::addStepEntryForType(
  658. DebugShaderModifierRecordType RecordType, BuilderContext &BC,
  659. std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal,
  660. Value *ValueOrdinalIndex) {
  661. DebugShaderModifierRecordDXILStep<ReturnType> step = {};
  662. reserveDebugEntrySpace(BC, sizeof(step));
  663. step.Header.Details.SizeDwords =
  664. DebugShaderModifierRecordPayloadSizeDwords(sizeof(step));
  665. step.Header.Details.Type = static_cast<uint8_t>(RecordType);
  666. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(step.Header.u32Header));
  667. addDebugEntryValue(BC, m_InvocationId);
  668. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(InstNum));
  669. if (RecordType != DebugShaderModifierRecordTypeDXILStepVoid) {
  670. addDebugEntryValue(BC, V);
  671. IRBuilder<> &B = BC.Builder;
  672. Value *VO = BC.HlslOP->GetU32Const(ValueOrdinal << 16);
  673. Value *VOI = B.CreateAnd(ValueOrdinalIndex, BC.HlslOP->GetU32Const(0xFFFF),
  674. "ValueOrdinalIndex");
  675. Value *EncodedValueOrdinalAndIndex =
  676. BC.Builder.CreateOr(VO, VOI, "ValueOrdinal");
  677. addDebugEntryValue(BC, EncodedValueOrdinalAndIndex);
  678. }
  679. }
  680. void DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext& BC,
  681. StoreInst* Inst) {
  682. std::uint32_t ValueOrdinalBase;
  683. std::uint32_t UnusedValueOrdinalSize;
  684. llvm::Value* ValueOrdinalIndex;
  685. if (!pix_dxil::PixAllocaRegWrite::FromInst(Inst, &ValueOrdinalBase,
  686. &UnusedValueOrdinalSize,
  687. &ValueOrdinalIndex)) {
  688. return;
  689. }
  690. std::uint32_t InstNum;
  691. if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) {
  692. return;
  693. }
  694. if (PIXPassHelpers::IsAllocateRayQueryInstruction(Inst->getValueOperand())) {
  695. return;
  696. }
  697. addStepDebugEntryValue(BC, InstNum, Inst->getValueOperand(), ValueOrdinalBase,
  698. ValueOrdinalIndex);
  699. }
  700. void DxilDebugInstrumentation::addStepDebugEntry(BuilderContext &BC,
  701. Instruction *Inst) {
  702. if (Inst->getOpcode() == Instruction::OtherOps::PHI) {
  703. return;
  704. }
  705. if (PIXPassHelpers::IsAllocateRayQueryInstruction(Inst)) {
  706. return;
  707. }
  708. if (auto *St = llvm::dyn_cast<llvm::StoreInst>(Inst)) {
  709. addStoreStepDebugEntry(BC, St);
  710. return;
  711. }
  712. std::uint32_t RegNum;
  713. if (!pix_dxil::PixDxilReg::FromInst(Inst, &RegNum)) {
  714. return;
  715. }
  716. std::uint32_t InstNum;
  717. if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) {
  718. return;
  719. }
  720. addStepDebugEntryValue(BC, InstNum, Inst, RegNum, BC.Builder.getInt32(0));
  721. }
  722. void DxilDebugInstrumentation::addStepDebugEntryValue(
  723. BuilderContext &BC, std::uint32_t InstNum, Value *V,
  724. std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex) {
  725. const Type::TypeID ID = V->getType()->getTypeID();
  726. switch (ID) {
  727. case Type::TypeID::StructTyID:
  728. case Type::TypeID::VoidTyID:
  729. addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepVoid, BC,
  730. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  731. break;
  732. case Type::TypeID::FloatTyID:
  733. addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC,
  734. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  735. break;
  736. case Type::TypeID::IntegerTyID:
  737. if (V->getType()->getIntegerBitWidth() == 64) {
  738. addStepEntryForType<uint64_t>(DebugShaderModifierRecordTypeDXILStepUint64,
  739. BC, InstNum, V, ValueOrdinal,
  740. ValueOrdinalIndex);
  741. } else {
  742. addStepEntryForType<uint32_t>(DebugShaderModifierRecordTypeDXILStepUint32,
  743. BC, InstNum, V, ValueOrdinal,
  744. ValueOrdinalIndex);
  745. }
  746. break;
  747. case Type::TypeID::DoubleTyID:
  748. addStepEntryForType<double>(DebugShaderModifierRecordTypeDXILStepDouble, BC,
  749. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  750. break;
  751. case Type::TypeID::HalfTyID:
  752. addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC,
  753. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  754. break;
  755. case Type::TypeID::PointerTyID:
  756. // Skip pointer calculation instructions. They aren't particularly
  757. // meaningful to the user (being a mere implementation detail for lookup
  758. // tables, etc.), and their type is problematic from a UI point of view. The
  759. // subsequent instructions that dereference the pointer will be properly
  760. // instrumented and show the (meaningful) retrieved value.
  761. break;
  762. case Type::TypeID::FP128TyID:
  763. case Type::TypeID::LabelTyID:
  764. case Type::TypeID::MetadataTyID:
  765. case Type::TypeID::FunctionTyID:
  766. case Type::TypeID::ArrayTyID:
  767. case Type::TypeID::VectorTyID:
  768. case Type::TypeID::X86_FP80TyID:
  769. case Type::TypeID::X86_MMXTyID:
  770. case Type::TypeID::PPC_FP128TyID:
  771. assert(false);
  772. }
  773. }
  774. bool DxilDebugInstrumentation::runOnModule(Module &M) {
  775. DxilModule &DM = M.GetOrCreateDxilModule();
  776. LLVMContext &Ctx = M.getContext();
  777. OP *HlslOP = DM.GetOP();
  778. auto ShaderModel = DM.GetShaderModel();
  779. switch (ShaderModel->GetKind()) {
  780. case DXIL::ShaderKind::Amplification:
  781. case DXIL::ShaderKind::Mesh:
  782. case DXIL::ShaderKind::Vertex:
  783. case DXIL::ShaderKind::Geometry:
  784. case DXIL::ShaderKind::Pixel:
  785. case DXIL::ShaderKind::Compute:
  786. break;
  787. default:
  788. return false;
  789. }
  790. // First record pointers to all instructions in the function:
  791. std::vector<Instruction *> AllInstructions;
  792. for (inst_iterator I = inst_begin(DM.GetEntryFunction()),
  793. E = inst_end(DM.GetEntryFunction());
  794. I != E; ++I) {
  795. AllInstructions.push_back(&*I);
  796. }
  797. // Branchless instrumentation requires taking care of a few things:
  798. // -Each invocation of the shader will be either of interest or not of
  799. // interest
  800. // -If of interest, the offset into the output UAV will be as expected
  801. // -If not, the offset is forced to (UAVsize) - (Small Amount), and that
  802. // output is ignored by the CPU-side code.
  803. // -The invocation of interest may overflow the UAV. This is handled by taking
  804. // the modulus of the
  805. // output index. Overflow is then detected on the CPU side by checking for
  806. // the presence of a canary value at (UAVSize) - (Small Amount) * 2 (which is
  807. // actually a conservative definition of overflow).
  808. //
  809. Instruction *firstInsertionPt =
  810. dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
  811. IRBuilder<> Builder(firstInsertionPt);
  812. BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
  813. m_HandleForUAV = PIXPassHelpers::CreateUAV(BC.DM, BC.Builder, 0, "PIX_DebugUAV_Handle");
  814. auto SystemValues = addRequiredSystemValues(BC);
  815. addInvocationSelectionProlog(BC, SystemValues);
  816. addInvocationStartMarker(BC);
  817. // Explicitly name new blocks in order to provide stable names for testing purposes
  818. int NewBlockCounter = 0;
  819. auto Fn = DM.GetEntryFunction();
  820. auto &Blocks = Fn->getBasicBlockList();
  821. for (auto &CurrentBlock : Blocks) {
  822. struct ValueAndPhi {
  823. Value *Val;
  824. PHINode *Phi;
  825. unsigned Index;
  826. };
  827. std::map<BasicBlock *, std::vector<ValueAndPhi>> InsertableEdges;
  828. auto &Is = CurrentBlock.getInstList();
  829. for (auto &Inst : Is) {
  830. if (Inst.getOpcode() != Instruction::OtherOps::PHI) {
  831. break;
  832. }
  833. PHINode &PN = llvm::cast<PHINode>(Inst);
  834. for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
  835. BasicBlock *PhiBB = PN.getIncomingBlock(i);
  836. Value *PhiVal = PN.getIncomingValue(i);
  837. InsertableEdges[PhiBB].push_back({PhiVal, &PN, i});
  838. }
  839. }
  840. for (auto &InsertableEdge : InsertableEdges) {
  841. auto *NewBlock = BasicBlock::Create(Ctx, "PIXDebug" + std::to_string(NewBlockCounter++),
  842. InsertableEdge.first->getParent());
  843. IRBuilder<> Builder(NewBlock);
  844. auto *PreviousBlock = InsertableEdge.first;
  845. // Modify all successor operands of the terminator in the previous block
  846. // that match the current block to point to the new block:
  847. TerminatorInst *terminator = PreviousBlock->getTerminator();
  848. unsigned NumSuccessors = terminator->getNumSuccessors();
  849. for (unsigned SuccessorIndex = 0; SuccessorIndex < NumSuccessors;
  850. ++SuccessorIndex) {
  851. auto *CurrentSuccessor = terminator->getSuccessor(SuccessorIndex);
  852. if (CurrentSuccessor == &CurrentBlock) {
  853. terminator->setSuccessor(SuccessorIndex, NewBlock);
  854. }
  855. }
  856. // Modify the Phis and add debug instrumentation
  857. for (auto &ValueNPhi : InsertableEdge.second) {
  858. // Modify the phi to refer to the new block:
  859. ValueNPhi.Phi->setIncomingBlock(ValueNPhi.Index, NewBlock);
  860. // Add instrumentation to the new block
  861. std::uint32_t RegNum;
  862. if (!pix_dxil::PixDxilReg::FromInst(ValueNPhi.Phi, &RegNum)) {
  863. continue;
  864. }
  865. std::uint32_t InstNum;
  866. if (!pix_dxil::PixDxilInstNum::FromInst(ValueNPhi.Phi, &InstNum)) {
  867. continue;
  868. }
  869. BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
  870. addStepDebugEntryValue(BC, InstNum, ValueNPhi.Val, RegNum,
  871. BC.Builder.getInt32(0));
  872. }
  873. // Add a branch to the new block to point to the current block
  874. Builder.CreateBr(&CurrentBlock);
  875. }
  876. }
  877. // Instrument original instructions:
  878. for (auto &Inst : AllInstructions) {
  879. // Instrumentation goes after the instruction if it is not a terminator.
  880. // Otherwise, Instrumentation goes prior to the instruction.
  881. if (!Inst->isTerminator()) {
  882. IRBuilder<> Builder(Inst->getNextNode());
  883. BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder};
  884. addStepDebugEntry(BC2, Inst);
  885. } else {
  886. // Insert before this instruction
  887. IRBuilder<> Builder(Inst);
  888. BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder};
  889. addStepDebugEntry(BC2, Inst);
  890. }
  891. }
  892. DM.ReEmitDxilResources();
  893. return true;
  894. }
  895. char DxilDebugInstrumentation::ID = 0;
  896. ModulePass *llvm::createDxilDebugInstrumentationPass() {
  897. return new DxilDebugInstrumentation();
  898. }
  899. INITIALIZE_PASS(DxilDebugInstrumentation, "hlsl-dxil-debug-instrumentation",
  900. "HLSL DXIL debug instrumentation for PIX", false, false)