DxilDebugInstrumentation.cpp 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // DxilDebugInstrumentation.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Adds instrumentation that enables shader debugging in PIX //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #include "dxc/DXIL/DxilModule.h"
  12. #include "dxc/DXIL/DxilOperations.h"
  13. #include "dxc/DXIL/DxilUtil.h"
  14. #include "dxc/DxilPIXPasses/DxilPIXPasses.h"
  15. #include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h"
  16. #include "dxc/HLSL/DxilGenerationPass.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/IR/Constants.h"
  19. #include "llvm/IR/IRBuilder.h"
  20. #include "llvm/IR/InstIterator.h"
  21. #include "llvm/IR/Module.h"
  22. using namespace llvm;
  23. using namespace hlsl;
  24. // Overview of instrumentation:
  25. //
  26. // In summary, instructions are added that cause a "trace" of the execution of
  27. // the shader to be written out to a UAV. This trace is then used by a debugger
  28. // application to provide a post-mortem debugging experience that reconstructs
  29. // the execution history of the shader.
  30. //
  31. // The trace is only required for a particular shader instance of interest, and
  32. // a branchless mechanism is used to write the trace either to an incrementing
  33. // location within the UAV, or to a "dumping ground" area at the top of the UAV
  34. // if the instance is not of interest.
  35. //
  36. // The following modifications are made:
  37. //
  38. // First, instructions are added to the top of the entry point function that
  39. // implement the following:
  40. // - Examine the input variables that define the instance of the shader that is
  41. // running. This will
  42. // be SV_Position for pixel shaders, SV_Vertex+SV_Instance for vertex
  43. // shaders, thread id for compute shaders etc. If these system values need to
  44. // be added to the shader, then they are also added to the input signature,
  45. // if appropriate.
  46. // - Compare the above variables with the instance of interest defined by the
  47. // invoker of this pass.
  48. // Deduce two values: a multiplicand and an addend that together allow a
  49. // branchless calculation of the offset into the UAV at which to write via
  50. // "offset = offset * multiplicand + addend." If the instance is NOT of
  51. // interest, the multiplicand is zero and the addend is sizeof(UAV)-(a little
  52. // bit), causing writes for uninteresting invocations to end up at the top of
  53. // the UAV. Otherwise the multiplicand is 1 and the addend is 0.
  54. // - Calculate an "instance identifier". Even with the above instance
  55. // identification, several invocations may
  56. // end up matching the selection criteria. Specifically, this happens during
  57. // a draw call in which many triangles overlap the pixel of interest. More on
  58. // this below.
  59. //
  60. // During execution, the instrumentation for most instructions causes data to be
  61. // emitted to the UAV. The index at which data is written is identified by
  62. // treating the first uint32 of the UAV as an index which is atomically
  63. // incremented by the instrumentation. The very first value of this counter that
  64. // is encountered by each invocation is used as the "instance identifier"
  65. // mentioned above. That instance identifier is written out with each packet,
  66. // since many pixel shaders executing in parallel will emit interleaved packets,
  67. // and the debugger application uses the identifiers to group packets from each
  68. // separate invocation together.
  69. //
  70. // If an instruction has a non-void and primitive return type, i.e. isn't a
  71. // struct, then the instrumentation will write that value out to the UAV as well
  72. // as part of the "step" data packet.
  73. //
  74. // The limiting size of the UAV is enforced in a branchless way by ANDing the
  75. // offset with a precomputed value that is sizeof(UAV)-64. The actual size of
  76. // the UAV allocated by the caller is required to be a power of two plus 64 for
  77. // this reason. The caller detects UAV overrun by examining a canary value close
  78. // to the end of the power-of-two size of the UAV. If this value has been
  79. // overwritten, the debug session is deemed to have overflowed the UAV. The
  80. // caller will then allocate a UAV that is twice the size and try again, up to a
  81. // predefined maximum.
  82. // Keep this in sync with the same-named value in the debugger application's
  83. // WinPixShaderUtils.h
  84. constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024;
  85. // These definitions echo those in the debugger application's
  86. // debugshaderrecord.h file
  87. enum DebugShaderModifierRecordType {
  88. DebugShaderModifierRecordTypeInvocationStartMarker,
  89. DebugShaderModifierRecordTypeStep,
  90. DebugShaderModifierRecordTypeEvent,
  91. DebugShaderModifierRecordTypeInputRegister,
  92. DebugShaderModifierRecordTypeReadRegister,
  93. DebugShaderModifierRecordTypeWrittenRegister,
  94. DebugShaderModifierRecordTypeRegisterRelativeIndex0,
  95. DebugShaderModifierRecordTypeRegisterRelativeIndex1,
  96. DebugShaderModifierRecordTypeRegisterRelativeIndex2,
  97. DebugShaderModifierRecordTypeDXILStepVoid = 251,
  98. DebugShaderModifierRecordTypeDXILStepFloat = 252,
  99. DebugShaderModifierRecordTypeDXILStepUint32 = 253,
  100. DebugShaderModifierRecordTypeDXILStepUint64 = 254,
  101. DebugShaderModifierRecordTypeDXILStepDouble = 255,
  102. };
  103. // These structs echo those in the debugger application's debugshaderrecord.h
  104. // file, but are recapitulated here because the originals use unnamed unions
  105. // which are disallowed by DXCompiler's build.
  106. //
  107. #pragma pack(push, 4)
  108. struct DebugShaderModifierRecordHeader {
  109. union {
  110. struct {
  111. uint32_t SizeDwords : 4;
  112. uint32_t Flags : 4;
  113. uint32_t Type : 8;
  114. uint32_t HeaderPayload : 16;
  115. } Details;
  116. uint32_t u32Header;
  117. } Header;
  118. uint32_t UID;
  119. };
  120. struct DebugShaderModifierRecordDXILStepBase {
  121. union {
  122. struct {
  123. uint32_t SizeDwords : 4;
  124. uint32_t Flags : 4;
  125. uint32_t Type : 8;
  126. uint32_t Opcode : 16;
  127. } Details;
  128. uint32_t u32Header;
  129. } Header;
  130. uint32_t UID;
  131. uint32_t InstructionOffset;
  132. };
  133. template <typename ReturnType>
  134. struct DebugShaderModifierRecordDXILStep
  135. : public DebugShaderModifierRecordDXILStepBase {
  136. ReturnType ReturnValue;
  137. union {
  138. struct {
  139. uint32_t ValueOrdinalBase : 16;
  140. uint32_t ValueOrdinalIndex : 16;
  141. } Details;
  142. uint32_t u32ValueOrdinal;
  143. } ValueOrdinal;
  144. };
  145. template <>
  146. struct DebugShaderModifierRecordDXILStep<void>
  147. : public DebugShaderModifierRecordDXILStepBase {};
  148. #pragma pack(pop)
  149. uint32_t
  150. DebugShaderModifierRecordPayloadSizeDwords(size_t recordTotalSizeBytes) {
  151. return ((recordTotalSizeBytes - sizeof(DebugShaderModifierRecordHeader)) /
  152. sizeof(uint32_t));
  153. }
  154. class DxilDebugInstrumentation : public ModulePass {
  155. private:
  156. union ParametersAllTogether {
  157. unsigned Parameters[3];
  158. struct PixelShaderParameters {
  159. unsigned X;
  160. unsigned Y;
  161. } PixelShader;
  162. struct VertexShaderParameters {
  163. unsigned VertexId;
  164. unsigned InstanceId;
  165. } VertexShader;
  166. struct ComputeShaderParameters {
  167. unsigned ThreadIdX;
  168. unsigned ThreadIdY;
  169. unsigned ThreadIdZ;
  170. } ComputeShader;
  171. struct GeometryShaderParameters {
  172. unsigned PrimitiveId;
  173. unsigned InstanceId;
  174. } GeometryShader;
  175. } m_Parameters = {{0, 0, 0}};
  176. union SystemValueIndices {
  177. struct PixelShaderParameters {
  178. unsigned Position;
  179. } PixelShader;
  180. struct VertexShaderParameters {
  181. unsigned VertexId;
  182. unsigned InstanceId;
  183. } VertexShader;
  184. struct GeometryShaderParameters {
  185. unsigned PrimitiveId;
  186. unsigned InstanceId;
  187. } GeometryShader;
  188. };
  189. uint64_t m_UAVSize = 1024 * 1024;
  190. Value *m_SelectionCriterion = nullptr;
  191. CallInst *m_HandleForUAV = nullptr;
  192. Value *m_InvocationId = nullptr;
  193. // Together these two values allow branchless writing to the UAV. An
  194. // invocation of the shader is either of interest or not (e.g. it writes to
  195. // the pixel the user selected for debugging or it doesn't). If not of
  196. // interest, debugging output will still occur, but it will be relegated to
  197. // the very top few bytes of the UAV. Invocations of interest, by contrast,
  198. // will be written to the UAV at sequentially increasing offsets.
  199. // This value will either be one or zero (one if the invocation is of
  200. // interest, zero otherwise)
  201. Value *m_OffsetMultiplicand = nullptr;
  202. // This will either be zero (if the invocation is of interest) or
  203. // (UAVSize)-(SmallValue) if not.
  204. Value *m_OffsetAddend = nullptr;
  205. Constant *m_OffsetMask = nullptr;
  206. struct BuilderContext {
  207. Module &M;
  208. DxilModule &DM;
  209. LLVMContext &Ctx;
  210. OP *HlslOP;
  211. IRBuilder<> &Builder;
  212. };
  213. uint32_t m_RemainingReservedSpaceInBytes = 0;
  214. Value *m_CurrentIndex = nullptr;
  215. public:
  216. static char ID; // Pass identification, replacement for typeid
  217. explicit DxilDebugInstrumentation() : ModulePass(ID) {}
  218. const char *getPassName() const override {
  219. return "Add PIX debug instrumentation";
  220. }
  221. void applyOptions(PassOptions O) override;
  222. bool runOnModule(Module &M) override;
  223. private:
  224. SystemValueIndices addRequiredSystemValues(BuilderContext &BC);
  225. void addUAV(BuilderContext &BC);
  226. void addInvocationSelectionProlog(BuilderContext &BC,
  227. SystemValueIndices SVIndices);
  228. Value *addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
  229. Value *addGeometryShaderProlog(BuilderContext &BC,
  230. SystemValueIndices SVIndices);
  231. Value *addDispatchedShaderProlog(BuilderContext &BC);
  232. Value *addVertexShaderProlog(BuilderContext &BC,
  233. SystemValueIndices SVIndices);
  234. void addDebugEntryValue(BuilderContext &BC, Value *TheValue);
  235. void addInvocationStartMarker(BuilderContext &BC);
  236. void reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInDwords);
  237. void addStoreStepDebugEntry(BuilderContext &BC, StoreInst *Inst);
  238. void addStepDebugEntry(BuilderContext &BC, Instruction *Inst);
  239. void addStepDebugEntryValue(BuilderContext &BC, std::uint32_t InstNum,
  240. Value *V, std::uint32_t ValueOrdinal,
  241. Value *ValueOrdinalIndex);
  242. uint32_t UAVDumpingGroundOffset();
  243. template <typename ReturnType>
  244. void addStepEntryForType(DebugShaderModifierRecordType RecordType,
  245. BuilderContext &BC, std::uint32_t InstNum, Value *V,
  246. std::uint32_t ValueOrdinal,
  247. Value *ValueOrdinalIndex);
  248. };
  249. void DxilDebugInstrumentation::applyOptions(PassOptions O) {
  250. GetPassOptionUnsigned(O, "parameter0", &m_Parameters.Parameters[0], 0);
  251. GetPassOptionUnsigned(O, "parameter1", &m_Parameters.Parameters[1], 0);
  252. GetPassOptionUnsigned(O, "parameter2", &m_Parameters.Parameters[2], 0);
  253. GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024);
  254. }
  255. uint32_t DxilDebugInstrumentation::UAVDumpingGroundOffset() {
  256. return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
  257. }
  258. DxilDebugInstrumentation::SystemValueIndices
  259. DxilDebugInstrumentation::addRequiredSystemValues(BuilderContext &BC) {
  260. SystemValueIndices SVIndices{};
  261. hlsl::DxilSignature &InputSignature = BC.DM.GetInputSignature();
  262. auto &InputElements = InputSignature.GetElements();
  263. auto ShaderModel = BC.DM.GetShaderModel();
  264. switch (ShaderModel->GetKind()) {
  265. case DXIL::ShaderKind::Amplification:
  266. case DXIL::ShaderKind::Mesh:
  267. case DXIL::ShaderKind::Compute:
  268. // Dispatch* thread Id is not in the input signature
  269. break;
  270. case DXIL::ShaderKind::Vertex: {
  271. {
  272. auto Existing_SV_VertexId = std::find_if(
  273. InputElements.begin(), InputElements.end(),
  274. [](const std::unique_ptr<DxilSignatureElement> &Element) {
  275. return Element->GetSemantic()->GetKind() ==
  276. hlsl::DXIL::SemanticKind::VertexID;
  277. });
  278. if (Existing_SV_VertexId == InputElements.end()) {
  279. auto Added_SV_VertexId =
  280. llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
  281. Added_SV_VertexId->Initialize("VertexId", hlsl::CompType::getF32(),
  282. hlsl::DXIL::InterpolationMode::Undefined,
  283. 1, 1);
  284. Added_SV_VertexId->AppendSemanticIndex(0);
  285. Added_SV_VertexId->SetSigPointKind(DXIL::SigPointKind::VSIn);
  286. Added_SV_VertexId->SetKind(hlsl::DXIL::SemanticKind::VertexID);
  287. auto index = InputSignature.AppendElement(std::move(Added_SV_VertexId));
  288. SVIndices.VertexShader.VertexId = InputElements[index]->GetID();
  289. } else {
  290. SVIndices.VertexShader.VertexId = Existing_SV_VertexId->get()->GetID();
  291. }
  292. }
  293. {
  294. auto Existing_SV_InstanceId = std::find_if(
  295. InputElements.begin(), InputElements.end(),
  296. [](const std::unique_ptr<DxilSignatureElement> &Element) {
  297. return Element->GetSemantic()->GetKind() ==
  298. hlsl::DXIL::SemanticKind::InstanceID;
  299. });
  300. if (Existing_SV_InstanceId == InputElements.end()) {
  301. auto Added_SV_InstanceId =
  302. llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
  303. Added_SV_InstanceId->Initialize(
  304. "InstanceId", hlsl::CompType::getF32(),
  305. hlsl::DXIL::InterpolationMode::Undefined, 1, 1);
  306. Added_SV_InstanceId->AppendSemanticIndex(0);
  307. Added_SV_InstanceId->SetSigPointKind(DXIL::SigPointKind::VSIn);
  308. Added_SV_InstanceId->SetKind(hlsl::DXIL::SemanticKind::InstanceID);
  309. auto index =
  310. InputSignature.AppendElement(std::move(Added_SV_InstanceId));
  311. SVIndices.VertexShader.InstanceId = InputElements[index]->GetID();
  312. } else {
  313. SVIndices.VertexShader.InstanceId =
  314. Existing_SV_InstanceId->get()->GetID();
  315. }
  316. }
  317. } break;
  318. case DXIL::ShaderKind::Geometry:
  319. // GS Instance Id and Primitive Id are not in the input signature
  320. break;
  321. case DXIL::ShaderKind::Pixel: {
  322. auto Existing_SV_Position =
  323. std::find_if(InputElements.begin(), InputElements.end(),
  324. [](const std::unique_ptr<DxilSignatureElement> &Element) {
  325. return Element->GetSemantic()->GetKind() ==
  326. hlsl::DXIL::SemanticKind::Position;
  327. });
  328. // SV_Position, if present, has to have full mask, so we needn't worry
  329. // about the shader having selected components that don't include x or y.
  330. // If not present, we add it.
  331. if (Existing_SV_Position == InputElements.end()) {
  332. auto Added_SV_Position =
  333. llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
  334. Added_SV_Position->Initialize("Position", hlsl::CompType::getF32(),
  335. hlsl::DXIL::InterpolationMode::Linear, 1,
  336. 4);
  337. Added_SV_Position->AppendSemanticIndex(0);
  338. Added_SV_Position->SetSigPointKind(DXIL::SigPointKind::PSIn);
  339. Added_SV_Position->SetKind(hlsl::DXIL::SemanticKind::Position);
  340. auto index = InputSignature.AppendElement(std::move(Added_SV_Position));
  341. SVIndices.PixelShader.Position = InputElements[index]->GetID();
  342. } else {
  343. SVIndices.PixelShader.Position = Existing_SV_Position->get()->GetID();
  344. }
  345. } break;
  346. default:
  347. assert(false); // guaranteed by runOnModule
  348. }
  349. return SVIndices;
  350. }
  351. Value *DxilDebugInstrumentation::addDispatchedShaderProlog(BuilderContext &BC) {
  352. Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
  353. Constant *One32Arg = BC.HlslOP->GetU32Const(1);
  354. Constant *Two32Arg = BC.HlslOP->GetU32Const(2);
  355. auto ThreadIdFunc =
  356. BC.HlslOP->GetOpFunc(DXIL::OpCode::ThreadId, Type::getInt32Ty(BC.Ctx));
  357. Constant *Opcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::ThreadId);
  358. auto ThreadIdX =
  359. BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Zero32Arg}, "ThreadIdX");
  360. auto ThreadIdY =
  361. BC.Builder.CreateCall(ThreadIdFunc, {Opcode, One32Arg}, "ThreadIdY");
  362. auto ThreadIdZ =
  363. BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Two32Arg}, "ThreadIdZ");
  364. // Compare to expected thread ID
  365. auto CompareToX = BC.Builder.CreateICmpEQ(
  366. ThreadIdX, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdX),
  367. "CompareToThreadIdX");
  368. auto CompareToY = BC.Builder.CreateICmpEQ(
  369. ThreadIdY, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdY),
  370. "CompareToThreadIdY");
  371. auto CompareToZ = BC.Builder.CreateICmpEQ(
  372. ThreadIdZ, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdZ),
  373. "CompareToThreadIdZ");
  374. auto CompareXAndY =
  375. BC.Builder.CreateAnd(CompareToX, CompareToY, "CompareXAndY");
  376. auto CompareAll =
  377. BC.Builder.CreateAnd(CompareXAndY, CompareToZ, "CompareAll");
  378. return CompareAll;
  379. }
  380. Value *
  381. DxilDebugInstrumentation::addVertexShaderProlog(BuilderContext &BC,
  382. SystemValueIndices SVIndices) {
  383. Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
  384. Constant *Zero8Arg = BC.HlslOP->GetI8Const(0);
  385. UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  386. auto LoadInputOpFunc =
  387. BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getInt32Ty(BC.Ctx));
  388. Constant *LoadInputOpcode =
  389. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
  390. Constant *SV_Vert_ID =
  391. BC.HlslOP->GetU32Const(SVIndices.VertexShader.VertexId);
  392. auto VertId =
  393. BC.Builder.CreateCall(LoadInputOpFunc,
  394. {LoadInputOpcode, SV_Vert_ID, Zero32Arg /*row*/,
  395. Zero8Arg /*column*/, UndefArg},
  396. "VertId");
  397. Constant *SV_Instance_ID =
  398. BC.HlslOP->GetU32Const(SVIndices.VertexShader.InstanceId);
  399. auto InstanceId =
  400. BC.Builder.CreateCall(LoadInputOpFunc,
  401. {LoadInputOpcode, SV_Instance_ID, Zero32Arg /*row*/,
  402. Zero8Arg /*column*/, UndefArg},
  403. "InstanceId");
  404. // Compare to expected vertex ID and instance ID
  405. auto CompareToVert = BC.Builder.CreateICmpEQ(
  406. VertId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.VertexId),
  407. "CompareToVertId");
  408. auto CompareToInstance = BC.Builder.CreateICmpEQ(
  409. InstanceId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.InstanceId),
  410. "CompareToInstanceId");
  411. auto CompareBoth =
  412. BC.Builder.CreateAnd(CompareToVert, CompareToInstance, "CompareBoth");
  413. return CompareBoth;
  414. }
  415. Value *DxilDebugInstrumentation::addGeometryShaderProlog(
  416. BuilderContext &BC, SystemValueIndices SVIndices) {
  417. auto PrimitiveIdOpFunc =
  418. BC.HlslOP->GetOpFunc(DXIL::OpCode::PrimitiveID, Type::getInt32Ty(BC.Ctx));
  419. Constant *PrimitiveIdOpcode =
  420. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::PrimitiveID);
  421. auto PrimId =
  422. BC.Builder.CreateCall(PrimitiveIdOpFunc, {PrimitiveIdOpcode}, "PrimId");
  423. auto CompareToPrim = BC.Builder.CreateICmpEQ(
  424. PrimId, BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.PrimitiveId),
  425. "CompareToPrimId");
  426. if (BC.DM.GetGSInstanceCount() <= 1) {
  427. return CompareToPrim;
  428. }
  429. auto GSInstanceIdOpFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::GSInstanceID,
  430. Type::getInt32Ty(BC.Ctx));
  431. Constant *GSInstanceIdOpcode =
  432. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::GSInstanceID);
  433. auto GSInstanceId = BC.Builder.CreateCall(
  434. GSInstanceIdOpFunc, {GSInstanceIdOpcode}, "GSInstanceId");
  435. // Compare to expected vertex ID and instance ID
  436. auto CompareToInstance = BC.Builder.CreateICmpEQ(
  437. GSInstanceId,
  438. BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.InstanceId),
  439. "CompareToInstanceId");
  440. auto CompareBoth =
  441. BC.Builder.CreateAnd(CompareToPrim, CompareToInstance, "CompareBoth");
  442. return CompareBoth;
  443. }
  444. Value *
  445. DxilDebugInstrumentation::addPixelShaderProlog(BuilderContext &BC,
  446. SystemValueIndices SVIndices) {
  447. Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
  448. Constant *Zero8Arg = BC.HlslOP->GetI8Const(0);
  449. Constant *One8Arg = BC.HlslOP->GetI8Const(1);
  450. UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  451. // Convert SV_POSITION to UINT
  452. Value *XAsInt;
  453. Value *YAsInt;
  454. {
  455. auto LoadInputOpFunc =
  456. BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(BC.Ctx));
  457. Constant *LoadInputOpcode =
  458. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
  459. Constant *SV_Pos_ID =
  460. BC.HlslOP->GetU32Const(SVIndices.PixelShader.Position);
  461. auto XPos =
  462. BC.Builder.CreateCall(LoadInputOpFunc,
  463. {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/,
  464. Zero8Arg /*column*/, UndefArg},
  465. "XPos");
  466. auto YPos =
  467. BC.Builder.CreateCall(LoadInputOpFunc,
  468. {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/,
  469. One8Arg /*column*/, UndefArg},
  470. "YPos");
  471. XAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, XPos,
  472. Type::getInt32Ty(BC.Ctx), "XIndex");
  473. YAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, YPos,
  474. Type::getInt32Ty(BC.Ctx), "YIndex");
  475. }
  476. // Compare to expected pixel position and primitive ID
  477. auto CompareToX = BC.Builder.CreateICmpEQ(
  478. XAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.X), "CompareToX");
  479. auto CompareToY = BC.Builder.CreateICmpEQ(
  480. YAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.Y), "CompareToY");
  481. auto ComparePos = BC.Builder.CreateAnd(CompareToX, CompareToY, "ComparePos");
  482. return ComparePos;
  483. }
  484. void DxilDebugInstrumentation::addUAV(BuilderContext &BC) {
  485. // Set up a UAV with structure of a single int
  486. unsigned int UAVResourceHandle =
  487. static_cast<unsigned int>(BC.DM.GetUAVs().size());
  488. SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(BC.Ctx)};
  489. llvm::StructType *UAVStructTy =
  490. llvm::StructType::create(Elements, "PIX_DebugUAV_Type");
  491. std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
  492. pUAV->SetGlobalName("PIX_DebugUAVName");
  493. pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
  494. pUAV->SetID(UAVResourceHandle);
  495. pUAV->SetSpaceID(
  496. (unsigned int)-2); // This is the reserved-for-tools register space
  497. pUAV->SetSampleCount(1);
  498. pUAV->SetGloballyCoherent(false);
  499. pUAV->SetHasCounter(false);
  500. pUAV->SetCompType(CompType::getI32());
  501. pUAV->SetLowerBound(0);
  502. pUAV->SetRangeSize(1);
  503. pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
  504. pUAV->SetRW(true);
  505. auto ID = BC.DM.AddUAV(std::move(pUAV));
  506. assert(ID == UAVResourceHandle);
  507. BC.DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true);
  508. // Create handle for the newly-added UAV
  509. Function *CreateHandleOpFunc =
  510. BC.HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx));
  511. Constant *CreateHandleOpcodeArg =
  512. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
  513. Constant *UAVVArg = BC.HlslOP->GetI8Const(
  514. static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
  515. DXIL::ResourceClass::UAV));
  516. Constant *MetaDataArg = BC.HlslOP->GetU32Const(
  517. ID); // position of the metadata record in the corresponding metadata list
  518. Constant *IndexArg = BC.HlslOP->GetU32Const(0); //
  519. Constant *FalseArg =
  520. BC.HlslOP->GetI1Const(0); // non-uniform resource index: false
  521. m_HandleForUAV = BC.Builder.CreateCall(
  522. CreateHandleOpFunc,
  523. {CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg},
  524. "PIX_DebugUAV_Handle");
  525. }
  526. void DxilDebugInstrumentation::addInvocationSelectionProlog(
  527. BuilderContext &BC, SystemValueIndices SVIndices) {
  528. auto ShaderModel = BC.DM.GetShaderModel();
  529. Value *ParameterTestResult = nullptr;
  530. switch (ShaderModel->GetKind()) {
  531. case DXIL::ShaderKind::Compute:
  532. case DXIL::ShaderKind::Amplification:
  533. case DXIL::ShaderKind::Mesh:
  534. ParameterTestResult = addDispatchedShaderProlog(BC);
  535. break;
  536. case DXIL::ShaderKind::Geometry:
  537. ParameterTestResult = addGeometryShaderProlog(BC, SVIndices);
  538. break;
  539. case DXIL::ShaderKind::Vertex:
  540. ParameterTestResult = addVertexShaderProlog(BC, SVIndices);
  541. break;
  542. case DXIL::ShaderKind::Pixel:
  543. ParameterTestResult = addPixelShaderProlog(BC, SVIndices);
  544. break;
  545. default:
  546. assert(false); // guaranteed by runOnModule
  547. }
  548. // This is a convenient place to calculate the values that modify the UAV
  549. // offset for invocations of interest and for UAV size.
  550. m_OffsetMultiplicand =
  551. BC.Builder.CreateCast(Instruction::CastOps::ZExt, ParameterTestResult,
  552. Type::getInt32Ty(BC.Ctx), "OffsetMultiplicand");
  553. auto InverseOffsetMultiplicand =
  554. BC.Builder.CreateSub(BC.HlslOP->GetU32Const(1), m_OffsetMultiplicand,
  555. "ComplementOfMultiplicand");
  556. m_OffsetAddend =
  557. BC.Builder.CreateMul(BC.HlslOP->GetU32Const(UAVDumpingGroundOffset()),
  558. InverseOffsetMultiplicand, "OffsetAddend");
  559. m_OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);
  560. m_SelectionCriterion = ParameterTestResult;
  561. }
  562. void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC,
  563. uint32_t SpaceInBytes) {
  564. assert(m_CurrentIndex == nullptr);
  565. assert(m_RemainingReservedSpaceInBytes == 0);
  566. m_RemainingReservedSpaceInBytes = SpaceInBytes;
  567. // Insert the UAV increment instruction:
  568. Function *AtomicOpFunc =
  569. BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(BC.Ctx));
  570. Constant *AtomicBinOpcode =
  571. BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
  572. Constant *AtomicAdd =
  573. BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
  574. Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
  575. UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  576. // so inc will be zero for uninteresting invocations:
  577. Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes);
  578. Value *IncrementForThisInvocation = BC.Builder.CreateMul(
  579. Increment, m_OffsetMultiplicand, "IncrementForThisInvocation");
  580. auto PreviousValue = BC.Builder.CreateCall(
  581. AtomicOpFunc,
  582. {
  583. AtomicBinOpcode, // i32, ; opcode
  584. m_HandleForUAV, // %dx.types.Handle, ; resource handle
  585. AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR,
  586. // XOR, IMIN, IMAX, UMIN, UMAX
  587. Zero32Arg, // i32, ; coordinate c0: index in bytes
  588. UndefArg, // i32, ; coordinate c1 (unused)
  589. UndefArg, // i32, ; coordinate c2 (unused)
  590. IncrementForThisInvocation, // i32); increment value
  591. },
  592. "UAVIncResult");
  593. if (m_InvocationId == nullptr) {
  594. m_InvocationId = PreviousValue;
  595. }
  596. auto MaskedForLimit =
  597. BC.Builder.CreateAnd(PreviousValue, m_OffsetMask, "MaskedForUAVLimit");
  598. // The return value will either end up being itself (multiplied by one and
  599. // added with zero) or the "dump uninteresting things here" value of (UAVSize
  600. // - a bit).
  601. auto MultipliedForInterest = BC.Builder.CreateMul(
  602. MaskedForLimit, m_OffsetMultiplicand, "MultipliedForInterest");
  603. auto AddedForInterest = BC.Builder.CreateAdd(
  604. MultipliedForInterest, m_OffsetAddend, "AddedForInterest");
  605. m_CurrentIndex = AddedForInterest;
  606. }
  607. void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC,
  608. Value *TheValue) {
  609. assert(m_RemainingReservedSpaceInBytes > 0);
  610. auto TheValueTypeID = TheValue->getType()->getTypeID();
  611. if (TheValueTypeID == Type::TypeID::DoubleTyID) {
  612. Function *SplitDouble =
  613. BC.HlslOP->GetOpFunc(OP::OpCode::SplitDouble, TheValue->getType());
  614. Constant *SplitDoubleOpcode =
  615. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::SplitDouble);
  616. auto SplitDoubleIntruction = BC.Builder.CreateCall(
  617. SplitDouble, {SplitDoubleOpcode, TheValue}, "SplitDouble");
  618. auto LowBits =
  619. BC.Builder.CreateExtractValue(SplitDoubleIntruction, 0, "LowBits");
  620. auto HighBits =
  621. BC.Builder.CreateExtractValue(SplitDoubleIntruction, 1, "HighBits");
  622. // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding
  623. addDebugEntryValue(BC, LowBits);
  624. addDebugEntryValue(BC, HighBits);
  625. } else if (TheValueTypeID == Type::TypeID::IntegerTyID &&
  626. TheValue->getType()->getIntegerBitWidth() == 64) {
  627. auto LowBits =
  628. BC.Builder.CreateTrunc(TheValue, Type::getInt32Ty(BC.Ctx), "LowBits");
  629. auto ShiftedBits = BC.Builder.CreateLShr(TheValue, 32, "ShiftedBits");
  630. auto HighBits = BC.Builder.CreateTrunc(
  631. ShiftedBits, Type::getInt32Ty(BC.Ctx), "HighBits");
  632. // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding
  633. addDebugEntryValue(BC, LowBits);
  634. addDebugEntryValue(BC, HighBits);
  635. } else if (TheValueTypeID == Type::TypeID::IntegerTyID &&
  636. (TheValue->getType()->getIntegerBitWidth() == 16 ||
  637. TheValue->getType()->getIntegerBitWidth() == 1)) {
  638. auto As32 =
  639. BC.Builder.CreateZExt(TheValue, Type::getInt32Ty(BC.Ctx), "As32");
  640. addDebugEntryValue(BC, As32);
  641. } else if (TheValueTypeID == Type::TypeID::HalfTyID) {
  642. auto AsFloat =
  643. BC.Builder.CreateFPCast(TheValue, Type::getFloatTy(BC.Ctx), "AsFloat");
  644. addDebugEntryValue(BC, AsFloat);
  645. } else {
  646. Function *StoreValue =
  647. BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore,
  648. TheValue->getType()); // Type::getInt32Ty(BC.Ctx));
  649. Constant *StoreValueOpcode =
  650. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore);
  651. UndefValue *Undef32Arg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  652. UndefValue *UndefArg = nullptr;
  653. if (TheValueTypeID == Type::TypeID::IntegerTyID) {
  654. UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  655. } else if (TheValueTypeID == Type::TypeID::FloatTyID) {
  656. UndefArg = UndefValue::get(Type::getFloatTy(BC.Ctx));
  657. } else {
  658. // The above are the only two valid types for a UAV store
  659. assert(false);
  660. }
  661. Constant *WriteMask_X = BC.HlslOP->GetI8Const(1);
  662. (void)BC.Builder.CreateCall(
  663. StoreValue, {StoreValueOpcode, // i32 opcode
  664. m_HandleForUAV, // %dx.types.Handle, ; resource handle
  665. m_CurrentIndex, // i32 c0: index in bytes into UAV
  666. Undef32Arg, // i32 c1: unused
  667. TheValue,
  668. UndefArg, // unused values
  669. UndefArg, // unused values
  670. UndefArg, // unused values
  671. WriteMask_X});
  672. m_RemainingReservedSpaceInBytes -= 4;
  673. assert(m_RemainingReservedSpaceInBytes < 1024); // check for underflow
  674. if (m_RemainingReservedSpaceInBytes != 0) {
  675. m_CurrentIndex =
  676. BC.Builder.CreateAdd(m_CurrentIndex, BC.HlslOP->GetU32Const(4));
  677. } else {
  678. m_CurrentIndex = nullptr;
  679. }
  680. }
  681. }
  682. void DxilDebugInstrumentation::addInvocationStartMarker(BuilderContext &BC) {
  683. DebugShaderModifierRecordHeader marker{{{0, 0, 0, 0}}, 0};
  684. reserveDebugEntrySpace(BC, sizeof(marker));
  685. marker.Header.Details.SizeDwords =
  686. DebugShaderModifierRecordPayloadSizeDwords(sizeof(marker));
  687. ;
  688. marker.Header.Details.Flags = 0;
  689. marker.Header.Details.Type =
  690. DebugShaderModifierRecordTypeInvocationStartMarker;
  691. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(marker.Header.u32Header));
  692. addDebugEntryValue(BC, m_InvocationId);
  693. }
  694. template <typename ReturnType>
  695. void DxilDebugInstrumentation::addStepEntryForType(
  696. DebugShaderModifierRecordType RecordType, BuilderContext &BC,
  697. std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal,
  698. Value *ValueOrdinalIndex) {
  699. DebugShaderModifierRecordDXILStep<ReturnType> step = {};
  700. reserveDebugEntrySpace(BC, sizeof(step));
  701. step.Header.Details.SizeDwords =
  702. DebugShaderModifierRecordPayloadSizeDwords(sizeof(step));
  703. step.Header.Details.Type = static_cast<uint8_t>(RecordType);
  704. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(step.Header.u32Header));
  705. addDebugEntryValue(BC, m_InvocationId);
  706. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(InstNum));
  707. if (RecordType != DebugShaderModifierRecordTypeDXILStepVoid) {
  708. addDebugEntryValue(BC, V);
  709. IRBuilder<> &B = BC.Builder;
  710. Value *VO = BC.HlslOP->GetU32Const(ValueOrdinal << 16);
  711. Value *VOI = B.CreateAnd(ValueOrdinalIndex, BC.HlslOP->GetU32Const(0xFFFF),
  712. "ValueOrdinalIndex");
  713. Value *EncodedValueOrdinalAndIndex =
  714. BC.Builder.CreateOr(VO, VOI, "ValueOrdinal");
  715. addDebugEntryValue(BC, EncodedValueOrdinalAndIndex);
  716. }
  717. }
  718. void DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext &BC,
  719. StoreInst *Inst) {
  720. std::uint32_t ValueOrdinalBase;
  721. std::uint32_t UnusedValueOrdinalSize;
  722. llvm::Value *ValueOrdinalIndex;
  723. if (!pix_dxil::PixAllocaRegWrite::FromInst(Inst, &ValueOrdinalBase,
  724. &UnusedValueOrdinalSize,
  725. &ValueOrdinalIndex)) {
  726. return;
  727. }
  728. std::uint32_t InstNum;
  729. if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) {
  730. return;
  731. }
  732. addStepDebugEntryValue(BC, InstNum, Inst->getValueOperand(), ValueOrdinalBase,
  733. ValueOrdinalIndex);
  734. }
  735. void DxilDebugInstrumentation::addStepDebugEntry(BuilderContext &BC,
  736. Instruction *Inst) {
  737. if (Inst->getOpcode() == Instruction::OtherOps::PHI) {
  738. return;
  739. }
  740. if (auto *St = llvm::dyn_cast<llvm::StoreInst>(Inst)) {
  741. addStoreStepDebugEntry(BC, St);
  742. return;
  743. }
  744. std::uint32_t RegNum;
  745. if (!pix_dxil::PixDxilReg::FromInst(Inst, &RegNum)) {
  746. return;
  747. }
  748. std::uint32_t InstNum;
  749. if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) {
  750. return;
  751. }
  752. addStepDebugEntryValue(BC, InstNum, Inst, RegNum, BC.Builder.getInt32(0));
  753. }
  754. void DxilDebugInstrumentation::addStepDebugEntryValue(
  755. BuilderContext &BC, std::uint32_t InstNum, Value *V,
  756. std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex) {
  757. const Type::TypeID ID = V->getType()->getTypeID();
  758. switch (ID) {
  759. case Type::TypeID::StructTyID:
  760. case Type::TypeID::VoidTyID:
  761. addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepVoid, BC,
  762. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  763. break;
  764. case Type::TypeID::FloatTyID:
  765. addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC,
  766. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  767. break;
  768. case Type::TypeID::IntegerTyID:
  769. if (V->getType()->getIntegerBitWidth() == 64) {
  770. addStepEntryForType<uint64_t>(DebugShaderModifierRecordTypeDXILStepUint64,
  771. BC, InstNum, V, ValueOrdinal,
  772. ValueOrdinalIndex);
  773. } else {
  774. addStepEntryForType<uint32_t>(DebugShaderModifierRecordTypeDXILStepUint32,
  775. BC, InstNum, V, ValueOrdinal,
  776. ValueOrdinalIndex);
  777. }
  778. break;
  779. case Type::TypeID::DoubleTyID:
  780. addStepEntryForType<double>(DebugShaderModifierRecordTypeDXILStepDouble, BC,
  781. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  782. break;
  783. case Type::TypeID::HalfTyID:
  784. addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC,
  785. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  786. break;
  787. case Type::TypeID::PointerTyID:
  788. // Skip pointer calculation instructions. They aren't particularly
  789. // meaningful to the user (being a mere implementation detail for lookup
  790. // tables, etc.), and their type is problematic from a UI point of view. The
  791. // subsequent instructions that dereference the pointer will be properly
  792. // instrumented and show the (meaningful) retrieved value.
  793. break;
  794. case Type::TypeID::FP128TyID:
  795. case Type::TypeID::LabelTyID:
  796. case Type::TypeID::MetadataTyID:
  797. case Type::TypeID::FunctionTyID:
  798. case Type::TypeID::ArrayTyID:
  799. case Type::TypeID::VectorTyID:
  800. case Type::TypeID::X86_FP80TyID:
  801. case Type::TypeID::X86_MMXTyID:
  802. case Type::TypeID::PPC_FP128TyID:
  803. assert(false);
  804. }
  805. }
  806. bool DxilDebugInstrumentation::runOnModule(Module &M) {
  807. DxilModule &DM = M.GetOrCreateDxilModule();
  808. LLVMContext &Ctx = M.getContext();
  809. OP *HlslOP = DM.GetOP();
  810. auto ShaderModel = DM.GetShaderModel();
  811. switch (ShaderModel->GetKind()) {
  812. case DXIL::ShaderKind::Amplification:
  813. case DXIL::ShaderKind::Mesh:
  814. case DXIL::ShaderKind::Vertex:
  815. case DXIL::ShaderKind::Geometry:
  816. case DXIL::ShaderKind::Pixel:
  817. case DXIL::ShaderKind::Compute:
  818. break;
  819. default:
  820. return false;
  821. }
  822. // First record pointers to all instructions in the function:
  823. std::vector<Instruction *> AllInstructions;
  824. for (inst_iterator I = inst_begin(DM.GetEntryFunction()),
  825. E = inst_end(DM.GetEntryFunction());
  826. I != E; ++I) {
  827. AllInstructions.push_back(&*I);
  828. }
  829. // Branchless instrumentation requires taking care of a few things:
  830. // -Each invocation of the shader will be either of interest or not of
  831. // interest
  832. // -If of interest, the offset into the output UAV will be as expected
  833. // -If not, the offset is forced to (UAVsize) - (Small Amount), and that
  834. // output is ignored by the CPU-side code.
  835. // -The invocation of interest may overflow the UAV. This is handled by taking
  836. // the modulus of the
  837. // output index. Overflow is then detected on the CPU side by checking for
  838. // the presence of a canary value at (UAVSize) - (Small Amount) * 2 (which is
  839. // actually a conservative definition of overflow).
  840. //
  841. Instruction *firstInsertionPt =
  842. dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
  843. IRBuilder<> Builder(firstInsertionPt);
  844. BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
  845. addUAV(BC);
  846. auto SystemValues = addRequiredSystemValues(BC);
  847. addInvocationSelectionProlog(BC, SystemValues);
  848. addInvocationStartMarker(BC);
  849. // Explicitly name new blocks in order to provide stable names for testing purposes
  850. int NewBlockCounter = 0;
  851. auto Fn = DM.GetEntryFunction();
  852. auto &Blocks = Fn->getBasicBlockList();
  853. for (auto &CurrentBlock : Blocks) {
  854. struct ValueAndPhi {
  855. Value *Val;
  856. PHINode *Phi;
  857. unsigned Index;
  858. };
  859. std::map<BasicBlock *, std::vector<ValueAndPhi>> InsertableEdges;
  860. auto &Is = CurrentBlock.getInstList();
  861. for (auto &Inst : Is) {
  862. if (Inst.getOpcode() != Instruction::OtherOps::PHI) {
  863. break;
  864. }
  865. PHINode &PN = llvm::cast<PHINode>(Inst);
  866. for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
  867. BasicBlock *PhiBB = PN.getIncomingBlock(i);
  868. Value *PhiVal = PN.getIncomingValue(i);
  869. InsertableEdges[PhiBB].push_back({PhiVal, &PN, i});
  870. }
  871. }
  872. for (auto &InsertableEdge : InsertableEdges) {
  873. auto *NewBlock = BasicBlock::Create(Ctx, "PIXDebug" + std::to_string(NewBlockCounter++),
  874. InsertableEdge.first->getParent());
  875. IRBuilder<> Builder(NewBlock);
  876. auto *PreviousBlock = InsertableEdge.first;
  877. // Modify all successor operands of the terminator in the previous block
  878. // that match the current block to point to the new block:
  879. TerminatorInst *terminator = PreviousBlock->getTerminator();
  880. unsigned NumSuccessors = terminator->getNumSuccessors();
  881. for (unsigned SuccessorIndex = 0; SuccessorIndex < NumSuccessors;
  882. ++SuccessorIndex) {
  883. auto *CurrentSuccessor = terminator->getSuccessor(SuccessorIndex);
  884. if (CurrentSuccessor == &CurrentBlock) {
  885. terminator->setSuccessor(SuccessorIndex, NewBlock);
  886. }
  887. }
  888. // Modify the Phis and add debug instrumentation
  889. for (auto &ValueNPhi : InsertableEdge.second) {
  890. // Modify the phi to refer to the new block:
  891. ValueNPhi.Phi->setIncomingBlock(ValueNPhi.Index, NewBlock);
  892. // Add instrumentation to the new block
  893. std::uint32_t RegNum;
  894. if (!pix_dxil::PixDxilReg::FromInst(ValueNPhi.Phi, &RegNum)) {
  895. continue;
  896. }
  897. std::uint32_t InstNum;
  898. if (!pix_dxil::PixDxilInstNum::FromInst(ValueNPhi.Phi, &InstNum)) {
  899. continue;
  900. }
  901. BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
  902. addStepDebugEntryValue(BC, InstNum, ValueNPhi.Val, RegNum,
  903. BC.Builder.getInt32(0));
  904. }
  905. // Add a branch to the new block to point to the current block
  906. Builder.CreateBr(&CurrentBlock);
  907. }
  908. }
  909. // Instrument original instructions:
  910. for (auto &Inst : AllInstructions) {
  911. // Instrumentation goes after the instruction if it is not a terminator.
  912. // Otherwise, Instrumentation goes prior to the instruction.
  913. if (!Inst->isTerminator()) {
  914. IRBuilder<> Builder(Inst->getNextNode());
  915. BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder};
  916. addStepDebugEntry(BC2, Inst);
  917. } else {
  918. // Insert before this instruction
  919. IRBuilder<> Builder(Inst);
  920. BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder};
  921. addStepDebugEntry(BC2, Inst);
  922. }
  923. }
  924. DM.ReEmitDxilResources();
  925. return true;
  926. }
  927. char DxilDebugInstrumentation::ID = 0;
  928. ModulePass *llvm::createDxilDebugInstrumentationPass() {
  929. return new DxilDebugInstrumentation();
  930. }
  931. INITIALIZE_PASS(DxilDebugInstrumentation, "hlsl-dxil-debug-instrumentation",
  932. "HLSL DXIL debug instrumentation for PIX", false, false)