DxilDebugInstrumentation.cpp 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // DxilDebugInstrumentation.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Adds instrumentation that enables shader debugging in PIX //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #include "dxc/DXIL/DxilModule.h"
  12. #include "dxc/DXIL/DxilOperations.h"
  13. #include "dxc/DXIL/DxilUtil.h"
  14. #include "dxc/DxilPIXPasses/DxilPIXPasses.h"
  15. #include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h"
  16. #include "dxc/HLSL/DxilGenerationPass.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/IR/Constants.h"
  19. #include "llvm/IR/IRBuilder.h"
  20. #include "llvm/IR/InstIterator.h"
  21. #include "llvm/IR/Module.h"
  22. using namespace llvm;
  23. using namespace hlsl;
  24. // Overview of instrumentation:
  25. //
  26. // In summary, instructions are added that cause a "trace" of the execution of
  27. // the shader to be written out to a UAV. This trace is then used by a debugger
  28. // application to provide a post-mortem debugging experience that reconstructs
  29. // the execution history of the shader.
  30. //
  31. // The trace is only required for a particular shader instance of interest, and
  32. // a branchless mechanism is used to write the trace either to an incrementing
  33. // location within the UAV, or to a "dumping ground" area at the top of the UAV
  34. // if the instance is not of interest.
  35. //
  36. // The following modifications are made:
  37. //
  38. // First, instructions are added to the top of the entry point function that
  39. // implement the following:
  40. // - Examine the input variables that define the instance of the shader that is
  41. // running. This will
  42. // be SV_Position for pixel shaders, SV_Vertex+SV_Instance for vertex
  43. // shaders, thread id for compute shaders etc. If these system values need to
  44. // be added to the shader, then they are also added to the input signature,
  45. // if appropriate.
  46. // - Compare the above variables with the instance of interest defined by the
  47. // invoker of this pass.
  48. // Deduce two values: a multiplicand and an addend that together allow a
  49. // branchless calculation of the offset into the UAV at which to write via
  50. // "offset = offset * multiplicand + addend." If the instance is NOT of
  51. // interest, the multiplicand is zero and the addend is sizeof(UAV)-(a little
  52. // bit), causing writes for uninteresting invocations to end up at the top of
  53. // the UAV. Otherwise the multiplicand is 1 and the addend is 0.
  54. // - Calculate an "instance identifier". Even with the above instance
  55. // identification, several invocations may
  56. // end up matching the selection criteria. Specifically, this happens during
  57. // a draw call in which many triangles overlap the pixel of interest. More on
  58. // this below.
  59. //
  60. // During execution, the instrumentation for most instructions causes data to be
  61. // emitted to the UAV. The index at which data is written is identified by
  62. // treating the first uint32 of the UAV as an index which is atomically
  63. // incremented by the instrumentation. The very first value of this counter that
  64. // is encountered by each invocation is used as the "instance identifier"
  65. // mentioned above. That instance identifier is written out with each packet,
  66. // since many pixel shaders executing in parallel will emit interleaved packets,
  67. // and the debugger application uses the identifiers to group packets from each
  68. // separate invocation together.
  69. //
  70. // If an instruction has a non-void and primitive return type, i.e. isn't a
  71. // struct, then the instrumentation will write that value out to the UAV as well
  72. // as part of the "step" data packet.
  73. //
  74. // The limiting size of the UAV is enforced in a branchless way by ANDing the
  75. // offset with a precomputed mask of sizeof(UAV)-64KB-1. The actual size of
  76. // the UAV allocated by the caller is required to be a power of two plus 64KB for
  77. // this reason. The caller detects UAV overrun by examining a canary value close
  78. // to the end of the power-of-two size of the UAV. If this value has been
  79. // overwritten, the debug session is deemed to have overflowed the UAV. The
  80. // caller will then allocate a UAV that is twice the size and try again, up to a
  81. // predefined maximum.
  82. // Keep these in sync with the same-named value in the debugger application's
  83. // WinPixShaderUtils.h
  84. constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024;
  85. // The actual max size per record is much smaller than this, but it never
  86. // hurts to be generous.
  87. constexpr size_t CounterOffsetBeyondUsefulData = DebugBufferDumpingGroundSize / 2;
  88. // These definitions echo those in the debugger application's
  89. // debugshaderrecord.h file
  90. enum DebugShaderModifierRecordType {
  91. DebugShaderModifierRecordTypeInvocationStartMarker,
  92. DebugShaderModifierRecordTypeStep,
  93. DebugShaderModifierRecordTypeEvent,
  94. DebugShaderModifierRecordTypeInputRegister,
  95. DebugShaderModifierRecordTypeReadRegister,
  96. DebugShaderModifierRecordTypeWrittenRegister,
  97. DebugShaderModifierRecordTypeRegisterRelativeIndex0,
  98. DebugShaderModifierRecordTypeRegisterRelativeIndex1,
  99. DebugShaderModifierRecordTypeRegisterRelativeIndex2,
  100. DebugShaderModifierRecordTypeDXILStepVoid = 251,
  101. DebugShaderModifierRecordTypeDXILStepFloat = 252,
  102. DebugShaderModifierRecordTypeDXILStepUint32 = 253,
  103. DebugShaderModifierRecordTypeDXILStepUint64 = 254,
  104. DebugShaderModifierRecordTypeDXILStepDouble = 255,
  105. };
  106. // These structs echo those in the debugger application's debugshaderrecord.h
  107. // file, but are recapitulated here because the originals use unnamed unions
  108. // which are disallowed by DXCompiler's build.
  109. //
  110. #pragma pack(push, 4)
  111. struct DebugShaderModifierRecordHeader {
  112. union {
  113. struct {
  114. uint32_t SizeDwords : 4;
  115. uint32_t Flags : 4;
  116. uint32_t Type : 8;
  117. uint32_t HeaderPayload : 16;
  118. } Details;
  119. uint32_t u32Header;
  120. } Header;
  121. uint32_t UID;
  122. };
  123. struct DebugShaderModifierRecordDXILStepBase {
  124. union {
  125. struct {
  126. uint32_t SizeDwords : 4;
  127. uint32_t Flags : 4;
  128. uint32_t Type : 8;
  129. uint32_t Opcode : 16;
  130. } Details;
  131. uint32_t u32Header;
  132. } Header;
  133. uint32_t UID;
  134. uint32_t InstructionOffset;
  135. };
  136. template <typename ReturnType>
  137. struct DebugShaderModifierRecordDXILStep
  138. : public DebugShaderModifierRecordDXILStepBase {
  139. ReturnType ReturnValue;
  140. union {
  141. struct {
  142. uint32_t ValueOrdinalBase : 16;
  143. uint32_t ValueOrdinalIndex : 16;
  144. } Details;
  145. uint32_t u32ValueOrdinal;
  146. } ValueOrdinal;
  147. };
  148. template <>
  149. struct DebugShaderModifierRecordDXILStep<void>
  150. : public DebugShaderModifierRecordDXILStepBase {};
  151. #pragma pack(pop)
  152. uint32_t
  153. DebugShaderModifierRecordPayloadSizeDwords(size_t recordTotalSizeBytes) {
  154. return ((recordTotalSizeBytes - sizeof(DebugShaderModifierRecordHeader)) /
  155. sizeof(uint32_t));
  156. }
  157. class DxilDebugInstrumentation : public ModulePass {
  158. private:
  159. union ParametersAllTogether {
  160. unsigned Parameters[3];
  161. struct PixelShaderParameters {
  162. unsigned X;
  163. unsigned Y;
  164. } PixelShader;
  165. struct VertexShaderParameters {
  166. unsigned VertexId;
  167. unsigned InstanceId;
  168. } VertexShader;
  169. struct ComputeShaderParameters {
  170. unsigned ThreadIdX;
  171. unsigned ThreadIdY;
  172. unsigned ThreadIdZ;
  173. } ComputeShader;
  174. struct GeometryShaderParameters {
  175. unsigned PrimitiveId;
  176. unsigned InstanceId;
  177. } GeometryShader;
  178. } m_Parameters = {{0, 0, 0}};
  179. union SystemValueIndices {
  180. struct PixelShaderParameters {
  181. unsigned Position;
  182. } PixelShader;
  183. struct VertexShaderParameters {
  184. unsigned VertexId;
  185. unsigned InstanceId;
  186. } VertexShader;
  187. struct GeometryShaderParameters {
  188. unsigned PrimitiveId;
  189. unsigned InstanceId;
  190. } GeometryShader;
  191. };
  192. uint64_t m_UAVSize = 1024 * 1024;
  193. Value *m_SelectionCriterion = nullptr;
  194. CallInst *m_HandleForUAV = nullptr;
  195. Value *m_InvocationId = nullptr;
  196. // Together these two values allow branchless writing to the UAV. An
  197. // invocation of the shader is either of interest or not (e.g. it writes to
  198. // the pixel the user selected for debugging or it doesn't). If not of
  199. // interest, debugging output will still occur, but it will be relegated to
  200. // the very top few bytes of the UAV. Invocations of interest, by contrast,
  201. // will be written to the UAV at sequentially increasing offsets.
  202. // This value will either be one or zero (one if the invocation is of
  203. // interest, zero otherwise)
  204. Value *m_OffsetMultiplicand = nullptr;
  205. // This will either be zero (if the invocation is of interest) or
  206. // (UAVSize)-(SmallValue) if not.
  207. Value *m_OffsetAddend = nullptr;
  208. Constant *m_OffsetMask = nullptr;
  209. Constant *m_CounterOffset = nullptr;
  210. struct BuilderContext {
  211. Module &M;
  212. DxilModule &DM;
  213. LLVMContext &Ctx;
  214. OP *HlslOP;
  215. IRBuilder<> &Builder;
  216. };
  217. uint32_t m_RemainingReservedSpaceInBytes = 0;
  218. Value *m_CurrentIndex = nullptr;
  219. public:
  220. static char ID; // Pass identification, replacement for typeid
  221. explicit DxilDebugInstrumentation() : ModulePass(ID) {}
  222. const char *getPassName() const override {
  223. return "Add PIX debug instrumentation";
  224. }
  225. void applyOptions(PassOptions O) override;
  226. bool runOnModule(Module &M) override;
  227. private:
  228. SystemValueIndices addRequiredSystemValues(BuilderContext &BC);
  229. void addUAV(BuilderContext &BC);
  230. void addInvocationSelectionProlog(BuilderContext &BC,
  231. SystemValueIndices SVIndices);
  232. Value *addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
  233. Value *addGeometryShaderProlog(BuilderContext &BC,
  234. SystemValueIndices SVIndices);
  235. Value *addDispatchedShaderProlog(BuilderContext &BC);
  236. Value *addVertexShaderProlog(BuilderContext &BC,
  237. SystemValueIndices SVIndices);
  238. void addDebugEntryValue(BuilderContext &BC, Value *TheValue);
  239. void addInvocationStartMarker(BuilderContext &BC);
  240. void reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInDwords);
  241. void addStoreStepDebugEntry(BuilderContext &BC, StoreInst *Inst);
  242. void addStepDebugEntry(BuilderContext &BC, Instruction *Inst);
  243. void addStepDebugEntryValue(BuilderContext &BC, std::uint32_t InstNum,
  244. Value *V, std::uint32_t ValueOrdinal,
  245. Value *ValueOrdinalIndex);
  246. uint32_t UAVDumpingGroundOffset();
  247. template <typename ReturnType>
  248. void addStepEntryForType(DebugShaderModifierRecordType RecordType,
  249. BuilderContext &BC, std::uint32_t InstNum, Value *V,
  250. std::uint32_t ValueOrdinal,
  251. Value *ValueOrdinalIndex);
  252. };
  253. void DxilDebugInstrumentation::applyOptions(PassOptions O) {
  254. GetPassOptionUnsigned(O, "parameter0", &m_Parameters.Parameters[0], 0);
  255. GetPassOptionUnsigned(O, "parameter1", &m_Parameters.Parameters[1], 0);
  256. GetPassOptionUnsigned(O, "parameter2", &m_Parameters.Parameters[2], 0);
  257. GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024);
  258. }
  259. uint32_t DxilDebugInstrumentation::UAVDumpingGroundOffset() {
  260. return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
  261. }
  262. DxilDebugInstrumentation::SystemValueIndices
  263. DxilDebugInstrumentation::addRequiredSystemValues(BuilderContext &BC) {
  264. SystemValueIndices SVIndices{};
  265. hlsl::DxilSignature &InputSignature = BC.DM.GetInputSignature();
  266. auto &InputElements = InputSignature.GetElements();
  267. auto ShaderModel = BC.DM.GetShaderModel();
  268. switch (ShaderModel->GetKind()) {
  269. case DXIL::ShaderKind::Amplification:
  270. case DXIL::ShaderKind::Mesh:
  271. case DXIL::ShaderKind::Compute:
  272. // Dispatch* thread Id is not in the input signature
  273. break;
  274. case DXIL::ShaderKind::Vertex: {
  275. {
  276. auto Existing_SV_VertexId = std::find_if(
  277. InputElements.begin(), InputElements.end(),
  278. [](const std::unique_ptr<DxilSignatureElement> &Element) {
  279. return Element->GetSemantic()->GetKind() ==
  280. hlsl::DXIL::SemanticKind::VertexID;
  281. });
  282. if (Existing_SV_VertexId == InputElements.end()) {
  283. auto Added_SV_VertexId =
  284. llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
  285. Added_SV_VertexId->Initialize("VertexId", hlsl::CompType::getF32(),
  286. hlsl::DXIL::InterpolationMode::Undefined,
  287. 1, 1);
  288. Added_SV_VertexId->AppendSemanticIndex(0);
  289. Added_SV_VertexId->SetSigPointKind(DXIL::SigPointKind::VSIn);
  290. Added_SV_VertexId->SetKind(hlsl::DXIL::SemanticKind::VertexID);
  291. auto index = InputSignature.AppendElement(std::move(Added_SV_VertexId));
  292. SVIndices.VertexShader.VertexId = InputElements[index]->GetID();
  293. } else {
  294. SVIndices.VertexShader.VertexId = Existing_SV_VertexId->get()->GetID();
  295. }
  296. }
  297. {
  298. auto Existing_SV_InstanceId = std::find_if(
  299. InputElements.begin(), InputElements.end(),
  300. [](const std::unique_ptr<DxilSignatureElement> &Element) {
  301. return Element->GetSemantic()->GetKind() ==
  302. hlsl::DXIL::SemanticKind::InstanceID;
  303. });
  304. if (Existing_SV_InstanceId == InputElements.end()) {
  305. auto Added_SV_InstanceId =
  306. llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
  307. Added_SV_InstanceId->Initialize(
  308. "InstanceId", hlsl::CompType::getF32(),
  309. hlsl::DXIL::InterpolationMode::Undefined, 1, 1);
  310. Added_SV_InstanceId->AppendSemanticIndex(0);
  311. Added_SV_InstanceId->SetSigPointKind(DXIL::SigPointKind::VSIn);
  312. Added_SV_InstanceId->SetKind(hlsl::DXIL::SemanticKind::InstanceID);
  313. auto index =
  314. InputSignature.AppendElement(std::move(Added_SV_InstanceId));
  315. SVIndices.VertexShader.InstanceId = InputElements[index]->GetID();
  316. } else {
  317. SVIndices.VertexShader.InstanceId =
  318. Existing_SV_InstanceId->get()->GetID();
  319. }
  320. }
  321. } break;
  322. case DXIL::ShaderKind::Geometry:
  323. // GS Instance Id and Primitive Id are not in the input signature
  324. break;
  325. case DXIL::ShaderKind::Pixel: {
  326. auto Existing_SV_Position =
  327. std::find_if(InputElements.begin(), InputElements.end(),
  328. [](const std::unique_ptr<DxilSignatureElement> &Element) {
  329. return Element->GetSemantic()->GetKind() ==
  330. hlsl::DXIL::SemanticKind::Position;
  331. });
  332. // SV_Position, if present, has to have full mask, so we needn't worry
  333. // about the shader having selected components that don't include x or y.
  334. // If not present, we add it.
  335. if (Existing_SV_Position == InputElements.end()) {
  336. auto Added_SV_Position =
  337. llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
  338. Added_SV_Position->Initialize("Position", hlsl::CompType::getF32(),
  339. hlsl::DXIL::InterpolationMode::Linear, 1,
  340. 4);
  341. Added_SV_Position->AppendSemanticIndex(0);
  342. Added_SV_Position->SetSigPointKind(DXIL::SigPointKind::PSIn);
  343. Added_SV_Position->SetKind(hlsl::DXIL::SemanticKind::Position);
  344. auto index = InputSignature.AppendElement(std::move(Added_SV_Position));
  345. SVIndices.PixelShader.Position = InputElements[index]->GetID();
  346. } else {
  347. SVIndices.PixelShader.Position = Existing_SV_Position->get()->GetID();
  348. }
  349. } break;
  350. default:
  351. assert(false); // guaranteed by runOnModule
  352. }
  353. return SVIndices;
  354. }
  355. Value *DxilDebugInstrumentation::addDispatchedShaderProlog(BuilderContext &BC) {
  356. Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
  357. Constant *One32Arg = BC.HlslOP->GetU32Const(1);
  358. Constant *Two32Arg = BC.HlslOP->GetU32Const(2);
  359. auto ThreadIdFunc =
  360. BC.HlslOP->GetOpFunc(DXIL::OpCode::ThreadId, Type::getInt32Ty(BC.Ctx));
  361. Constant *Opcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::ThreadId);
  362. auto ThreadIdX =
  363. BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Zero32Arg}, "ThreadIdX");
  364. auto ThreadIdY =
  365. BC.Builder.CreateCall(ThreadIdFunc, {Opcode, One32Arg}, "ThreadIdY");
  366. auto ThreadIdZ =
  367. BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Two32Arg}, "ThreadIdZ");
  368. // Compare to expected thread ID
  369. auto CompareToX = BC.Builder.CreateICmpEQ(
  370. ThreadIdX, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdX),
  371. "CompareToThreadIdX");
  372. auto CompareToY = BC.Builder.CreateICmpEQ(
  373. ThreadIdY, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdY),
  374. "CompareToThreadIdY");
  375. auto CompareToZ = BC.Builder.CreateICmpEQ(
  376. ThreadIdZ, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdZ),
  377. "CompareToThreadIdZ");
  378. auto CompareXAndY =
  379. BC.Builder.CreateAnd(CompareToX, CompareToY, "CompareXAndY");
  380. auto CompareAll =
  381. BC.Builder.CreateAnd(CompareXAndY, CompareToZ, "CompareAll");
  382. return CompareAll;
  383. }
  384. Value *
  385. DxilDebugInstrumentation::addVertexShaderProlog(BuilderContext &BC,
  386. SystemValueIndices SVIndices) {
  387. Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
  388. Constant *Zero8Arg = BC.HlslOP->GetI8Const(0);
  389. UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  390. auto LoadInputOpFunc =
  391. BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getInt32Ty(BC.Ctx));
  392. Constant *LoadInputOpcode =
  393. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
  394. Constant *SV_Vert_ID =
  395. BC.HlslOP->GetU32Const(SVIndices.VertexShader.VertexId);
  396. auto VertId =
  397. BC.Builder.CreateCall(LoadInputOpFunc,
  398. {LoadInputOpcode, SV_Vert_ID, Zero32Arg /*row*/,
  399. Zero8Arg /*column*/, UndefArg},
  400. "VertId");
  401. Constant *SV_Instance_ID =
  402. BC.HlslOP->GetU32Const(SVIndices.VertexShader.InstanceId);
  403. auto InstanceId =
  404. BC.Builder.CreateCall(LoadInputOpFunc,
  405. {LoadInputOpcode, SV_Instance_ID, Zero32Arg /*row*/,
  406. Zero8Arg /*column*/, UndefArg},
  407. "InstanceId");
  408. // Compare to expected vertex ID and instance ID
  409. auto CompareToVert = BC.Builder.CreateICmpEQ(
  410. VertId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.VertexId),
  411. "CompareToVertId");
  412. auto CompareToInstance = BC.Builder.CreateICmpEQ(
  413. InstanceId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.InstanceId),
  414. "CompareToInstanceId");
  415. auto CompareBoth =
  416. BC.Builder.CreateAnd(CompareToVert, CompareToInstance, "CompareBoth");
  417. return CompareBoth;
  418. }
  419. Value *DxilDebugInstrumentation::addGeometryShaderProlog(
  420. BuilderContext &BC, SystemValueIndices SVIndices) {
  421. auto PrimitiveIdOpFunc =
  422. BC.HlslOP->GetOpFunc(DXIL::OpCode::PrimitiveID, Type::getInt32Ty(BC.Ctx));
  423. Constant *PrimitiveIdOpcode =
  424. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::PrimitiveID);
  425. auto PrimId =
  426. BC.Builder.CreateCall(PrimitiveIdOpFunc, {PrimitiveIdOpcode}, "PrimId");
  427. auto CompareToPrim = BC.Builder.CreateICmpEQ(
  428. PrimId, BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.PrimitiveId),
  429. "CompareToPrimId");
  430. if (BC.DM.GetGSInstanceCount() <= 1) {
  431. return CompareToPrim;
  432. }
  433. auto GSInstanceIdOpFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::GSInstanceID,
  434. Type::getInt32Ty(BC.Ctx));
  435. Constant *GSInstanceIdOpcode =
  436. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::GSInstanceID);
  437. auto GSInstanceId = BC.Builder.CreateCall(
  438. GSInstanceIdOpFunc, {GSInstanceIdOpcode}, "GSInstanceId");
  439. // Compare to expected vertex ID and instance ID
  440. auto CompareToInstance = BC.Builder.CreateICmpEQ(
  441. GSInstanceId,
  442. BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.InstanceId),
  443. "CompareToInstanceId");
  444. auto CompareBoth =
  445. BC.Builder.CreateAnd(CompareToPrim, CompareToInstance, "CompareBoth");
  446. return CompareBoth;
  447. }
  448. Value *
  449. DxilDebugInstrumentation::addPixelShaderProlog(BuilderContext &BC,
  450. SystemValueIndices SVIndices) {
  451. Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
  452. Constant *Zero8Arg = BC.HlslOP->GetI8Const(0);
  453. Constant *One8Arg = BC.HlslOP->GetI8Const(1);
  454. UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  455. // Convert SV_POSITION to UINT
  456. Value *XAsInt;
  457. Value *YAsInt;
  458. {
  459. auto LoadInputOpFunc =
  460. BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(BC.Ctx));
  461. Constant *LoadInputOpcode =
  462. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
  463. Constant *SV_Pos_ID =
  464. BC.HlslOP->GetU32Const(SVIndices.PixelShader.Position);
  465. auto XPos =
  466. BC.Builder.CreateCall(LoadInputOpFunc,
  467. {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/,
  468. Zero8Arg /*column*/, UndefArg},
  469. "XPos");
  470. auto YPos =
  471. BC.Builder.CreateCall(LoadInputOpFunc,
  472. {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/,
  473. One8Arg /*column*/, UndefArg},
  474. "YPos");
  475. XAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, XPos,
  476. Type::getInt32Ty(BC.Ctx), "XIndex");
  477. YAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, YPos,
  478. Type::getInt32Ty(BC.Ctx), "YIndex");
  479. }
  480. // Compare to expected pixel position and primitive ID
  481. auto CompareToX = BC.Builder.CreateICmpEQ(
  482. XAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.X), "CompareToX");
  483. auto CompareToY = BC.Builder.CreateICmpEQ(
  484. YAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.Y), "CompareToY");
  485. auto ComparePos = BC.Builder.CreateAnd(CompareToX, CompareToY, "ComparePos");
  486. return ComparePos;
  487. }
  488. void DxilDebugInstrumentation::addUAV(BuilderContext &BC) {
  489. // Set up a UAV with structure of a single int
  490. unsigned int UAVResourceHandle =
  491. static_cast<unsigned int>(BC.DM.GetUAVs().size());
  492. SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(BC.Ctx)};
  493. llvm::StructType *UAVStructTy =
  494. llvm::StructType::create(Elements, "PIX_DebugUAV_Type");
  495. std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
  496. pUAV->SetGlobalName("PIX_DebugUAVName");
  497. pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
  498. pUAV->SetID(UAVResourceHandle);
  499. pUAV->SetSpaceID(
  500. (unsigned int)-2); // This is the reserved-for-tools register space
  501. pUAV->SetSampleCount(1);
  502. pUAV->SetGloballyCoherent(false);
  503. pUAV->SetHasCounter(false);
  504. pUAV->SetCompType(CompType::getI32());
  505. pUAV->SetLowerBound(0);
  506. pUAV->SetRangeSize(1);
  507. pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
  508. pUAV->SetRW(true);
  509. auto ID = BC.DM.AddUAV(std::move(pUAV));
  510. assert(ID == UAVResourceHandle);
  511. BC.DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true);
  512. // Create handle for the newly-added UAV
  513. Function *CreateHandleOpFunc =
  514. BC.HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx));
  515. Constant *CreateHandleOpcodeArg =
  516. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
  517. Constant *UAVVArg = BC.HlslOP->GetI8Const(
  518. static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
  519. DXIL::ResourceClass::UAV));
  520. Constant *MetaDataArg = BC.HlslOP->GetU32Const(
  521. ID); // position of the metadata record in the corresponding metadata list
  522. Constant *IndexArg = BC.HlslOP->GetU32Const(0); //
  523. Constant *FalseArg =
  524. BC.HlslOP->GetI1Const(0); // non-uniform resource index: false
  525. m_HandleForUAV = BC.Builder.CreateCall(
  526. CreateHandleOpFunc,
  527. {CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg},
  528. "PIX_DebugUAV_Handle");
  529. }
  530. void DxilDebugInstrumentation::addInvocationSelectionProlog(
  531. BuilderContext &BC, SystemValueIndices SVIndices) {
  532. auto ShaderModel = BC.DM.GetShaderModel();
  533. Value *ParameterTestResult = nullptr;
  534. switch (ShaderModel->GetKind()) {
  535. case DXIL::ShaderKind::Compute:
  536. case DXIL::ShaderKind::Amplification:
  537. case DXIL::ShaderKind::Mesh:
  538. ParameterTestResult = addDispatchedShaderProlog(BC);
  539. break;
  540. case DXIL::ShaderKind::Geometry:
  541. ParameterTestResult = addGeometryShaderProlog(BC, SVIndices);
  542. break;
  543. case DXIL::ShaderKind::Vertex:
  544. ParameterTestResult = addVertexShaderProlog(BC, SVIndices);
  545. break;
  546. case DXIL::ShaderKind::Pixel:
  547. ParameterTestResult = addPixelShaderProlog(BC, SVIndices);
  548. break;
  549. default:
  550. assert(false); // guaranteed by runOnModule
  551. }
  552. // This is a convenient place to calculate the values that modify the UAV
  553. // offset for invocations of interest and for UAV size.
  554. m_OffsetMultiplicand =
  555. BC.Builder.CreateCast(Instruction::CastOps::ZExt, ParameterTestResult,
  556. Type::getInt32Ty(BC.Ctx), "OffsetMultiplicand");
  557. auto InverseOffsetMultiplicand =
  558. BC.Builder.CreateSub(BC.HlslOP->GetU32Const(1), m_OffsetMultiplicand,
  559. "ComplementOfMultiplicand");
  560. m_OffsetAddend =
  561. BC.Builder.CreateMul(BC.HlslOP->GetU32Const(UAVDumpingGroundOffset()),
  562. InverseOffsetMultiplicand, "OffsetAddend");
  563. m_OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);
  564. m_CounterOffset = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() + CounterOffsetBeyondUsefulData);
  565. m_SelectionCriterion = ParameterTestResult;
  566. }
  567. void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC,
  568. uint32_t SpaceInBytes) {
  569. assert(m_CurrentIndex == nullptr);
  570. assert(m_RemainingReservedSpaceInBytes == 0);
  571. m_RemainingReservedSpaceInBytes = SpaceInBytes;
  572. // Insert the UAV increment instruction:
  573. Function *AtomicOpFunc =
  574. BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(BC.Ctx));
  575. Constant *AtomicBinOpcode =
  576. BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
  577. Constant *AtomicAdd =
  578. BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
  579. UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  580. // so inc will be zero for uninteresting invocations:
  581. Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes);
  582. Value *IncrementForThisInvocation = BC.Builder.CreateMul(
  583. Increment, m_OffsetMultiplicand, "IncrementForThisInvocation");
  584. auto PreviousValue = BC.Builder.CreateCall(
  585. AtomicOpFunc,
  586. {
  587. AtomicBinOpcode, // i32, ; opcode
  588. m_HandleForUAV, // %dx.types.Handle, ; resource handle
  589. AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR,
  590. // XOR, IMIN, IMAX, UMIN, UMAX
  591. m_CounterOffset, // i32, ; coordinate c0: index in bytes
  592. UndefArg, // i32, ; coordinate c1 (unused)
  593. UndefArg, // i32, ; coordinate c2 (unused)
  594. IncrementForThisInvocation, // i32); increment value
  595. },
  596. "UAVIncResult");
  597. if (m_InvocationId == nullptr) {
  598. m_InvocationId = PreviousValue;
  599. }
  600. auto MaskedForLimit =
  601. BC.Builder.CreateAnd(PreviousValue, m_OffsetMask, "MaskedForUAVLimit");
  602. // The return value will either end up being itself (multiplied by one and
  603. // added with zero) or the "dump uninteresting things here" value of (UAVSize
  604. // - a bit).
  605. auto MultipliedForInterest = BC.Builder.CreateMul(
  606. MaskedForLimit, m_OffsetMultiplicand, "MultipliedForInterest");
  607. auto AddedForInterest = BC.Builder.CreateAdd(
  608. MultipliedForInterest, m_OffsetAddend, "AddedForInterest");
  609. m_CurrentIndex = AddedForInterest;
  610. }
  611. void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC,
  612. Value *TheValue) {
  613. assert(m_RemainingReservedSpaceInBytes > 0);
  614. auto TheValueTypeID = TheValue->getType()->getTypeID();
  615. if (TheValueTypeID == Type::TypeID::DoubleTyID) {
  616. Function *SplitDouble =
  617. BC.HlslOP->GetOpFunc(OP::OpCode::SplitDouble, TheValue->getType());
  618. Constant *SplitDoubleOpcode =
  619. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::SplitDouble);
  620. auto SplitDoubleIntruction = BC.Builder.CreateCall(
  621. SplitDouble, {SplitDoubleOpcode, TheValue}, "SplitDouble");
  622. auto LowBits =
  623. BC.Builder.CreateExtractValue(SplitDoubleIntruction, 0, "LowBits");
  624. auto HighBits =
  625. BC.Builder.CreateExtractValue(SplitDoubleIntruction, 1, "HighBits");
  626. // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding
  627. addDebugEntryValue(BC, LowBits);
  628. addDebugEntryValue(BC, HighBits);
  629. } else if (TheValueTypeID == Type::TypeID::IntegerTyID &&
  630. TheValue->getType()->getIntegerBitWidth() == 64) {
  631. auto LowBits =
  632. BC.Builder.CreateTrunc(TheValue, Type::getInt32Ty(BC.Ctx), "LowBits");
  633. auto ShiftedBits = BC.Builder.CreateLShr(TheValue, 32, "ShiftedBits");
  634. auto HighBits = BC.Builder.CreateTrunc(
  635. ShiftedBits, Type::getInt32Ty(BC.Ctx), "HighBits");
  636. // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding
  637. addDebugEntryValue(BC, LowBits);
  638. addDebugEntryValue(BC, HighBits);
  639. } else if (TheValueTypeID == Type::TypeID::IntegerTyID &&
  640. (TheValue->getType()->getIntegerBitWidth() == 16 ||
  641. TheValue->getType()->getIntegerBitWidth() == 1)) {
  642. auto As32 =
  643. BC.Builder.CreateZExt(TheValue, Type::getInt32Ty(BC.Ctx), "As32");
  644. addDebugEntryValue(BC, As32);
  645. } else if (TheValueTypeID == Type::TypeID::HalfTyID) {
  646. auto AsFloat =
  647. BC.Builder.CreateFPCast(TheValue, Type::getFloatTy(BC.Ctx), "AsFloat");
  648. addDebugEntryValue(BC, AsFloat);
  649. } else {
  650. Function *StoreValue =
  651. BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore,
  652. TheValue->getType()); // Type::getInt32Ty(BC.Ctx));
  653. Constant *StoreValueOpcode =
  654. BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore);
  655. UndefValue *Undef32Arg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  656. UndefValue *UndefArg = nullptr;
  657. if (TheValueTypeID == Type::TypeID::IntegerTyID) {
  658. UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  659. } else if (TheValueTypeID == Type::TypeID::FloatTyID) {
  660. UndefArg = UndefValue::get(Type::getFloatTy(BC.Ctx));
  661. } else {
  662. // The above are the only two valid types for a UAV store
  663. assert(false);
  664. }
  665. Constant *WriteMask_X = BC.HlslOP->GetI8Const(1);
  666. (void)BC.Builder.CreateCall(
  667. StoreValue, {StoreValueOpcode, // i32 opcode
  668. m_HandleForUAV, // %dx.types.Handle, ; resource handle
  669. m_CurrentIndex, // i32 c0: index in bytes into UAV
  670. Undef32Arg, // i32 c1: unused
  671. TheValue,
  672. UndefArg, // unused values
  673. UndefArg, // unused values
  674. UndefArg, // unused values
  675. WriteMask_X});
  676. m_RemainingReservedSpaceInBytes -= 4;
  677. assert(m_RemainingReservedSpaceInBytes < 1024); // check for underflow
  678. if (m_RemainingReservedSpaceInBytes != 0) {
  679. m_CurrentIndex =
  680. BC.Builder.CreateAdd(m_CurrentIndex, BC.HlslOP->GetU32Const(4));
  681. } else {
  682. m_CurrentIndex = nullptr;
  683. }
  684. }
  685. }
  686. void DxilDebugInstrumentation::addInvocationStartMarker(BuilderContext &BC) {
  687. DebugShaderModifierRecordHeader marker{{{0, 0, 0, 0}}, 0};
  688. reserveDebugEntrySpace(BC, sizeof(marker));
  689. marker.Header.Details.SizeDwords =
  690. DebugShaderModifierRecordPayloadSizeDwords(sizeof(marker));
  691. ;
  692. marker.Header.Details.Flags = 0;
  693. marker.Header.Details.Type =
  694. DebugShaderModifierRecordTypeInvocationStartMarker;
  695. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(marker.Header.u32Header));
  696. addDebugEntryValue(BC, m_InvocationId);
  697. }
  698. template <typename ReturnType>
  699. void DxilDebugInstrumentation::addStepEntryForType(
  700. DebugShaderModifierRecordType RecordType, BuilderContext &BC,
  701. std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal,
  702. Value *ValueOrdinalIndex) {
  703. DebugShaderModifierRecordDXILStep<ReturnType> step = {};
  704. reserveDebugEntrySpace(BC, sizeof(step));
  705. step.Header.Details.SizeDwords =
  706. DebugShaderModifierRecordPayloadSizeDwords(sizeof(step));
  707. step.Header.Details.Type = static_cast<uint8_t>(RecordType);
  708. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(step.Header.u32Header));
  709. addDebugEntryValue(BC, m_InvocationId);
  710. addDebugEntryValue(BC, BC.HlslOP->GetU32Const(InstNum));
  711. if (RecordType != DebugShaderModifierRecordTypeDXILStepVoid) {
  712. addDebugEntryValue(BC, V);
  713. IRBuilder<> &B = BC.Builder;
  714. Value *VO = BC.HlslOP->GetU32Const(ValueOrdinal << 16);
  715. Value *VOI = B.CreateAnd(ValueOrdinalIndex, BC.HlslOP->GetU32Const(0xFFFF),
  716. "ValueOrdinalIndex");
  717. Value *EncodedValueOrdinalAndIndex =
  718. BC.Builder.CreateOr(VO, VOI, "ValueOrdinal");
  719. addDebugEntryValue(BC, EncodedValueOrdinalAndIndex);
  720. }
  721. }
  722. void DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext &BC,
  723. StoreInst *Inst) {
  724. std::uint32_t ValueOrdinalBase;
  725. std::uint32_t UnusedValueOrdinalSize;
  726. llvm::Value *ValueOrdinalIndex;
  727. if (!pix_dxil::PixAllocaRegWrite::FromInst(Inst, &ValueOrdinalBase,
  728. &UnusedValueOrdinalSize,
  729. &ValueOrdinalIndex)) {
  730. return;
  731. }
  732. std::uint32_t InstNum;
  733. if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) {
  734. return;
  735. }
  736. addStepDebugEntryValue(BC, InstNum, Inst->getValueOperand(), ValueOrdinalBase,
  737. ValueOrdinalIndex);
  738. }
  739. void DxilDebugInstrumentation::addStepDebugEntry(BuilderContext &BC,
  740. Instruction *Inst) {
  741. if (Inst->getOpcode() == Instruction::OtherOps::PHI) {
  742. return;
  743. }
  744. if (auto *St = llvm::dyn_cast<llvm::StoreInst>(Inst)) {
  745. addStoreStepDebugEntry(BC, St);
  746. return;
  747. }
  748. std::uint32_t RegNum;
  749. if (!pix_dxil::PixDxilReg::FromInst(Inst, &RegNum)) {
  750. return;
  751. }
  752. std::uint32_t InstNum;
  753. if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) {
  754. return;
  755. }
  756. addStepDebugEntryValue(BC, InstNum, Inst, RegNum, BC.Builder.getInt32(0));
  757. }
  758. void DxilDebugInstrumentation::addStepDebugEntryValue(
  759. BuilderContext &BC, std::uint32_t InstNum, Value *V,
  760. std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex) {
  761. const Type::TypeID ID = V->getType()->getTypeID();
  762. switch (ID) {
  763. case Type::TypeID::StructTyID:
  764. case Type::TypeID::VoidTyID:
  765. addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepVoid, BC,
  766. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  767. break;
  768. case Type::TypeID::FloatTyID:
  769. addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC,
  770. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  771. break;
  772. case Type::TypeID::IntegerTyID:
  773. if (V->getType()->getIntegerBitWidth() == 64) {
  774. addStepEntryForType<uint64_t>(DebugShaderModifierRecordTypeDXILStepUint64,
  775. BC, InstNum, V, ValueOrdinal,
  776. ValueOrdinalIndex);
  777. } else {
  778. addStepEntryForType<uint32_t>(DebugShaderModifierRecordTypeDXILStepUint32,
  779. BC, InstNum, V, ValueOrdinal,
  780. ValueOrdinalIndex);
  781. }
  782. break;
  783. case Type::TypeID::DoubleTyID:
  784. addStepEntryForType<double>(DebugShaderModifierRecordTypeDXILStepDouble, BC,
  785. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  786. break;
  787. case Type::TypeID::HalfTyID:
  788. addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC,
  789. InstNum, V, ValueOrdinal, ValueOrdinalIndex);
  790. break;
  791. case Type::TypeID::PointerTyID:
  792. // Skip pointer calculation instructions. They aren't particularly
  793. // meaningful to the user (being a mere implementation detail for lookup
  794. // tables, etc.), and their type is problematic from a UI point of view. The
  795. // subsequent instructions that dereference the pointer will be properly
  796. // instrumented and show the (meaningful) retrieved value.
  797. break;
  798. case Type::TypeID::FP128TyID:
  799. case Type::TypeID::LabelTyID:
  800. case Type::TypeID::MetadataTyID:
  801. case Type::TypeID::FunctionTyID:
  802. case Type::TypeID::ArrayTyID:
  803. case Type::TypeID::VectorTyID:
  804. case Type::TypeID::X86_FP80TyID:
  805. case Type::TypeID::X86_MMXTyID:
  806. case Type::TypeID::PPC_FP128TyID:
  807. assert(false);
  808. }
  809. }
  810. bool DxilDebugInstrumentation::runOnModule(Module &M) {
  811. DxilModule &DM = M.GetOrCreateDxilModule();
  812. LLVMContext &Ctx = M.getContext();
  813. OP *HlslOP = DM.GetOP();
  814. auto ShaderModel = DM.GetShaderModel();
  815. switch (ShaderModel->GetKind()) {
  816. case DXIL::ShaderKind::Amplification:
  817. case DXIL::ShaderKind::Mesh:
  818. case DXIL::ShaderKind::Vertex:
  819. case DXIL::ShaderKind::Geometry:
  820. case DXIL::ShaderKind::Pixel:
  821. case DXIL::ShaderKind::Compute:
  822. break;
  823. default:
  824. return false;
  825. }
  826. // First record pointers to all instructions in the function:
  827. std::vector<Instruction *> AllInstructions;
  828. for (inst_iterator I = inst_begin(DM.GetEntryFunction()),
  829. E = inst_end(DM.GetEntryFunction());
  830. I != E; ++I) {
  831. AllInstructions.push_back(&*I);
  832. }
  833. // Branchless instrumentation requires taking care of a few things:
  834. // -Each invocation of the shader will be either of interest or not of
  835. // interest
  836. // -If of interest, the offset into the output UAV will be as expected
  837. // -If not, the offset is forced to (UAVsize) - (Small Amount), and that
  838. // output is ignored by the CPU-side code.
  839. // -The invocation of interest may overflow the UAV. This is handled by taking
  840. // the modulus of the
  841. // output index. Overflow is then detected on the CPU side by checking for
  842. // the presence of a canary value at (UAVSize) - (Small Amount) * 2 (which is
  843. // actually a conservative definition of overflow).
  844. //
  845. Instruction *firstInsertionPt =
  846. dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
  847. IRBuilder<> Builder(firstInsertionPt);
  848. BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
  849. addUAV(BC);
  850. auto SystemValues = addRequiredSystemValues(BC);
  851. addInvocationSelectionProlog(BC, SystemValues);
  852. addInvocationStartMarker(BC);
  853. // Explicitly name new blocks in order to provide stable names for testing purposes
  854. int NewBlockCounter = 0;
  855. auto Fn = DM.GetEntryFunction();
  856. auto &Blocks = Fn->getBasicBlockList();
  857. for (auto &CurrentBlock : Blocks) {
  858. struct ValueAndPhi {
  859. Value *Val;
  860. PHINode *Phi;
  861. unsigned Index;
  862. };
  863. std::map<BasicBlock *, std::vector<ValueAndPhi>> InsertableEdges;
  864. auto &Is = CurrentBlock.getInstList();
  865. for (auto &Inst : Is) {
  866. if (Inst.getOpcode() != Instruction::OtherOps::PHI) {
  867. break;
  868. }
  869. PHINode &PN = llvm::cast<PHINode>(Inst);
  870. for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
  871. BasicBlock *PhiBB = PN.getIncomingBlock(i);
  872. Value *PhiVal = PN.getIncomingValue(i);
  873. InsertableEdges[PhiBB].push_back({PhiVal, &PN, i});
  874. }
  875. }
  876. for (auto &InsertableEdge : InsertableEdges) {
  877. auto *NewBlock = BasicBlock::Create(Ctx, "PIXDebug" + std::to_string(NewBlockCounter++),
  878. InsertableEdge.first->getParent());
  879. IRBuilder<> Builder(NewBlock);
  880. auto *PreviousBlock = InsertableEdge.first;
  881. // Modify all successor operands of the terminator in the previous block
  882. // that match the current block to point to the new block:
  883. TerminatorInst *terminator = PreviousBlock->getTerminator();
  884. unsigned NumSuccessors = terminator->getNumSuccessors();
  885. for (unsigned SuccessorIndex = 0; SuccessorIndex < NumSuccessors;
  886. ++SuccessorIndex) {
  887. auto *CurrentSuccessor = terminator->getSuccessor(SuccessorIndex);
  888. if (CurrentSuccessor == &CurrentBlock) {
  889. terminator->setSuccessor(SuccessorIndex, NewBlock);
  890. }
  891. }
  892. // Modify the Phis and add debug instrumentation
  893. for (auto &ValueNPhi : InsertableEdge.second) {
  894. // Modify the phi to refer to the new block:
  895. ValueNPhi.Phi->setIncomingBlock(ValueNPhi.Index, NewBlock);
  896. // Add instrumentation to the new block
  897. std::uint32_t RegNum;
  898. if (!pix_dxil::PixDxilReg::FromInst(ValueNPhi.Phi, &RegNum)) {
  899. continue;
  900. }
  901. std::uint32_t InstNum;
  902. if (!pix_dxil::PixDxilInstNum::FromInst(ValueNPhi.Phi, &InstNum)) {
  903. continue;
  904. }
  905. BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
  906. addStepDebugEntryValue(BC, InstNum, ValueNPhi.Val, RegNum,
  907. BC.Builder.getInt32(0));
  908. }
  909. // Add a branch to the new block to point to the current block
  910. Builder.CreateBr(&CurrentBlock);
  911. }
  912. }
  913. // Instrument original instructions:
  914. for (auto &Inst : AllInstructions) {
  915. // Instrumentation goes after the instruction if it is not a terminator.
  916. // Otherwise, Instrumentation goes prior to the instruction.
  917. if (!Inst->isTerminator()) {
  918. IRBuilder<> Builder(Inst->getNextNode());
  919. BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder};
  920. addStepDebugEntry(BC2, Inst);
  921. } else {
  922. // Insert before this instruction
  923. IRBuilder<> Builder(Inst);
  924. BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder};
  925. addStepDebugEntry(BC2, Inst);
  926. }
  927. }
  928. DM.ReEmitDxilResources();
  929. return true;
  930. }
  931. char DxilDebugInstrumentation::ID = 0;
  932. ModulePass *llvm::createDxilDebugInstrumentationPass() {
  933. return new DxilDebugInstrumentation();
  934. }
  935. INITIALIZE_PASS(DxilDebugInstrumentation, "hlsl-dxil-debug-instrumentation",
  936. "HLSL DXIL debug instrumentation for PIX", false, false)