12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020 |
- ///////////////////////////////////////////////////////////////////////////////
- // //
- // DxilDebugInstrumentation.cpp //
- // Copyright (C) Microsoft Corporation. All rights reserved. //
- // This file is distributed under the University of Illinois Open Source //
- // License. See LICENSE.TXT for details. //
- // //
- // Adds instrumentation that enables shader debugging in PIX //
- // //
- ///////////////////////////////////////////////////////////////////////////////
- #include "dxc/DXIL/DxilModule.h"
- #include "dxc/DXIL/DxilOperations.h"
- #include "dxc/DXIL/DxilUtil.h"
- #include "dxc/DxilPIXPasses/DxilPIXPasses.h"
- #include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h"
- #include "dxc/HLSL/DxilGenerationPass.h"
- #include "llvm/ADT/STLExtras.h"
- #include "llvm/IR/Constants.h"
- #include "llvm/IR/IRBuilder.h"
- #include "llvm/IR/InstIterator.h"
- #include "llvm/IR/Module.h"
- #include "PixPassHelpers.h"
- using namespace llvm;
- using namespace hlsl;
- // Overview of instrumentation:
- //
- // In summary, instructions are added that cause a "trace" of the execution of
- // the shader to be written out to a UAV. This trace is then used by a debugger
- // application to provide a post-mortem debugging experience that reconstructs
- // the execution history of the shader.
- //
- // The trace is only required for a particular shader instance of interest, and
- // a branchless mechanism is used to write the trace either to an incrementing
- // location within the UAV, or to a "dumping ground" area at the top of the UAV
- // if the instance is not of interest.
- //
- // The following modifications are made:
- //
- // First, instructions are added to the top of the entry point function that
- // implement the following:
- // - Examine the input variables that define the instance of the shader that is
- // running. This will
- // be SV_Position for pixel shaders, SV_Vertex+SV_Instance for vertex
- // shaders, thread id for compute shaders etc. If these system values need to
- // be added to the shader, then they are also added to the input signature,
- // if appropriate.
- // - Compare the above variables with the instance of interest defined by the
- // invoker of this pass.
- // Deduce two values: a multiplicand and an addend that together allow a
- // branchless calculation of the offset into the UAV at which to write via
- // "offset = offset * multiplicand + addend." If the instance is NOT of
- // interest, the multiplicand is zero and the addend is sizeof(UAV)-(a little
- // bit), causing writes for uninteresting invocations to end up at the top of
- // the UAV. Otherwise the multiplicand is 1 and the addend is 0.
- // - Calculate an "instance identifier". Even with the above instance
- // identification, several invocations may
- // end up matching the selection criteria. Specifically, this happens during
- // a draw call in which many triangles overlap the pixel of interest. More on
- // this below.
- //
- // During execution, the instrumentation for most instructions causes data to be
- // emitted to the UAV. The index at which data is written is identified by
- // treating the first uint32 of the UAV as an index which is atomically
- // incremented by the instrumentation. The very first value of this counter that
- // is encountered by each invocation is used as the "instance identifier"
- // mentioned above. That instance identifier is written out with each packet,
- // since many pixel shaders executing in parallel will emit interleaved packets,
- // and the debugger application uses the identifiers to group packets from each
- // separate invocation together.
- //
- // If an instruction has a non-void and primitive return type, i.e. isn't a
- // struct, then the instrumentation will write that value out to the UAV as well
- // as part of the "step" data packet.
- //
- // The limiting size of the UAV is enforced in a branchless way by ANDing the
- // offset with a precomputed value that is sizeof(UAV)-64. The actual size of
- // the UAV allocated by the caller is required to be a power of two plus 64 for
- // this reason. The caller detects UAV overrun by examining a canary value close
- // to the end of the power-of-two size of the UAV. If this value has been
- // overwritten, the debug session is deemed to have overflowed the UAV. The
- // caller will then allocate a UAV that is twice the size and try again, up to a
- // predefined maximum.
// NOTE: both constants below must stay in sync with the same-named values in
// the debugger application's WinPixShaderUtils.h.

// Size in bytes of the region carved off the end of the UAV (see
// UAVDumpingGroundOffset()). A single record is far smaller than this; the
// generous size is deliberate.
constexpr uint64_t DebugBufferDumpingGroundSize = 64u * 1024u;

// Byte distance from the start of the dumping ground to the atomically
// incremented write counter (half-way into the dumping ground).
constexpr size_t CounterOffsetBeyondUsefulData =
    DebugBufferDumpingGroundSize / 2;
- // These definitions echo those in the debugger application's
- // debugshaderrecord.h file
// Record type tags written into the 8-bit Type field of every record header.
// The numeric values are spelled out because they are part of the data format
// shared with the debugger application.
enum DebugShaderModifierRecordType {
  DebugShaderModifierRecordTypeInvocationStartMarker = 0,
  DebugShaderModifierRecordTypeStep = 1,
  DebugShaderModifierRecordTypeEvent = 2,
  DebugShaderModifierRecordTypeInputRegister = 3,
  DebugShaderModifierRecordTypeReadRegister = 4,
  DebugShaderModifierRecordTypeWrittenRegister = 5,
  DebugShaderModifierRecordTypeRegisterRelativeIndex0 = 6,
  DebugShaderModifierRecordTypeRegisterRelativeIndex1 = 7,
  DebugShaderModifierRecordTypeRegisterRelativeIndex2 = 8,
  // DXIL step records, distinguished by the type of the value they carry.
  DebugShaderModifierRecordTypeDXILStepVoid = 251,
  DebugShaderModifierRecordTypeDXILStepFloat = 252,
  DebugShaderModifierRecordTypeDXILStepUint32 = 253,
  DebugShaderModifierRecordTypeDXILStepUint64 = 254,
  DebugShaderModifierRecordTypeDXILStepDouble = 255,
};
- // These structs echo those in the debugger application's debugshaderrecord.h
- // file, but are recapitulated here because the originals use unnamed unions
- // which are disallowed by DXCompiler's build.
- //
#pragma pack(push, 4)
// Common two-dword header: a packed 32-bit descriptor followed by the UID of
// the invocation that emitted the record.
struct DebugShaderModifierRecordHeader {
  union {
    struct {
      uint32_t SizeDwords : 4;
      uint32_t Flags : 4;
      uint32_t Type : 8;
      uint32_t HeaderPayload : 16;
    } Details;
    uint32_t u32Header;
  } Header;
  uint32_t UID;
};

// Fixed part of every DXIL step record: same header shape as above (the top
// 16 bits are named Opcode here) plus the instruction number.
struct DebugShaderModifierRecordDXILStepBase {
  union {
    struct {
      uint32_t SizeDwords : 4;
      uint32_t Flags : 4;
      uint32_t Type : 8;
      uint32_t Opcode : 16;
    } Details;
    uint32_t u32Header;
  } Header;
  uint32_t UID;
  uint32_t InstructionOffset;
};

// Step record carrying a value of type ReturnType followed by the packed
// value ordinal (base in one 16-bit field, index in the other).
template <typename ReturnType>
struct DebugShaderModifierRecordDXILStep
    : public DebugShaderModifierRecordDXILStepBase {
  ReturnType ReturnValue;
  union {
    struct {
      uint32_t ValueOrdinalBase : 16;
      uint32_t ValueOrdinalIndex : 16;
    } Details;
    uint32_t u32ValueOrdinal;
  } ValueOrdinal;
};

// A void step carries no value and no ordinal: only the base fields.
template <>
struct DebugShaderModifierRecordDXILStep<void>
    : public DebugShaderModifierRecordDXILStepBase {};

// sizeof() of these records is used to reserve space in the UAV, so lock the
// 4-byte-packed layout down at compile time. The checks sit inside the pack
// region so the templates are instantiated under the intended packing.
static_assert(sizeof(DebugShaderModifierRecordHeader) == 8,
              "record header must be exactly two dwords");
static_assert(sizeof(DebugShaderModifierRecordDXILStepBase) == 12,
              "step base must be exactly three dwords");
static_assert(sizeof(DebugShaderModifierRecordDXILStep<void>) == 12,
              "void step must add nothing to the step base");
static_assert(sizeof(DebugShaderModifierRecordDXILStep<float>) == 20,
              "float step must be five dwords");
static_assert(sizeof(DebugShaderModifierRecordDXILStep<uint32_t>) == 20,
              "uint32 step must be five dwords");
static_assert(sizeof(DebugShaderModifierRecordDXILStep<uint64_t>) == 24,
              "uint64 step must be six dwords (no alignment padding)");
static_assert(sizeof(DebugShaderModifierRecordDXILStep<double>) == 24,
              "double step must be six dwords (no alignment padding)");
#pragma pack(pop)
- uint32_t
- DebugShaderModifierRecordPayloadSizeDwords(size_t recordTotalSizeBytes) {
- return ((recordTotalSizeBytes - sizeof(DebugShaderModifierRecordHeader)) /
- sizeof(uint32_t));
- }
- class DxilDebugInstrumentation : public ModulePass {
- private:
- union ParametersAllTogether {
- unsigned Parameters[3];
- struct PixelShaderParameters {
- unsigned X;
- unsigned Y;
- } PixelShader;
- struct VertexShaderParameters {
- unsigned VertexId;
- unsigned InstanceId;
- } VertexShader;
- struct ComputeShaderParameters {
- unsigned ThreadIdX;
- unsigned ThreadIdY;
- unsigned ThreadIdZ;
- } ComputeShader;
- struct GeometryShaderParameters {
- unsigned PrimitiveId;
- unsigned InstanceId;
- } GeometryShader;
- } m_Parameters = {{0, 0, 0}};
- union SystemValueIndices {
- struct PixelShaderParameters {
- unsigned Position;
- } PixelShader;
- struct VertexShaderParameters {
- unsigned VertexId;
- unsigned InstanceId;
- } VertexShader;
- struct GeometryShaderParameters {
- unsigned PrimitiveId;
- unsigned InstanceId;
- } GeometryShader;
- };
- uint64_t m_UAVSize = 1024 * 1024;
- Value *m_SelectionCriterion = nullptr;
- CallInst *m_HandleForUAV = nullptr;
- Value *m_InvocationId = nullptr;
- // Together these two values allow branchless writing to the UAV. An
- // invocation of the shader is either of interest or not (e.g. it writes to
- // the pixel the user selected for debugging or it doesn't). If not of
- // interest, debugging output will still occur, but it will be relegated to
- // the very top few bytes of the UAV. Invocations of interest, by contrast,
- // will be written to the UAV at sequentially increasing offsets.
- // This value will either be one or zero (one if the invocation is of
- // interest, zero otherwise)
- Value *m_OffsetMultiplicand = nullptr;
- // This will either be zero (if the invocation is of interest) or
- // (UAVSize)-(SmallValue) if not.
- Value *m_OffsetAddend = nullptr;
- Constant *m_OffsetMask = nullptr;
- Constant *m_CounterOffset = nullptr;
- struct BuilderContext {
- Module &M;
- DxilModule &DM;
- LLVMContext &Ctx;
- OP *HlslOP;
- IRBuilder<> &Builder;
- };
- uint32_t m_RemainingReservedSpaceInBytes = 0;
- Value *m_CurrentIndex = nullptr;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit DxilDebugInstrumentation() : ModulePass(ID) {}
- const char *getPassName() const override {
- return "Add PIX debug instrumentation";
- }
- void applyOptions(PassOptions O) override;
- bool runOnModule(Module &M) override;
- private:
- SystemValueIndices addRequiredSystemValues(BuilderContext &BC);
- void addInvocationSelectionProlog(BuilderContext &BC,
- SystemValueIndices SVIndices);
- Value *addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
- Value *addGeometryShaderProlog(BuilderContext &BC,
- SystemValueIndices SVIndices);
- Value *addDispatchedShaderProlog(BuilderContext &BC);
- Value *addVertexShaderProlog(BuilderContext &BC,
- SystemValueIndices SVIndices);
- void addDebugEntryValue(BuilderContext &BC, Value *TheValue);
- void addInvocationStartMarker(BuilderContext &BC);
- void reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInDwords);
- void addStoreStepDebugEntry(BuilderContext &BC, StoreInst *Inst);
- void addStepDebugEntry(BuilderContext& BC, Instruction* Inst);
- void addStepDebugEntryValue(BuilderContext &BC, std::uint32_t InstNum,
- Value *V, std::uint32_t ValueOrdinal,
- Value *ValueOrdinalIndex);
- uint32_t UAVDumpingGroundOffset();
- template <typename ReturnType>
- void addStepEntryForType(DebugShaderModifierRecordType RecordType,
- BuilderContext &BC, std::uint32_t InstNum, Value *V,
- std::uint32_t ValueOrdinal,
- Value *ValueOrdinalIndex);
- };
- void DxilDebugInstrumentation::applyOptions(PassOptions O) {
- GetPassOptionUnsigned(O, "parameter0", &m_Parameters.Parameters[0], 0);
- GetPassOptionUnsigned(O, "parameter1", &m_Parameters.Parameters[1], 0);
- GetPassOptionUnsigned(O, "parameter2", &m_Parameters.Parameters[2], 0);
- GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024);
- }
- uint32_t DxilDebugInstrumentation::UAVDumpingGroundOffset() {
- return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
- }
- DxilDebugInstrumentation::SystemValueIndices
- DxilDebugInstrumentation::addRequiredSystemValues(BuilderContext &BC) {
- SystemValueIndices SVIndices{};
- hlsl::DxilSignature &InputSignature = BC.DM.GetInputSignature();
- auto &InputElements = InputSignature.GetElements();
- auto ShaderModel = BC.DM.GetShaderModel();
- switch (ShaderModel->GetKind()) {
- case DXIL::ShaderKind::Amplification:
- case DXIL::ShaderKind::Mesh:
- case DXIL::ShaderKind::Compute:
- // Dispatch* thread Id is not in the input signature
- break;
- case DXIL::ShaderKind::Vertex: {
- {
- auto Existing_SV_VertexId = std::find_if(
- InputElements.begin(), InputElements.end(),
- [](const std::unique_ptr<DxilSignatureElement> &Element) {
- return Element->GetSemantic()->GetKind() ==
- hlsl::DXIL::SemanticKind::VertexID;
- });
- if (Existing_SV_VertexId == InputElements.end()) {
- auto Added_SV_VertexId =
- llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
- Added_SV_VertexId->Initialize("VertexId", hlsl::CompType::getF32(),
- hlsl::DXIL::InterpolationMode::Undefined,
- 1, 1);
- Added_SV_VertexId->AppendSemanticIndex(0);
- Added_SV_VertexId->SetSigPointKind(DXIL::SigPointKind::VSIn);
- Added_SV_VertexId->SetKind(hlsl::DXIL::SemanticKind::VertexID);
- auto index = InputSignature.AppendElement(std::move(Added_SV_VertexId));
- SVIndices.VertexShader.VertexId = InputElements[index]->GetID();
- } else {
- SVIndices.VertexShader.VertexId = Existing_SV_VertexId->get()->GetID();
- }
- }
- {
- auto Existing_SV_InstanceId = std::find_if(
- InputElements.begin(), InputElements.end(),
- [](const std::unique_ptr<DxilSignatureElement> &Element) {
- return Element->GetSemantic()->GetKind() ==
- hlsl::DXIL::SemanticKind::InstanceID;
- });
- if (Existing_SV_InstanceId == InputElements.end()) {
- auto Added_SV_InstanceId =
- llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
- Added_SV_InstanceId->Initialize(
- "InstanceId", hlsl::CompType::getF32(),
- hlsl::DXIL::InterpolationMode::Undefined, 1, 1);
- Added_SV_InstanceId->AppendSemanticIndex(0);
- Added_SV_InstanceId->SetSigPointKind(DXIL::SigPointKind::VSIn);
- Added_SV_InstanceId->SetKind(hlsl::DXIL::SemanticKind::InstanceID);
- auto index =
- InputSignature.AppendElement(std::move(Added_SV_InstanceId));
- SVIndices.VertexShader.InstanceId = InputElements[index]->GetID();
- } else {
- SVIndices.VertexShader.InstanceId =
- Existing_SV_InstanceId->get()->GetID();
- }
- }
- } break;
- case DXIL::ShaderKind::Geometry:
- // GS Instance Id and Primitive Id are not in the input signature
- break;
- case DXIL::ShaderKind::Pixel: {
- auto Existing_SV_Position =
- std::find_if(InputElements.begin(), InputElements.end(),
- [](const std::unique_ptr<DxilSignatureElement> &Element) {
- return Element->GetSemantic()->GetKind() ==
- hlsl::DXIL::SemanticKind::Position;
- });
- // SV_Position, if present, has to have full mask, so we needn't worry
- // about the shader having selected components that don't include x or y.
- // If not present, we add it.
- if (Existing_SV_Position == InputElements.end()) {
- auto Added_SV_Position =
- llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
- Added_SV_Position->Initialize("Position", hlsl::CompType::getF32(),
- hlsl::DXIL::InterpolationMode::Linear, 1,
- 4);
- Added_SV_Position->AppendSemanticIndex(0);
- Added_SV_Position->SetSigPointKind(DXIL::SigPointKind::PSIn);
- Added_SV_Position->SetKind(hlsl::DXIL::SemanticKind::Position);
- auto index = InputSignature.AppendElement(std::move(Added_SV_Position));
- SVIndices.PixelShader.Position = InputElements[index]->GetID();
- } else {
- SVIndices.PixelShader.Position = Existing_SV_Position->get()->GetID();
- }
- } break;
- default:
- assert(false); // guaranteed by runOnModule
- }
- return SVIndices;
- }
- Value *DxilDebugInstrumentation::addDispatchedShaderProlog(BuilderContext &BC) {
- Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
- Constant *One32Arg = BC.HlslOP->GetU32Const(1);
- Constant *Two32Arg = BC.HlslOP->GetU32Const(2);
- auto ThreadIdFunc =
- BC.HlslOP->GetOpFunc(DXIL::OpCode::ThreadId, Type::getInt32Ty(BC.Ctx));
- Constant *Opcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::ThreadId);
- auto ThreadIdX =
- BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Zero32Arg}, "ThreadIdX");
- auto ThreadIdY =
- BC.Builder.CreateCall(ThreadIdFunc, {Opcode, One32Arg}, "ThreadIdY");
- auto ThreadIdZ =
- BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Two32Arg}, "ThreadIdZ");
- // Compare to expected thread ID
- auto CompareToX = BC.Builder.CreateICmpEQ(
- ThreadIdX, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdX),
- "CompareToThreadIdX");
- auto CompareToY = BC.Builder.CreateICmpEQ(
- ThreadIdY, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdY),
- "CompareToThreadIdY");
- auto CompareToZ = BC.Builder.CreateICmpEQ(
- ThreadIdZ, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdZ),
- "CompareToThreadIdZ");
- auto CompareXAndY =
- BC.Builder.CreateAnd(CompareToX, CompareToY, "CompareXAndY");
- auto CompareAll =
- BC.Builder.CreateAnd(CompareXAndY, CompareToZ, "CompareAll");
- return CompareAll;
- }
- Value *
- DxilDebugInstrumentation::addVertexShaderProlog(BuilderContext &BC,
- SystemValueIndices SVIndices) {
- Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
- Constant *Zero8Arg = BC.HlslOP->GetI8Const(0);
- UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
- auto LoadInputOpFunc =
- BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getInt32Ty(BC.Ctx));
- Constant *LoadInputOpcode =
- BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
- Constant *SV_Vert_ID =
- BC.HlslOP->GetU32Const(SVIndices.VertexShader.VertexId);
- auto VertId =
- BC.Builder.CreateCall(LoadInputOpFunc,
- {LoadInputOpcode, SV_Vert_ID, Zero32Arg /*row*/,
- Zero8Arg /*column*/, UndefArg},
- "VertId");
- Constant *SV_Instance_ID =
- BC.HlslOP->GetU32Const(SVIndices.VertexShader.InstanceId);
- auto InstanceId =
- BC.Builder.CreateCall(LoadInputOpFunc,
- {LoadInputOpcode, SV_Instance_ID, Zero32Arg /*row*/,
- Zero8Arg /*column*/, UndefArg},
- "InstanceId");
- // Compare to expected vertex ID and instance ID
- auto CompareToVert = BC.Builder.CreateICmpEQ(
- VertId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.VertexId),
- "CompareToVertId");
- auto CompareToInstance = BC.Builder.CreateICmpEQ(
- InstanceId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.InstanceId),
- "CompareToInstanceId");
- auto CompareBoth =
- BC.Builder.CreateAnd(CompareToVert, CompareToInstance, "CompareBoth");
- return CompareBoth;
- }
- Value *DxilDebugInstrumentation::addGeometryShaderProlog(
- BuilderContext &BC, SystemValueIndices SVIndices) {
- auto PrimitiveIdOpFunc =
- BC.HlslOP->GetOpFunc(DXIL::OpCode::PrimitiveID, Type::getInt32Ty(BC.Ctx));
- Constant *PrimitiveIdOpcode =
- BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::PrimitiveID);
- auto PrimId =
- BC.Builder.CreateCall(PrimitiveIdOpFunc, {PrimitiveIdOpcode}, "PrimId");
- auto CompareToPrim = BC.Builder.CreateICmpEQ(
- PrimId, BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.PrimitiveId),
- "CompareToPrimId");
- if (BC.DM.GetGSInstanceCount() <= 1) {
- return CompareToPrim;
- }
- auto GSInstanceIdOpFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::GSInstanceID,
- Type::getInt32Ty(BC.Ctx));
- Constant *GSInstanceIdOpcode =
- BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::GSInstanceID);
- auto GSInstanceId = BC.Builder.CreateCall(
- GSInstanceIdOpFunc, {GSInstanceIdOpcode}, "GSInstanceId");
- // Compare to expected vertex ID and instance ID
- auto CompareToInstance = BC.Builder.CreateICmpEQ(
- GSInstanceId,
- BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.InstanceId),
- "CompareToInstanceId");
- auto CompareBoth =
- BC.Builder.CreateAnd(CompareToPrim, CompareToInstance, "CompareBoth");
- return CompareBoth;
- }
- Value *
- DxilDebugInstrumentation::addPixelShaderProlog(BuilderContext &BC,
- SystemValueIndices SVIndices) {
- Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
- Constant *Zero8Arg = BC.HlslOP->GetI8Const(0);
- Constant *One8Arg = BC.HlslOP->GetI8Const(1);
- UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
- // Convert SV_POSITION to UINT
- Value *XAsInt;
- Value *YAsInt;
- {
- auto LoadInputOpFunc =
- BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(BC.Ctx));
- Constant *LoadInputOpcode =
- BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput);
- Constant *SV_Pos_ID =
- BC.HlslOP->GetU32Const(SVIndices.PixelShader.Position);
- auto XPos =
- BC.Builder.CreateCall(LoadInputOpFunc,
- {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/,
- Zero8Arg /*column*/, UndefArg},
- "XPos");
- auto YPos =
- BC.Builder.CreateCall(LoadInputOpFunc,
- {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/,
- One8Arg /*column*/, UndefArg},
- "YPos");
- XAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, XPos,
- Type::getInt32Ty(BC.Ctx), "XIndex");
- YAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, YPos,
- Type::getInt32Ty(BC.Ctx), "YIndex");
- }
- // Compare to expected pixel position and primitive ID
- auto CompareToX = BC.Builder.CreateICmpEQ(
- XAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.X), "CompareToX");
- auto CompareToY = BC.Builder.CreateICmpEQ(
- YAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.Y), "CompareToY");
- auto ComparePos = BC.Builder.CreateAnd(CompareToX, CompareToY, "ComparePos");
- return ComparePos;
- }
- void DxilDebugInstrumentation::addInvocationSelectionProlog(
- BuilderContext &BC, SystemValueIndices SVIndices) {
- auto ShaderModel = BC.DM.GetShaderModel();
- Value *ParameterTestResult = nullptr;
- switch (ShaderModel->GetKind()) {
- case DXIL::ShaderKind::Compute:
- case DXIL::ShaderKind::Amplification:
- case DXIL::ShaderKind::Mesh:
- ParameterTestResult = addDispatchedShaderProlog(BC);
- break;
- case DXIL::ShaderKind::Geometry:
- ParameterTestResult = addGeometryShaderProlog(BC, SVIndices);
- break;
- case DXIL::ShaderKind::Vertex:
- ParameterTestResult = addVertexShaderProlog(BC, SVIndices);
- break;
- case DXIL::ShaderKind::Pixel:
- ParameterTestResult = addPixelShaderProlog(BC, SVIndices);
- break;
- default:
- assert(false); // guaranteed by runOnModule
- }
- // This is a convenient place to calculate the values that modify the UAV
- // offset for invocations of interest and for UAV size.
- m_OffsetMultiplicand =
- BC.Builder.CreateCast(Instruction::CastOps::ZExt, ParameterTestResult,
- Type::getInt32Ty(BC.Ctx), "OffsetMultiplicand");
- auto InverseOffsetMultiplicand =
- BC.Builder.CreateSub(BC.HlslOP->GetU32Const(1), m_OffsetMultiplicand,
- "ComplementOfMultiplicand");
- m_OffsetAddend =
- BC.Builder.CreateMul(BC.HlslOP->GetU32Const(UAVDumpingGroundOffset()),
- InverseOffsetMultiplicand, "OffsetAddend");
- m_OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);
- m_CounterOffset = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() + CounterOffsetBeyondUsefulData);
- m_SelectionCriterion = ParameterTestResult;
- }
- void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC,
- uint32_t SpaceInBytes) {
- assert(m_CurrentIndex == nullptr);
- assert(m_RemainingReservedSpaceInBytes == 0);
- m_RemainingReservedSpaceInBytes = SpaceInBytes;
- // Insert the UAV increment instruction:
- Function *AtomicOpFunc =
- BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(BC.Ctx));
- Constant *AtomicBinOpcode =
- BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
- Constant *AtomicAdd =
- BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
- UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
- // so inc will be zero for uninteresting invocations:
- Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes);
- Value *IncrementForThisInvocation = BC.Builder.CreateMul(
- Increment, m_OffsetMultiplicand, "IncrementForThisInvocation");
- auto PreviousValue = BC.Builder.CreateCall(
- AtomicOpFunc,
- {
- AtomicBinOpcode, // i32, ; opcode
- m_HandleForUAV, // %dx.types.Handle, ; resource handle
- AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR,
- // XOR, IMIN, IMAX, UMIN, UMAX
- m_CounterOffset, // i32, ; coordinate c0: index in bytes
- UndefArg, // i32, ; coordinate c1 (unused)
- UndefArg, // i32, ; coordinate c2 (unused)
- IncrementForThisInvocation, // i32); increment value
- },
- "UAVIncResult");
- if (m_InvocationId == nullptr) {
- m_InvocationId = PreviousValue;
- }
- auto MaskedForLimit =
- BC.Builder.CreateAnd(PreviousValue, m_OffsetMask, "MaskedForUAVLimit");
- // The return value will either end up being itself (multiplied by one and
- // added with zero) or the "dump uninteresting things here" value of (UAVSize
- // - a bit).
- auto MultipliedForInterest = BC.Builder.CreateMul(
- MaskedForLimit, m_OffsetMultiplicand, "MultipliedForInterest");
- auto AddedForInterest = BC.Builder.CreateAdd(
- MultipliedForInterest, m_OffsetAddend, "AddedForInterest");
- m_CurrentIndex = AddedForInterest;
- }
- void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC,
- Value *TheValue) {
- assert(m_RemainingReservedSpaceInBytes > 0);
- auto TheValueTypeID = TheValue->getType()->getTypeID();
- if (TheValueTypeID == Type::TypeID::DoubleTyID) {
- Function *SplitDouble =
- BC.HlslOP->GetOpFunc(OP::OpCode::SplitDouble, TheValue->getType());
- Constant *SplitDoubleOpcode =
- BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::SplitDouble);
- auto SplitDoubleIntruction = BC.Builder.CreateCall(
- SplitDouble, {SplitDoubleOpcode, TheValue}, "SplitDouble");
- auto LowBits =
- BC.Builder.CreateExtractValue(SplitDoubleIntruction, 0, "LowBits");
- auto HighBits =
- BC.Builder.CreateExtractValue(SplitDoubleIntruction, 1, "HighBits");
- // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding
- addDebugEntryValue(BC, LowBits);
- addDebugEntryValue(BC, HighBits);
- } else if (TheValueTypeID == Type::TypeID::IntegerTyID &&
- TheValue->getType()->getIntegerBitWidth() == 64) {
- auto LowBits =
- BC.Builder.CreateTrunc(TheValue, Type::getInt32Ty(BC.Ctx), "LowBits");
- auto ShiftedBits = BC.Builder.CreateLShr(TheValue, 32, "ShiftedBits");
- auto HighBits = BC.Builder.CreateTrunc(
- ShiftedBits, Type::getInt32Ty(BC.Ctx), "HighBits");
- // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding
- addDebugEntryValue(BC, LowBits);
- addDebugEntryValue(BC, HighBits);
- } else if (TheValueTypeID == Type::TypeID::IntegerTyID &&
- (TheValue->getType()->getIntegerBitWidth() == 16 ||
- TheValue->getType()->getIntegerBitWidth() == 1)) {
- auto As32 =
- BC.Builder.CreateZExt(TheValue, Type::getInt32Ty(BC.Ctx), "As32");
- addDebugEntryValue(BC, As32);
- } else if (TheValueTypeID == Type::TypeID::HalfTyID) {
- auto AsFloat =
- BC.Builder.CreateFPCast(TheValue, Type::getFloatTy(BC.Ctx), "AsFloat");
- addDebugEntryValue(BC, AsFloat);
- } else {
- Function *StoreValue =
- BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore,
- TheValue->getType()); // Type::getInt32Ty(BC.Ctx));
- Constant *StoreValueOpcode =
- BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore);
- UndefValue *Undef32Arg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
- UndefValue *UndefArg = nullptr;
- if (TheValueTypeID == Type::TypeID::IntegerTyID) {
- UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
- } else if (TheValueTypeID == Type::TypeID::FloatTyID) {
- UndefArg = UndefValue::get(Type::getFloatTy(BC.Ctx));
- } else {
- // The above are the only two valid types for a UAV store
- assert(false);
- }
- Constant *WriteMask_X = BC.HlslOP->GetI8Const(1);
- (void)BC.Builder.CreateCall(
- StoreValue, {StoreValueOpcode, // i32 opcode
- m_HandleForUAV, // %dx.types.Handle, ; resource handle
- m_CurrentIndex, // i32 c0: index in bytes into UAV
- Undef32Arg, // i32 c1: unused
- TheValue,
- UndefArg, // unused values
- UndefArg, // unused values
- UndefArg, // unused values
- WriteMask_X});
- m_RemainingReservedSpaceInBytes -= 4;
- assert(m_RemainingReservedSpaceInBytes < 1024); // check for underflow
- if (m_RemainingReservedSpaceInBytes != 0) {
- m_CurrentIndex =
- BC.Builder.CreateAdd(m_CurrentIndex, BC.HlslOP->GetU32Const(4));
- } else {
- m_CurrentIndex = nullptr;
- }
- }
- }
- void DxilDebugInstrumentation::addInvocationStartMarker(BuilderContext &BC) {
- DebugShaderModifierRecordHeader marker{{{0, 0, 0, 0}}, 0};
- reserveDebugEntrySpace(BC, sizeof(marker));
- marker.Header.Details.SizeDwords =
- DebugShaderModifierRecordPayloadSizeDwords(sizeof(marker));
- ;
- marker.Header.Details.Flags = 0;
- marker.Header.Details.Type =
- DebugShaderModifierRecordTypeInvocationStartMarker;
- addDebugEntryValue(BC, BC.HlslOP->GetU32Const(marker.Header.u32Header));
- addDebugEntryValue(BC, m_InvocationId);
- }
- template <typename ReturnType>
- void DxilDebugInstrumentation::addStepEntryForType(
- DebugShaderModifierRecordType RecordType, BuilderContext &BC,
- std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal,
- Value *ValueOrdinalIndex) {
- DebugShaderModifierRecordDXILStep<ReturnType> step = {};
- reserveDebugEntrySpace(BC, sizeof(step));
- step.Header.Details.SizeDwords =
- DebugShaderModifierRecordPayloadSizeDwords(sizeof(step));
- step.Header.Details.Type = static_cast<uint8_t>(RecordType);
- addDebugEntryValue(BC, BC.HlslOP->GetU32Const(step.Header.u32Header));
- addDebugEntryValue(BC, m_InvocationId);
- addDebugEntryValue(BC, BC.HlslOP->GetU32Const(InstNum));
- if (RecordType != DebugShaderModifierRecordTypeDXILStepVoid) {
- addDebugEntryValue(BC, V);
- IRBuilder<> &B = BC.Builder;
- Value *VO = BC.HlslOP->GetU32Const(ValueOrdinal << 16);
- Value *VOI = B.CreateAnd(ValueOrdinalIndex, BC.HlslOP->GetU32Const(0xFFFF),
- "ValueOrdinalIndex");
- Value *EncodedValueOrdinalAndIndex =
- BC.Builder.CreateOr(VO, VOI, "ValueOrdinal");
- addDebugEntryValue(BC, EncodedValueOrdinalAndIndex);
- }
- }
- /// Emits a step record for a store instruction.
- ///
- /// Nothing is emitted when \p Inst carries no PIX alloca-register-write
- /// annotation, carries no PIX instruction number, or stores a ray query
- /// allocation. Otherwise the stored operand is recorded against the
- /// annotation's ordinal base and index.
- void DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext &BC,
-                                                       StoreInst *Inst) {
-   std::uint32_t OrdinalBase;
-   std::uint32_t OrdinalSize; // Filled in by FromInst; not used afterwards.
-   llvm::Value *OrdinalIndex;
-   if (!pix_dxil::PixAllocaRegWrite::FromInst(Inst, &OrdinalBase, &OrdinalSize,
-                                              &OrdinalIndex)) {
-     return;
-   }
-   std::uint32_t InstructionNumber;
-   if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstructionNumber)) {
-     return;
-   }
-   Value *StoredValue = Inst->getValueOperand();
-   if (PIXPassHelpers::IsAllocateRayQueryInstruction(StoredValue)) {
-     return;
-   }
-   addStepDebugEntryValue(BC, InstructionNumber, StoredValue, OrdinalBase,
-                          OrdinalIndex);
- }
- /// Emits a step record for one original instruction.
- ///
- /// PHIs and ray query allocations are skipped. Stores are forwarded to
- /// addStoreStepDebugEntry. Any other instruction is recorded only when it
- /// has both a PIX register number and a PIX instruction number; it is then
- /// recorded with a constant index of 0.
- void DxilDebugInstrumentation::addStepDebugEntry(BuilderContext &BC,
-                                                  Instruction *Inst) {
-   const bool IsPhi = Inst->getOpcode() == Instruction::OtherOps::PHI;
-   if (IsPhi || PIXPassHelpers::IsAllocateRayQueryInstruction(Inst)) {
-     return;
-   }
-   if (auto *Store = llvm::dyn_cast<llvm::StoreInst>(Inst)) {
-     addStoreStepDebugEntry(BC, Store);
-     return;
-   }
-   // The register number is looked up first; the instruction number is only
-   // queried when a register number exists.
-   std::uint32_t RegisterNumber;
-   std::uint32_t InstructionNumber;
-   if (!pix_dxil::PixDxilReg::FromInst(Inst, &RegisterNumber) ||
-       !pix_dxil::PixDxilInstNum::FromInst(Inst, &InstructionNumber)) {
-     return;
-   }
-   addStepDebugEntryValue(BC, InstructionNumber, Inst, RegisterNumber,
-                          BC.Builder.getInt32(0));
- }
- /// Picks the step-record flavour matching the LLVM type of \p V and emits it
- /// through addStepEntryForType:
- ///   - void / struct      -> void step
- ///   - half / float       -> float step
- ///   - double             -> double step
- ///   - integer, 64 bits   -> uint64 step; any other width -> uint32 step
- ///   - pointer            -> nothing is emitted
- ///   - every other type   -> assert
- void DxilDebugInstrumentation::addStepDebugEntryValue(
-     BuilderContext &BC, std::uint32_t InstNum, Value *V,
-     std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex) {
-   Type *ValueTy = V->getType();
-   switch (ValueTy->getTypeID()) {
-   case Type::TypeID::VoidTyID:
-   case Type::TypeID::StructTyID:
-     addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepVoid, BC,
-                               InstNum, V, ValueOrdinal, ValueOrdinalIndex);
-     return;
-   case Type::TypeID::HalfTyID:
-   case Type::TypeID::FloatTyID:
-     addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC,
-                                InstNum, V, ValueOrdinal, ValueOrdinalIndex);
-     return;
-   case Type::TypeID::DoubleTyID:
-     addStepEntryForType<double>(DebugShaderModifierRecordTypeDXILStepDouble, BC,
-                                 InstNum, V, ValueOrdinal, ValueOrdinalIndex);
-     return;
-   case Type::TypeID::IntegerTyID: {
-     const bool Is64Bit = ValueTy->getIntegerBitWidth() == 64;
-     if (Is64Bit) {
-       addStepEntryForType<uint64_t>(DebugShaderModifierRecordTypeDXILStepUint64,
-                                     BC, InstNum, V, ValueOrdinal,
-                                     ValueOrdinalIndex);
-     } else {
-       addStepEntryForType<uint32_t>(DebugShaderModifierRecordTypeDXILStepUint32,
-                                     BC, InstNum, V, ValueOrdinal,
-                                     ValueOrdinalIndex);
-     }
-     return;
-   }
-   case Type::TypeID::PointerTyID:
-     // Pointer calculations are deliberately not recorded. They are an
-     // implementation detail (lookup tables and the like) that means little
-     // to the user, and their type is awkward for the UI. The instructions
-     // that later dereference the pointer are instrumented and show the
-     // retrieved value instead.
-     return;
-   case Type::TypeID::X86_FP80TyID:
-   case Type::TypeID::FP128TyID:
-   case Type::TypeID::PPC_FP128TyID:
-   case Type::TypeID::X86_MMXTyID:
-   case Type::TypeID::LabelTyID:
-   case Type::TypeID::MetadataTyID:
-   case Type::TypeID::FunctionTyID:
-   case Type::TypeID::ArrayTyID:
-   case Type::TypeID::VectorTyID:
-     // No record flavour exists for these types.
-     assert(false);
-     break;
-   }
- }
- /// Instruments the entry function of \p M for PIX shader debugging.
- ///
- /// Only amplification, mesh, vertex, geometry, pixel and compute shaders are
- /// handled; for every other shader kind nothing is changed and false is
- /// returned. Otherwise the pass
- ///   1. snapshots the entry function's current instructions,
- ///   2. emits the debug UAV handle, the required system values, the
- ///      invocation-selection prolog and the invocation start marker at the
- ///      first non-alloca insertion point,
- ///   3. routes every edge that feeds a PHI through a new "PIXDebug<N>" block
- ///      that records the incoming value, and
- ///   4. emits a step record for each snapshotted instruction,
- /// then re-emits the DXIL resources and returns true.
- bool DxilDebugInstrumentation::runOnModule(Module &M) {
-   DxilModule &DM = M.GetOrCreateDxilModule();
-   LLVMContext &Ctx = M.getContext();
-   OP *HlslOP = DM.GetOP();
-   const auto Kind = DM.GetShaderModel()->GetKind();
-   const bool IsSupportedKind = Kind == DXIL::ShaderKind::Amplification ||
-                                Kind == DXIL::ShaderKind::Mesh ||
-                                Kind == DXIL::ShaderKind::Vertex ||
-                                Kind == DXIL::ShaderKind::Geometry ||
-                                Kind == DXIL::ShaderKind::Pixel ||
-                                Kind == DXIL::ShaderKind::Compute;
-   if (!IsSupportedKind) {
-     return false;
-   }
-   // Snapshot the instructions that exist right now. Only these are given
-   // step records at the end, so code emitted by this pass is not itself
-   // instrumented.
-   std::vector<Instruction *> OriginalInsts;
-   for (inst_iterator It = inst_begin(DM.GetEntryFunction()),
-                      End = inst_end(DM.GetEntryFunction());
-        It != End; ++It) {
-     OriginalInsts.push_back(&*It);
-   }
-   // Branchless instrumentation requires taking care of a few things:
-   //  - Each invocation of the shader is either of interest or not.
-   //  - If it is of interest, the offset into the output UAV is as expected.
-   //  - If it is not, the offset is forced to (UAVSize) - (Small Amount), and
-   //    that output is ignored by the CPU-side code.
-   //  - The invocation of interest may overflow the UAV. This is handled by
-   //    taking the modulus of the output index. Overflow is then detected on
-   //    the CPU side by checking for a canary value at
-   //    (UAVSize) - (Small Amount) * 2, which is actually a conservative
-   //    definition of overflow.
-   Instruction *PrologInsertPt =
-       dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
-   IRBuilder<> PrologBuilder(PrologInsertPt);
-   BuilderContext BC{M, DM, Ctx, HlslOP, PrologBuilder};
-   m_HandleForUAV =
-       PIXPassHelpers::CreateUAV(BC.DM, BC.Builder, 0, "PIX_DebugUAV_Handle");
-   auto RequiredSystemValues = addRequiredSystemValues(BC);
-   addInvocationSelectionProlog(BC, RequiredSystemValues);
-   addInvocationStartMarker(BC);
-   // New blocks get explicit, counter-based names so that tests see stable
-   // names.
-   // NOTE(review): the counter advances in the iteration order of a std::map
-   // keyed by BasicBlock pointers, i.e. in pointer order -- confirm that this
-   // is stable enough for the tests that rely on these names.
-   int NextNewBlockId = 0;
-   auto Fn = DM.GetEntryFunction();
-   auto &Blocks = Fn->getBasicBlockList();
-   for (auto &PhiBlock : Blocks) {
-     // One incoming (value, phi, operand index) triple of a PHI.
-     struct IncomingSlot {
-       Value *Val;
-       PHINode *Phi;
-       unsigned Index;
-     };
-     // Group the incoming slots of all leading PHIs by predecessor block.
-     std::map<BasicBlock *, std::vector<IncomingSlot>> SlotsByPredecessor;
-     for (auto &Candidate : PhiBlock.getInstList()) {
-       auto *Phi = llvm::dyn_cast<PHINode>(&Candidate);
-       if (!Phi) {
-         break; // Stop scanning at the first non-PHI instruction.
-       }
-       const unsigned NumIncoming = Phi->getNumIncomingValues();
-       for (unsigned Slot = 0; Slot != NumIncoming; ++Slot) {
-         BasicBlock *From = Phi->getIncomingBlock(Slot);
-         Value *Incoming = Phi->getIncomingValue(Slot);
-         SlotsByPredecessor[From].push_back({Incoming, Phi, Slot});
-       }
-     }
-     for (auto &Edge : SlotsByPredecessor) {
-       BasicBlock *Predecessor = Edge.first;
-       auto *EdgeBlock = BasicBlock::Create(
-           Ctx, "PIXDebug" + std::to_string(NextNewBlockId++),
-           Predecessor->getParent());
-       IRBuilder<> EdgeBuilder(EdgeBlock);
-       BuilderContext EdgeBC{M, DM, Ctx, HlslOP, EdgeBuilder};
-       // Retarget each successor operand of the predecessor's terminator that
-       // currently points at PhiBlock to the new block.
-       TerminatorInst *PredTerminator = Predecessor->getTerminator();
-       for (unsigned S = 0, NumSucc = PredTerminator->getNumSuccessors();
-            S != NumSucc; ++S) {
-         if (PredTerminator->getSuccessor(S) == &PhiBlock) {
-           PredTerminator->setSuccessor(S, EdgeBlock);
-         }
-       }
-       for (auto &Slot : Edge.second) {
-         // The PHI now receives this value from the new block.
-         Slot.Phi->setIncomingBlock(Slot.Index, EdgeBlock);
-         // Record the incoming value in the new block, provided the PHI has
-         // both a PIX register number and a PIX instruction number.
-         std::uint32_t RegNum;
-         std::uint32_t InstNum;
-         if (pix_dxil::PixDxilReg::FromInst(Slot.Phi, &RegNum) &&
-             pix_dxil::PixDxilInstNum::FromInst(Slot.Phi, &InstNum)) {
-           addStepDebugEntryValue(EdgeBC, InstNum, Slot.Val, RegNum,
-                                  EdgeBC.Builder.getInt32(0));
-         }
-       }
-       // Finish the new block by branching on to the PHI's block.
-       EdgeBuilder.CreateBr(&PhiBlock);
-     }
-   }
-   // Instrument the snapshotted instructions. Instrumentation goes before a
-   // terminator, and after any other instruction.
-   for (Instruction *Original : OriginalInsts) {
-     Instruction *InsertBefore =
-         Original->isTerminator() ? Original : Original->getNextNode();
-     IRBuilder<> StepBuilder(InsertBefore);
-     BuilderContext StepBC{BC.M, BC.DM, BC.Ctx, BC.HlslOP, StepBuilder};
-     addStepDebugEntry(StepBC, Original);
-   }
-   DM.ReEmitDxilResources();
-   return true;
- }
- char DxilDebugInstrumentation::ID = 0; // Pass identifier; LLVM keys passes on the address of this member, so the value 0 carries no meaning.
- ModulePass *llvm::createDxilDebugInstrumentationPass() { // Factory: returns a freshly heap-allocated instance of this pass.
- return new DxilDebugInstrumentation(); // Raw pointer; presumably handed to a pass manager that takes ownership -- confirm at call sites.
- }
- INITIALIZE_PASS(DxilDebugInstrumentation, "hlsl-dxil-debug-instrumentation",
- "HLSL DXIL debug instrumentation for PIX", false, false) // Registers the pass under the given argument string and description; the two flags are CFG-only = false and is-analysis = false.
|