|
@@ -31,16 +31,20 @@
|
|
|
// Keep this in sync with the same-named value in the debugger application's
|
|
|
// WinPixShaderUtils.h
|
|
|
// Size in bytes of the region that UAVDumpingGroundOffset() subtracts from
// m_UAVSize to locate the start of the "dumping ground".
constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024;
|
|
|
// Byte bound that reserveDebugEntrySpace asserts against; the same value is
// also added to UAVDumpingGroundOffset() to form the offset operand of that
// function's atomic add.
// NOTE(review): presumably this must match a PIX-side constant as well, like
// the value above -- confirm.
+constexpr uint64_t MaxSizePerRecord = 64;
|
|
|
|
|
|
// Keep these in sync with the same-named values in PIX's MeshShaderOutput.cpp
|
|
|
constexpr uint32_t triangleIndexIndicator = 1;
|
|
|
constexpr uint32_t int32ValueIndicator = 2;
|
|
|
constexpr uint32_t floatValueIndicator = 3;
|
|
|
+constexpr uint32_t int16ValueIndicator = 4;
|
|
|
+constexpr uint32_t float16ValueIndicator = 5;
|
|
|
|
|
|
using namespace llvm;
|
|
|
using namespace hlsl;
|
|
|
|
|
|
-class DxilPIXMeshShaderOutputInstrumentation : public ModulePass {
|
|
|
+class DxilPIXMeshShaderOutputInstrumentation : public ModulePass
|
|
|
+{
|
|
|
public:
|
|
|
static char ID; // Pass identification, replacement for typeid
|
|
|
explicit DxilPIXMeshShaderOutputInstrumentation() : ModulePass(ID) {}
|
|
@@ -75,15 +79,18 @@ private:
|
|
|
template <typename... T> void Instrument(BuilderContext &BC, T... values);
|
|
|
};
|
|
|
|
|
|
// Reads the "UAVSize" pass option from O into m_UAVSize.
// The final argument (1024 * 1024) is presumably the value used when the
// option is absent -- confirm against GetPassOptionUInt64.
-void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O) {
|
|
|
+void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O)
|
|
|
+{
|
|
|
GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024);
|
|
|
}
|
|
|
|
|
|
// Returns m_UAVSize - DebugBufferDumpingGroundSize narrowed to uint32_t, i.e.
// the byte offset at which the last DebugBufferDumpingGroundSize bytes of the
// UAV begin.
// NOTE(review): both operands are uint64_t, so if m_UAVSize is smaller than
// DebugBufferDumpingGroundSize the subtraction wraps before the narrowing
// cast -- confirm the "UAVSize" option is validated elsewhere.
-uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset() {
|
|
|
+uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset()
|
|
|
+{
|
|
|
return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
|
|
|
}
|
|
|
|
|
|
-CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) {
|
|
|
+CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC)
|
|
|
+{
|
|
|
// Set up a UAV with structure of a single int
|
|
|
unsigned int UAVResourceHandle =
|
|
|
static_cast<unsigned int>(BC.DM.GetUAVs().size());
|
|
@@ -130,7 +137,8 @@ CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) {
|
|
|
}
|
|
|
|
|
|
Value *DxilPIXMeshShaderOutputInstrumentation::
|
|
|
- insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC) {
|
|
|
+ insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC)
|
|
|
+{
|
|
|
Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
|
|
|
Constant *One32Arg = BC.HlslOP->GetU32Const(1);
|
|
|
|
|
@@ -150,7 +158,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::
|
|
|
}
|
|
|
|
|
|
Value *DxilPIXMeshShaderOutputInstrumentation::
|
|
|
- insertInstructionsToCalculateGroupIdZ(BuilderContext &BC) {
|
|
|
+ insertInstructionsToCalculateGroupIdZ(BuilderContext &BC)
|
|
|
+{
|
|
|
Constant *Two32Arg = BC.HlslOP->GetU32Const(2);
|
|
|
auto GroupIdFunc =
|
|
|
BC.HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(BC.Ctx));
|
|
@@ -160,9 +169,15 @@ Value *DxilPIXMeshShaderOutputInstrumentation::
|
|
|
}
|
|
|
|
|
|
Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
|
|
|
- BuilderContext &BC, uint32_t SpaceInBytes) {
|
|
|
- assert(m_RemainingReservedSpaceInBytes ==
|
|
|
- 0); // or else the previous caller reserved too much space
|
|
|
+ BuilderContext &BC, uint32_t SpaceInBytes)
|
|
|
+{
|
|
|
+
|
|
|
+ // Check the previous caller didn't reserve too much space:
|
|
|
+ assert(m_RemainingReservedSpaceInBytes == 0);
|
|
|
+
|
|
|
+ // Check that the caller didn't ask for so much memory that it will
|
|
|
+ // overwrite the offset counter:
|
|
|
+ // Bound the *requested* size: m_RemainingReservedSpaceInBytes is known to be
+ // 0 at this point (asserted above), so testing it here could never fail.
+ assert(SpaceInBytes < MaxSizePerRecord);
|
|
|
|
|
|
m_RemainingReservedSpaceInBytes = SpaceInBytes;
|
|
|
|
|
@@ -173,7 +188,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
|
|
|
BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
|
|
|
Constant *AtomicAdd =
|
|
|
BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
|
|
|
- Constant *OffsetArg = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset());
|
|
|
+ Constant *OffsetArg =
|
|
|
+ BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() + MaxSizePerRecord);
|
|
|
UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
|
|
|
|
|
|
Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes);
|
|
@@ -196,7 +212,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
|
|
|
}
|
|
|
|
|
|
Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset(
|
|
|
- BuilderContext &BC, Value *TheOffset, Value *TheValue) {
|
|
|
+ BuilderContext &BC, Value *TheOffset, Value *TheValue)
|
|
|
+{
|
|
|
|
|
|
Function *StoreValue =
|
|
|
BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getInt32Ty(BC.Ctx));
|
|
@@ -228,18 +245,21 @@ Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset(
|
|
|
|
|
|
// Emits one debug record made of the given values.
//   BC     - builder context forwarded to the reserve/write helpers.
//   values - pack of pointers, each static_cast to llvm::Value*; every value
//            is written as one dword, in argument order.
// Reserves DwordCount * sizeof(uint32_t) bytes via reserveDebugEntrySpace,
// then writes each value with writeDwordAndReturnNewOffset, feeding the offset
// returned by one write into the next.
template <typename... T>
|
|
|
void DxilPIXMeshShaderOutputInstrumentation::Instrument(BuilderContext &BC,
|
|
|
- T... values) {
|
|
|
+ T... values)
|
|
|
+{
|
|
|
// Gather the pack into a vector so it can be counted and iterated.
llvm::SmallVector<llvm::Value *, 10> Values(
|
|
|
{static_cast<llvm::Value *>(values)...});
|
|
|
const uint32_t DwordCount = Values.size();
|
|
|
// Reserve the whole record up front; byteOffset is where the first dword goes.
llvm::Value *byteOffset =
|
|
|
reserveDebugEntrySpace(BC, DwordCount * sizeof(uint32_t));
|
|
|
- for (llvm::Value *V : Values) {
|
|
|
+ for (llvm::Value *V : Values)
|
|
|
+ {
|
|
|
// Each write returns the offset to use for the following dword.
byteOffset = writeDwordAndReturnNewOffset(BC, byteOffset, V);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
|
|
|
+bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M)
|
|
|
+{
|
|
|
DxilModule &DM = M.GetOrCreateDxilModule();
|
|
|
LLVMContext &Ctx = M.getContext();
|
|
|
OP *HlslOP = DM.GetOP();
|
|
@@ -259,7 +279,8 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
|
|
|
|
|
|
auto F = HlslOP->GetOpFunc(DXIL::OpCode::EmitIndices, Type::getVoidTy(Ctx));
|
|
|
auto FunctionUses = F->uses();
|
|
|
- for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
|
|
|
+ for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();)
|
|
|
+ {
|
|
|
auto &FunctionUse = *FI++;
|
|
|
auto FunctionUser = FunctionUse.getUser();
|
|
|
|
|
@@ -273,50 +294,79 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
|
|
|
Call->getOperand(2), Call->getOperand(3), Call->getOperand(4));
|
|
|
}
|
|
|
|
|
|
- F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Type::getInt32Ty(Ctx));
|
|
|
- FunctionUses = F->uses();
|
|
|
- for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
|
|
|
- auto &FunctionUse = *FI++;
|
|
|
- auto FunctionUser = FunctionUse.getUser();
|
|
|
-
|
|
|
- auto Call = cast<CallInst>(FunctionUser);
|
|
|
-
|
|
|
- IRBuilder<> Builder2(Call);
|
|
|
- BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
|
|
|
+ struct OutputType
|
|
|
+ {
|
|
|
+ Type *type;
|
|
|
+ uint32_t tag;
|
|
|
+ };
|
|
|
+ SmallVector<OutputType, 4> StoreVertexOutputOverloads
|
|
|
+ {
|
|
|
+ {Type::getInt32Ty(Ctx), int32ValueIndicator},
|
|
|
+ {Type::getInt16Ty(Ctx), int16ValueIndicator},
|
|
|
+ {Type::getFloatTy(Ctx), floatValueIndicator},
|
|
|
+ {Type::getHalfTy(Ctx), float16ValueIndicator}
|
|
|
+ };
|
|
|
|
|
|
+ for (auto const &Overload : StoreVertexOutputOverloads)
|
|
|
+ {
|
|
|
+ F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Overload.type);
|
|
|
+ FunctionUses = F->uses();
|
|
|
+ for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();)
|
|
|
{
|
|
|
- auto expandBits = BC2.Builder.CreateCast(
|
|
|
- Instruction::ZExt, Call->getOperand(3), Type::getInt32Ty(Ctx));
|
|
|
-
|
|
|
- Instrument(BC2, BC2.HlslOP->GetI32Const(int32ValueIndicator),
|
|
|
- GroupIdXandY, GroupIdZ, Call->getOperand(1),
|
|
|
- Call->getOperand(2), expandBits, Call->getOperand(4),
|
|
|
- Call->getOperand(5));
|
|
|
- }
|
|
|
- }
|
|
|
+ auto &FunctionUse = *FI++;
|
|
|
+ auto FunctionUser = FunctionUse.getUser();
|
|
|
|
|
|
- F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Type::getFloatTy(Ctx));
|
|
|
- FunctionUses = F->uses();
|
|
|
- for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
|
|
|
- auto &FunctionUse = *FI++;
|
|
|
- auto FunctionUser = FunctionUse.getUser();
|
|
|
+ auto Call = cast<CallInst>(FunctionUser);
|
|
|
|
|
|
- auto Call = cast<CallInst>(FunctionUser);
|
|
|
+ IRBuilder<> Builder2(Call);
|
|
|
+ BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
|
|
|
|
|
|
- IRBuilder<> Builder2(Call);
|
|
|
- BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
|
|
|
+ // Expand column index to 32 bits:
|
|
|
+ auto ColumnIndex = BC2.Builder.CreateCast(
|
|
|
+ Instruction::ZExt,
|
|
|
+ Call->getOperand(3),
|
|
|
+ Type::getInt32Ty(Ctx));
|
|
|
|
|
|
- {
|
|
|
- auto expandBits = BC2.Builder.CreateCast(
|
|
|
- Instruction::ZExt, Call->getOperand(3), Type::getInt32Ty(Ctx));
|
|
|
-
|
|
|
- auto reinterpretFloatToInt = BC2.Builder.CreateCast(
|
|
|
- Instruction::BitCast, Call->getOperand(4), Type::getInt32Ty(Ctx));
|
|
|
+ // Coerce actual value to int32
|
|
|
+ Value *CoercedValue = Call->getOperand(4);
|
|
|
|
|
|
- Instrument(BC2, BC2.HlslOP->GetI32Const(floatValueIndicator),
|
|
|
- GroupIdXandY, GroupIdZ, Call->getOperand(1),
|
|
|
- Call->getOperand(2), expandBits, reinterpretFloatToInt,
|
|
|
- Call->getOperand(5));
|
|
|
+ if (Overload.tag == floatValueIndicator)
|
|
|
+ {
|
|
|
+ CoercedValue = BC2.Builder.CreateCast(
|
|
|
+ Instruction::BitCast,
|
|
|
+ CoercedValue,
|
|
|
+ Type::getInt32Ty(Ctx));
|
|
|
+ }
|
|
|
+ else if (Overload.tag == float16ValueIndicator)
|
|
|
+ {
|
|
|
+ auto * HalfInt = BC2.Builder.CreateCast(
|
|
|
+ Instruction::BitCast,
|
|
|
+ CoercedValue,
|
|
|
+ Type::getInt16Ty(Ctx));
|
|
|
+
|
|
|
+ CoercedValue = BC2.Builder.CreateCast(
|
|
|
+ Instruction::ZExt,
|
|
|
+ HalfInt,
|
|
|
+ Type::getInt32Ty(Ctx));
|
|
|
+ }
|
|
|
+ else if (Overload.tag == int16ValueIndicator)
|
|
|
+ {
|
|
|
+ CoercedValue = BC2.Builder.CreateCast(
|
|
|
+ Instruction::ZExt,
|
|
|
+ CoercedValue,
|
|
|
+ Type::getInt32Ty(Ctx));
|
|
|
+ }
|
|
|
+
|
|
|
+ Instrument(
|
|
|
+ BC2,
|
|
|
+ BC2.HlslOP->GetI32Const(Overload.tag),
|
|
|
+ GroupIdXandY,
|
|
|
+ GroupIdZ,
|
|
|
+ Call->getOperand(1),
|
|
|
+ Call->getOperand(2),
|
|
|
+ ColumnIndex,
|
|
|
+ CoercedValue,
|
|
|
+ Call->getOperand(5));
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -327,7 +377,8 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
|
|
|
|
|
|
char DxilPIXMeshShaderOutputInstrumentation::ID = 0;
|
|
|
|
|
|
// Factory for the pass: returns a newly heap-allocated
// DxilPIXMeshShaderOutputInstrumentation as a ModulePass*.
// NOTE(review): the caller presumably takes ownership of the returned
// pointer -- confirm.
-ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation() {
|
|
|
+ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation()
|
|
|
+{
|
|
|
return new DxilPIXMeshShaderOutputInstrumentation();
|
|
|
}
|
|
|
|