/////////////////////////////////////////////////////////////////////////////// // // // DxilShaderAccessTracking.cpp // // Copyright (C) Microsoft Corporation. All rights reserved. // // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // // Provides a pass to add instrumentation to determine pixel hit count and // // cost. Used by PIX. // // // /////////////////////////////////////////////////////////////////////////////// #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilInstructions.h" #include "dxc/DXIL/DxilModule.h" #include "dxc/DxilPIXPasses/DxilPIXPasses.h" #include "dxc/HLSL/DxilGenerationPass.h" #include "dxc/HLSL/DxilSpanAllocator.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Transforms/Utils/Local.h" #include #ifdef _WIN32 #include #endif using namespace llvm; using namespace hlsl; void ThrowIf(bool a) { if (a) { throw ::hlsl::Exception(E_INVALIDARG); } } //--------------------------------------------------------------------------------------------------------------------------------- // These types are taken from PIX's ShaderAccessHelpers.h enum class ShaderAccessFlags : uint32_t { None = 0, Read = 1 << 0, Write = 1 << 1, // "Counter" access is only applicable to UAVs; it means the counter buffer // attached to the UAV was accessed, but not necessarily the UAV resource. Counter = 1 << 2, // Descriptor-only read (if any), but not the resource contents (if any). // Used for GetDimensions, samplers, and secondary texture for sampler // feedback. // TODO: Make this a unique value if supported in PIX, then enable // GetDimensions DescriptorRead = 1 << 0, }; constexpr uint32_t DWORDsPerResource = 3; constexpr uint32_t BytesPerDWORD = 4; static uint32_t OffsetFromAccess(ShaderAccessFlags access) { switch (access) { case ShaderAccessFlags::Read: return 0; case ShaderAccessFlags::Write: return 1; case ShaderAccessFlags::Counter: return 2; default: throw ::hlsl::Exception(E_INVALIDARG); } } // This enum doesn't have to match PIX's version, because the values are // received from PIX encoded in ASCII. However, for ease of comparing this code // with PIX, and to be less confusing to future maintainers, this enum does // indeed match the same-named enum in PIX. enum class RegisterType { CBV, SRV, UAV, RTV, // not used. DSV, // not used. Sampler, SOV, // not used. Invalid, Terminator }; RegisterType RegisterTypeFromResourceClass(DXIL::ResourceClass c) { switch (c) { case DXIL::ResourceClass::SRV: return RegisterType::SRV; break; case DXIL::ResourceClass::UAV: return RegisterType::UAV; break; case DXIL::ResourceClass::CBuffer: return RegisterType::CBV; break; case DXIL::ResourceClass::Sampler: return RegisterType::Sampler; break; case DXIL::ResourceClass::Invalid: return RegisterType::Invalid; break; default: ThrowIf(true); return RegisterType::Invalid; } } struct RegisterTypeAndSpace { bool operator<(const RegisterTypeAndSpace &o) const { return static_cast(Type) < static_cast(o.Type) || (static_cast(Type) == static_cast(o.Type) && Space < o.Space); } RegisterType Type; unsigned Space; }; // Identifies a bind point as defined by the root signature struct RSRegisterIdentifier { RegisterType Type; unsigned Space; unsigned Index; bool operator<(const RSRegisterIdentifier &o) const { return static_cast(Type) < static_cast(o.Type) && Space < o.Space && Index < o.Index; } }; struct SlotRange { unsigned startSlot; unsigned numSlots; // Number of slots needed if no descriptors from unbounded ranges are included unsigned numInvariableSlots; }; struct DxilResourceAndClass { DxilResourceBase *resource; Value *index; DXIL::ResourceClass resClass; }; //--------------------------------------------------------------------------------------------------------------------------------- class DxilShaderAccessTracking : public ModulePass { public: static char ID; // Pass identification, replacement for typeid explicit DxilShaderAccessTracking() : ModulePass(ID) {} const char *getPassName() const override { return "DXIL shader access tracking"; } bool runOnModule(Module &M) override; void applyOptions(PassOptions O) override; private: void EmitAccess(LLVMContext &Ctx, OP *HlslOP, IRBuilder<> &, Value *slot, ShaderAccessFlags access); bool EmitResourceAccess(DxilResourceAndClass &res, Instruction *instruction, OP *HlslOP, LLVMContext &Ctx, ShaderAccessFlags readWrite); private: bool m_CheckForDynamicIndexing = false; std::map m_slotAssignments; std::map m_FunctionToUAVHandle; std::set m_DynamicallyIndexedBindPoints; }; static unsigned DeserializeInt(std::deque &q) { unsigned i = 0; while (!q.empty() && isdigit(q.front())) { i *= 10; i += q.front() - '0'; q.pop_front(); } return i; } static char DequeFront(std::deque &q) { ThrowIf(q.empty()); auto c = q.front(); q.pop_front(); return c; } static RegisterType ParseRegisterType(std::deque &q) { switch (DequeFront(q)) { case 'C': return RegisterType::CBV; case 'S': return RegisterType::SRV; case 'U': return RegisterType::UAV; case 'M': return RegisterType::Sampler; case 'I': return RegisterType::Invalid; default: return RegisterType::Terminator; } } static char EncodeRegisterType(RegisterType r) { switch (r) { case RegisterType::CBV: return 'C'; case RegisterType::SRV: return 'S'; case RegisterType::UAV: return 'U'; case RegisterType::Sampler: return 'M'; case RegisterType::Invalid: return 'I'; } return '.'; } static void ValidateDelimiter(std::deque &q, char d) { ThrowIf(q.front() != d); q.pop_front(); } void DxilShaderAccessTracking::applyOptions(PassOptions O) { int checkForDynamic; GetPassOptionInt(O, "checkForDynamicIndexing", &checkForDynamic, 0); m_CheckForDynamicIndexing = checkForDynamic != 0; StringRef configOption; if (GetPassOption(O, "config", &configOption)) { std::deque config; config.assign(configOption.begin(), configOption.end()); // Parse slot assignments. Compare with PIX's ShaderAccessHelpers.cpp // (TrackingConfiguration::SerializedRepresentation) RegisterType rt = ParseRegisterType(config); while (rt != RegisterType::Terminator) { RegisterTypeAndSpace rst; rst.Type = rt; rst.Space = DeserializeInt(config); ValidateDelimiter(config, ':'); SlotRange sr; sr.startSlot = DeserializeInt(config); ValidateDelimiter(config, ':'); sr.numSlots = DeserializeInt(config); ValidateDelimiter(config, 'i'); sr.numInvariableSlots = DeserializeInt(config); ValidateDelimiter(config, ';'); m_slotAssignments[rst] = sr; rt = ParseRegisterType(config); } } } void DxilShaderAccessTracking::EmitAccess(LLVMContext &Ctx, OP *HlslOP, IRBuilder<> &Builder, Value *ByteIndex, ShaderAccessFlags access) { unsigned OffsetForAccessType = static_cast(OffsetFromAccess(access) * BytesPerDWORD); auto OffsetByteIndex = Builder.CreateAdd( ByteIndex, HlslOP->GetU32Const(OffsetForAccessType), "OffsetByteIndex"); UndefValue *UndefIntArg = UndefValue::get(Type::getInt32Ty(Ctx)); Constant *LiteralOne = HlslOP->GetU32Const(1); Constant *ElementMask = HlslOP->GetI8Const(1); Function *StoreFunc = HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getInt32Ty(Ctx)); Constant *StoreOpcode = HlslOP->GetU32Const((unsigned)OP::OpCode::BufferStore); (void)Builder.CreateCall( StoreFunc, { StoreOpcode, // i32, ; opcode m_FunctionToUAVHandle.at( Builder.GetInsertBlock() ->getParent()), // %dx.types.Handle, ; resource handle OffsetByteIndex, // i32, ; coordinate c0: byte offset UndefIntArg, // i32, ; coordinate c1 (unused) LiteralOne, // i32, ; value v0 UndefIntArg, // i32, ; value v1 UndefIntArg, // i32, ; value v2 UndefIntArg, // i32, ; value v3 ElementMask // i8 ; just the first value is used }); } bool DxilShaderAccessTracking::EmitResourceAccess(DxilResourceAndClass &res, Instruction *instruction, OP *HlslOP, LLVMContext &Ctx, ShaderAccessFlags readWrite) { RegisterTypeAndSpace typeAndSpace{RegisterTypeFromResourceClass(res.resClass), res.resource->GetSpaceID()}; auto slot = m_slotAssignments.find(typeAndSpace); // If the assignment isn't found, we assume it's not accessed if (slot != m_slotAssignments.end()) { IRBuilder<> Builder(instruction); Value *slotIndex; if (isa(res.index)) { unsigned index = cast(res.index)->getLimitedValue(); if (index > slot->second.numSlots) { // out-of-range accesses are written to slot zero: slotIndex = HlslOP->GetU32Const(0); } else { slotIndex = HlslOP->GetU32Const((slot->second.startSlot + index) * DWORDsPerResource * BytesPerDWORD); } } else { RSRegisterIdentifier id{typeAndSpace.Type, typeAndSpace.Space, res.resource->GetID()}; m_DynamicallyIndexedBindPoints.emplace(std::move(id)); // CompareWithSlotLimit will contain 1 if the access is out-of-bounds // (both over- and and under-flow via the unsigned >= with slot count) auto CompareWithSlotLimit = Builder.CreateICmpUGE( res.index, HlslOP->GetU32Const(slot->second.numSlots), "CompareWithSlotLimit"); auto CompareWithSlotLimitAsUint = Builder.CreateCast( Instruction::CastOps::ZExt, CompareWithSlotLimit, Type::getInt32Ty(Ctx), "CompareWithSlotLimitAsUint"); // IsInBounds will therefore contain 0 if the access is out-of-bounds, and // 1 otherwise. auto IsInBounds = Builder.CreateSub( HlslOP->GetU32Const(1), CompareWithSlotLimitAsUint, "IsInBounds"); auto SlotDwordOffset = Builder.CreateAdd( res.index, HlslOP->GetU32Const(slot->second.startSlot), "SlotDwordOffset"); auto SlotByteOffset = Builder.CreateMul( SlotDwordOffset, HlslOP->GetU32Const(DWORDsPerResource * BytesPerDWORD), "SlotByteOffset"); // This will drive an out-of-bounds access slot down to 0 slotIndex = Builder.CreateMul(SlotByteOffset, IsInBounds, "slotIndex"); } EmitAccess(Ctx, HlslOP, Builder, slotIndex, readWrite); return true; // did modify } return false; // did not modify } DxilResourceAndClass GetResourceFromHandle(Value *resHandle, DxilModule &DM) { DxilResourceAndClass ret{nullptr, nullptr, DXIL::ResourceClass::Invalid}; CallInst *handle = cast(resHandle); DxilInst_CreateHandle createHandle(handle); // Dynamic rangeId is not supported - skip and let validation report the // error. if (!isa(createHandle.get_rangeId())) return ret; unsigned rangeId = cast(createHandle.get_rangeId())->getLimitedValue(); auto resClass = static_cast(createHandle.get_resourceClass_val()); switch (resClass) { case DXIL::ResourceClass::SRV: ret.resource = &DM.GetSRV(rangeId); break; case DXIL::ResourceClass::UAV: ret.resource = &DM.GetUAV(rangeId); break; case DXIL::ResourceClass::CBuffer: ret.resource = &DM.GetCBuffer(rangeId); break; case DXIL::ResourceClass::Sampler: ret.resource = &DM.GetSampler(rangeId); break; default: DXASSERT(0, "invalid res class"); return ret; } ret.index = createHandle.get_index(); ret.resClass = resClass; return ret; } bool DxilShaderAccessTracking::runOnModule(Module &M) { // This pass adds instrumentation for shader access to resources DxilModule &DM = M.GetOrCreateDxilModule(); LLVMContext &Ctx = M.getContext(); OP *HlslOP = DM.GetOP(); bool Modified = false; if (m_CheckForDynamicIndexing) { bool FoundDynamicIndexing = false; auto CreateHandleFn = HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); auto CreateHandleUses = CreateHandleFn->uses(); for (auto FI = CreateHandleUses.begin(); FI != CreateHandleUses.end();) { auto &FunctionUse = *FI++; auto FunctionUser = FunctionUse.getUser(); auto instruction = cast(FunctionUser); Value *index = instruction->getOperand(3); if (!isa(index)) { FoundDynamicIndexing = true; break; } } if (FoundDynamicIndexing) { if (OSOverride != nullptr) { formatted_raw_ostream FOS(*OSOverride); FOS << "FoundDynamicIndexing"; } } } else { { if (DM.m_ShaderFlags.GetForceEarlyDepthStencil()) { if (OSOverride != nullptr) { formatted_raw_ostream FOS(*OSOverride); FOS << "ShouldAssumeDsvAccess"; } } for (llvm::Function &F : M.functions()) { if (!F.getBasicBlockList().empty()) { IRBuilder<> Builder(F.getEntryBlock().getFirstInsertionPt()); unsigned int UAVResourceHandle = static_cast(DM.GetUAVs().size()); // Set up a UAV with structure of a single int SmallVector Elements{Type::getInt32Ty(Ctx)}; llvm::StructType *UAVStructTy = llvm::StructType::create(Elements, "class.RWStructuredBuffer"); std::unique_ptr pUAV = llvm::make_unique(); pUAV->SetGlobalName("PIX_CountUAVName"); pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo())); pUAV->SetID(UAVResourceHandle); pUAV->SetSpaceID(( unsigned int)-2); // This is the reserved-for-tools register space pUAV->SetSampleCount(1); pUAV->SetGloballyCoherent(false); pUAV->SetHasCounter(false); pUAV->SetCompType(CompType::getI32()); pUAV->SetLowerBound(0); pUAV->SetRangeSize(1); pUAV->SetKind(DXIL::ResourceKind::RawBuffer); auto pAnnotation = DM.GetTypeSystem().GetStructAnnotation(UAVStructTy); if (pAnnotation == nullptr) { pAnnotation = DM.GetTypeSystem().AddStructAnnotation(UAVStructTy); pAnnotation->GetFieldAnnotation(0).SetCBufferOffset(0); pAnnotation->GetFieldAnnotation(0).SetCompType( hlsl::DXIL::ComponentType::I32); pAnnotation->GetFieldAnnotation(0).SetFieldName("count"); } ID = DM.AddUAV(std::move(pUAV)); assert((unsigned)ID == UAVResourceHandle); // Create handle for the newly-added UAV Function *CreateHandleOpFunc = HlslOP->GetOpFunc( DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); Constant *CreateHandleOpcodeArg = HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle); Constant *UAVArg = HlslOP->GetI8Const( static_cast::type>( DXIL::ResourceClass::UAV)); Constant *MetaDataArg = HlslOP->GetU32Const(ID); // position of the metadata record in the // corresponding metadata list Constant *IndexArg = HlslOP->GetU32Const(0); // Constant *FalseArg = HlslOP->GetI1Const(0); // non-uniform resource index: false m_FunctionToUAVHandle[&F] = Builder.CreateCall( CreateHandleOpFunc, {CreateHandleOpcodeArg, UAVArg, MetaDataArg, IndexArg, FalseArg}, "PIX_CountUAV_Handle"); } } DM.ReEmitDxilResources(); } for (llvm::Function &F : M.functions()) { // Only used DXIL intrinsics: if (!F.isDeclaration() || F.isIntrinsic() || F.use_empty() || !OP::IsDxilOpFunc(&F)) continue; // Gather handle parameter indices, if any FunctionType *fnTy = cast(F.getType()->getPointerElementType()); SmallVector handleParams; for (unsigned iParam = 1; iParam < fnTy->getFunctionNumParams(); ++iParam) { if (fnTy->getParamType(iParam) == HlslOP->GetHandleType()) handleParams.push_back(iParam); } if (handleParams.empty()) continue; auto FunctionUses = F.uses(); for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { auto &FunctionUse = *FI++; auto FunctionUser = FunctionUse.getUser(); auto Call = cast(FunctionUser); auto opCode = OP::GetDxilOpFuncCallInst(Call); // Base Read/Write on function attribute - should match for all normal // resource operations ShaderAccessFlags readWrite = ShaderAccessFlags::Write; if (OP::GetMemAccessAttr(opCode) == llvm::Attribute::AttrKind::ReadOnly) readWrite = ShaderAccessFlags::Read; // Special cases switch (opCode) { case DXIL::OpCode::GetDimensions: // readWrite = ShaderAccessFlags::DescriptorRead; // TODO: Support // GetDimensions continue; case DXIL::OpCode::BufferUpdateCounter: readWrite = ShaderAccessFlags::Counter; break; case DXIL::OpCode::TraceRay: // Read of AccelerationStructure; doesn't match function attribute // readWrite = ShaderAccessFlags::Read; // TODO: Support continue; case DXIL::OpCode::RayQuery_TraceRayInline: { // Read of AccelerationStructure; doesn't match function attribute auto res = GetResourceFromHandle(Call->getArgOperand(2), DM); if (EmitResourceAccess( res, Call, HlslOP, Ctx, ShaderAccessFlags::Read)) { Modified = true; } } continue; default: break; } for (unsigned iParam : handleParams) { auto res = GetResourceFromHandle(Call->getArgOperand(iParam), DM); // Don't instrument the accesses to the UAV that we just added if (res.resClass == DXIL::ResourceClass::UAV && res.resource->GetSpaceID() == (unsigned)-2) { break; } if (EmitResourceAccess(res, Call, HlslOP, Ctx, readWrite)) { Modified = true; } // Remaining resources are DescriptorRead. readWrite = ShaderAccessFlags::DescriptorRead; } } } if (OSOverride != nullptr) { formatted_raw_ostream FOS(*OSOverride); FOS << "DynamicallyIndexedBindPoints="; for (auto const &bp : m_DynamicallyIndexedBindPoints) { FOS << EncodeRegisterType(bp.Type) << bp.Space << ':' << bp.Index << ';'; } FOS << "."; } } return Modified; } char DxilShaderAccessTracking::ID = 0; ModulePass *llvm::createDxilShaderAccessTrackingPass() { return new DxilShaderAccessTracking(); } INITIALIZE_PASS(DxilShaderAccessTracking, "hlsl-dxil-pix-shader-access-instrumentation", "HLSL DXIL shader access tracking for PIX", false, false)