/////////////////////////////////////////////////////////////////////////////// // // // DxilCondenseResources.cpp // // Copyright (C) Microsoft Corporation. All rights reserved. // // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // // Provides a pass to make resource IDs zero-based and dense. // // // /////////////////////////////////////////////////////////////////////////////// #include "dxc/HLSL/DxilGenerationPass.h" #include "dxc/HLSL/DxilOperations.h" #include "dxc/HLSL/DxilSignatureElement.h" #include "dxc/HLSL/DxilModule.h" #include "dxc/Support/Global.h" #include "dxc/HLSL/DxilTypeSystem.h" #include "dxc/HLSL/DxilInstructions.h" #include "dxc/HLSL/DxilSpanAllocator.h" #include "dxc/HLSL/HLMatrixLowerHelper.h" #include "dxc/HLSL/DxilUtil.h" #include "dxc/HLSL/HLModule.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/DebugInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/Local.h" #include #include using namespace llvm; using namespace hlsl; // Resource rangeID remap. namespace { struct ResourceID { DXIL::ResourceClass Class; // Resource class. unsigned ID; // Resource ID, as specified on entry. bool operator<(const ResourceID &other) const { if (Class < other.Class) return true; if (Class > other.Class) return false; if (ID < other.ID) return true; return false; } }; struct RemapEntry { ResourceID ResID; // Resource identity, as specified on entry. DxilResourceBase *Resource; // In-memory resource representation. unsigned Index; // Index in resource vector - new ID for the resource. 
}; typedef std::map RemapEntryCollection; template void BuildRewrites(const std::vector> &Rs, RemapEntryCollection &C) { const unsigned s = (unsigned)Rs.size(); for (unsigned i = 0; i < s; ++i) { const std::unique_ptr &R = Rs[i]; if (R->GetID() != i) { ResourceID RId = {R->GetClass(), R->GetID()}; RemapEntry RE = {RId, R.get(), i}; C[RId] = RE; } } } // Build m_rewrites, returns 'true' if any rewrites are needed. bool BuildRewriteMap(RemapEntryCollection &rewrites, DxilModule &DM) { BuildRewrites(DM.GetCBuffers(), rewrites); BuildRewrites(DM.GetSRVs(), rewrites); BuildRewrites(DM.GetUAVs(), rewrites); BuildRewrites(DM.GetSamplers(), rewrites); return !rewrites.empty(); } void ApplyRewriteMapOnResTable(RemapEntryCollection &rewrites, DxilModule &DM) { for (auto &entry : rewrites) { entry.second.Resource->SetID(entry.second.Index); } } } // namespace // Resource lowerBound allocation. namespace { template static bool AllocateDxilResource(const std::vector> &resourceList, LLVMContext &Ctx, unsigned AutoBindingSpace=0) { bool bChanged = false; SpacesAllocator SAlloc; for (auto &res : resourceList) { const unsigned space = res->GetSpaceID(); typename SpacesAllocator::Allocator &alloc = SAlloc.Get(space); if (res->IsAllocated()) { const unsigned reg = res->GetLowerBound(); const T *conflict = nullptr; if (res->IsUnbounded()) { const T *unbounded = alloc.GetUnbounded(); if (unbounded) { Ctx.emitError(Twine("more than one unbounded resource (") + unbounded->GetGlobalName() + (" and ") + res->GetGlobalName() + (") in space ") + Twine(space)); } else { conflict = alloc.Insert(res.get(), reg, res->GetUpperBound()); if (!conflict) alloc.SetUnbounded(res.get()); } } else { conflict = alloc.Insert(res.get(), reg, res->GetUpperBound()); } if (conflict) { Ctx.emitError(((res->IsUnbounded()) ? 
Twine("unbounded ") : Twine("")) + Twine("resource ") + res->GetGlobalName() + Twine(" at register ") + Twine(reg) + Twine(" overlaps with resource ") + conflict->GetGlobalName() + Twine(" at register ") + Twine(conflict->GetLowerBound()) + Twine(", space ") + Twine(space)); } } } // Allocate. const unsigned space = AutoBindingSpace; typename SpacesAllocator::Allocator &alloc0 = SAlloc.Get(space); for (auto &res : resourceList) { if (!res->IsAllocated()) { DXASSERT(res->GetSpaceID() == 0, "otherwise non-zero space has no user register assignment"); unsigned reg = 0; bool success = false; if (res->IsUnbounded()) { const T *unbounded = alloc0.GetUnbounded(); if (unbounded) { Ctx.emitError(Twine("more than one unbounded resource (") + unbounded->GetGlobalName() + Twine(" and ") + res->GetGlobalName() + Twine(") in space ") + Twine(space)); } else { success = alloc0.AllocateUnbounded(res.get(), reg); if (success) alloc0.SetUnbounded(res.get()); } } else { success = alloc0.Allocate(res.get(), res->GetRangeSize(), reg); } if (success) { res->SetLowerBound(reg); res->SetSpaceID(space); bChanged = true; } else { Ctx.emitError(((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) + Twine("resource ") + res->GetGlobalName() + Twine(" could not be allocated")); } } } return bChanged; } bool AllocateDxilResources(DxilModule &DM) { uint32_t AutoBindingSpace = DM.GetAutoBindingSpace(); if (AutoBindingSpace == UINT_MAX) { // For libraries, we don't allocate unless AutoBindingSpace is set. if (DM.GetShaderModel()->IsLib()) return false; // For shaders, we allocate in space 0 by default. 
AutoBindingSpace = 0; } bool bChanged = false; bChanged |= AllocateDxilResource(DM.GetCBuffers(), DM.GetCtx(), AutoBindingSpace); bChanged |= AllocateDxilResource(DM.GetSamplers(), DM.GetCtx(), AutoBindingSpace); bChanged |= AllocateDxilResource(DM.GetUAVs(), DM.GetCtx(), AutoBindingSpace); bChanged |= AllocateDxilResource(DM.GetSRVs(), DM.GetCtx(), AutoBindingSpace); return bChanged; } } // namespace class DxilCondenseResources : public ModulePass { private: RemapEntryCollection m_rewrites; public: static char ID; // Pass identification, replacement for typeid explicit DxilCondenseResources() : ModulePass(ID) {} const char *getPassName() const override { return "DXIL Condense Resources"; } bool runOnModule(Module &M) override { DxilModule &DM = M.GetOrCreateDxilModule(); // Skip lib. if (DM.GetShaderModel()->IsLib()) return false; // Remove unused resource. DM.RemoveUnusedResources(); // Make sure all resource types are dense; build a map of rewrites. if (BuildRewriteMap(m_rewrites, DM)) { // Rewrite all instructions that refer to resources in the map. ApplyRewriteMap(DM); } bool hasResource = DM.GetCBuffers().size() || DM.GetUAVs().size() || DM.GetSRVs().size() || DM.GetSamplers().size(); if (hasResource) { if (!DM.GetShaderModel()->IsLib()) { AllocateDxilResources(DM); PatchCreateHandle(DM); } } return true; } DxilResourceBase &GetFirstRewrite() const { DXASSERT_NOMSG(!m_rewrites.empty()); return *m_rewrites.begin()->second.Resource; } private: void ApplyRewriteMap(DxilModule &DM); // Add lowbound to create handle range index. 
void PatchCreateHandle(DxilModule &DM); }; void DxilCondenseResources::ApplyRewriteMap(DxilModule &DM) { for (Function &F : DM.GetModule()->functions()) { if (F.isDeclaration()) { continue; } for (inst_iterator iter = inst_begin(F), E = inst_end(F); iter != E; ++iter) { llvm::Instruction &I = *iter; DxilInst_CreateHandle CH(&I); if (!CH) continue; ResourceID RId; RId.Class = (DXIL::ResourceClass)CH.get_resourceClass_val(); RId.ID = (unsigned)llvm::dyn_cast(CH.get_rangeId()) ->getZExtValue(); RemapEntryCollection::iterator it = m_rewrites.find(RId); if (it == m_rewrites.end()) { continue; } CallInst *CI = cast(&I); Value *newRangeID = DM.GetOP()->GetU32Const(it->second.Index); CI->setArgOperand(DXIL::OperandIndex::kCreateHandleResIDOpIdx, newRangeID); } } ApplyRewriteMapOnResTable(m_rewrites, DM); } namespace { void PatchLowerBoundOfCreateHandle(CallInst *handle, DxilModule &DM) { DxilInst_CreateHandle createHandle(handle); DXASSERT_NOMSG(createHandle); DXIL::ResourceClass ResClass = static_cast(createHandle.get_resourceClass_val()); // Dynamic rangeId is not supported - skip and let validation report the // error. 
if (!isa(createHandle.get_rangeId())) return; unsigned rangeId = cast(createHandle.get_rangeId())->getLimitedValue(); DxilResourceBase *res = nullptr; switch (ResClass) { case DXIL::ResourceClass::SRV: res = &DM.GetSRV(rangeId); break; case DXIL::ResourceClass::UAV: res = &DM.GetUAV(rangeId); break; case DXIL::ResourceClass::CBuffer: res = &DM.GetCBuffer(rangeId); break; case DXIL::ResourceClass::Sampler: res = &DM.GetSampler(rangeId); break; default: DXASSERT(0, "invalid res class"); return; } IRBuilder<> Builder(handle); unsigned lowBound = res->GetLowerBound(); if (lowBound) { Value *Index = createHandle.get_index(); if (ConstantInt *cIndex = dyn_cast(Index)) { unsigned newIdx = lowBound + cIndex->getLimitedValue(); handle->setArgOperand(DXIL::OperandIndex::kCreateHandleResIndexOpIdx, Builder.getInt32(newIdx)); } else { Value *newIdx = Builder.CreateAdd(Index, Builder.getInt32(lowBound)); handle->setArgOperand(DXIL::OperandIndex::kCreateHandleResIndexOpIdx, newIdx); } } } static void PatchTBufferCreateHandle(CallInst *handle, DxilModule &DM, std::unordered_set &tbufferIDs) { DxilInst_CreateHandle createHandle(handle); DXASSERT_NOMSG(createHandle); DXIL::ResourceClass ResClass = static_cast(createHandle.get_resourceClass_val()); if (ResClass != DXIL::ResourceClass::CBuffer) return; Value *resID = createHandle.get_rangeId(); DXASSERT(isa(resID), "cannot handle dynamic resID for cbuffer CreateHandle"); if (!isa(resID)) return; unsigned rangeId = cast(resID)->getLimitedValue(); DxilResourceBase *res = &DM.GetCBuffer(rangeId); // For TBuffer, we need to switch resource type from CBuffer to SRV if (res->GetKind() == DXIL::ResourceKind::TBuffer) { // Track cbuffers IDs that are actually tbuffers tbufferIDs.insert(rangeId); hlsl::OP *hlslOP = DM.GetOP(); llvm::LLVMContext &Ctx = DM.GetCtx(); // Temporarily add SRV size to rangeID to guarantee unique new SRV ID Value *newRangeID = hlslOP->GetU32Const(rangeId + DM.GetSRVs().size()); 
handle->setArgOperand(DXIL::OperandIndex::kCreateHandleResIDOpIdx, newRangeID); // switch create handle to SRV handle->setArgOperand(DXIL::OperandIndex::kCreateHandleResClassOpIdx, hlslOP->GetU8Const( static_cast::type>( DXIL::ResourceClass::SRV))); Type *doubleTy = Type::getDoubleTy(Ctx); Type *i64Ty = Type::getInt64Ty(Ctx); // Replace corresponding cbuffer loads with typed buffer loads for (auto U = handle->user_begin(); U != handle->user_end(); ) { CallInst *I = cast(*(U++)); DXASSERT(I && OP::IsDxilOpFuncCallInst(I), "otherwise unexpected user of CreateHandle value"); DXIL::OpCode opcode = OP::GetDxilOpFuncCallInst(I); if (opcode == DXIL::OpCode::CBufferLoadLegacy) { DxilInst_CBufferLoadLegacy cbLoad(I); // Replace with appropriate buffer load instruction IRBuilder<> Builder(I); opcode = OP::OpCode::BufferLoad; Type *Ty = Type::getInt32Ty(Ctx); Function *BufLoad = hlslOP->GetOpFunc(opcode, Ty); Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *undefI = UndefValue::get(Type::getInt32Ty(Ctx)); Value *offset = cbLoad.get_regIndex(); CallInst* load = Builder.CreateCall(BufLoad, {opArg, handle, offset, undefI}); // Find extractelement uses of cbuffer load and replace + generate bitcast as necessary for (auto LU = I->user_begin(); LU != I->user_end(); ) { ExtractValueInst *evInst = dyn_cast(*(LU++)); DXASSERT(evInst && evInst->getNumIndices() == 1, "user of cbuffer load result should be extractvalue"); uint64_t idx = evInst->getIndices()[0]; Type *EltTy = evInst->getType(); IRBuilder<> EEBuilder(evInst); Value *result = nullptr; if (EltTy != Ty) { // extract two values and DXIL::OpCode::MakeDouble or construct i64 if ((EltTy == doubleTy) || (EltTy == i64Ty)) { DXASSERT(idx < 2, "64-bit component index out of range"); // This assumes big endian order in tbuffer elements (is this correct?) 
Value *low = EEBuilder.CreateExtractValue(load, idx * 2); Value *high = EEBuilder.CreateExtractValue(load, idx * 2 + 1); if (EltTy == doubleTy) { opcode = OP::OpCode::MakeDouble; Function *MakeDouble = hlslOP->GetOpFunc(opcode, doubleTy); Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); result = EEBuilder.CreateCall(MakeDouble, {opArg, low, high}); } else { high = EEBuilder.CreateZExt(high, i64Ty); low = EEBuilder.CreateZExt(low, i64Ty); high = EEBuilder.CreateShl(high, hlslOP->GetU64Const(32)); result = EEBuilder.CreateOr(high, low); } } else { result = EEBuilder.CreateExtractValue(load, idx); result = EEBuilder.CreateBitCast(result, EltTy); } } else { result = EEBuilder.CreateExtractValue(load, idx); } evInst->replaceAllUsesWith(result); evInst->eraseFromParent(); } } else if (opcode == DXIL::OpCode::CBufferLoad) { // TODO: Handle this, or prevent this for tbuffer DXASSERT(false, "otherwise CBufferLoad used for tbuffer rather than CBufferLoadLegacy"); } else { DXASSERT(false, "otherwise unexpected user of CreateHandle value"); } I->eraseFromParent(); } } } } void DxilCondenseResources::PatchCreateHandle(DxilModule &DM) { Function *createHandle = DM.GetOP()->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(DM.GetCtx())); for (User *U : createHandle->users()) { PatchLowerBoundOfCreateHandle(cast(U), DM); } } char DxilCondenseResources::ID = 0; bool llvm::AreDxilResourcesDense(llvm::Module *M, hlsl::DxilResourceBase **ppNonDense) { DxilModule &DM = M->GetOrCreateDxilModule(); RemapEntryCollection rewrites; if (BuildRewriteMap(rewrites, DM)) { *ppNonDense = rewrites.begin()->second.Resource; return false; } else { *ppNonDense = nullptr; return true; } } ModulePass *llvm::createDxilCondenseResourcesPass() { return new DxilCondenseResources(); } INITIALIZE_PASS(DxilCondenseResources, "hlsl-dxil-condense", "DXIL Condense Resources", false, false) namespace { class DxilLowerCreateHandleForLib : public ModulePass { private: RemapEntryCollection m_rewrites; 
DxilModule *m_DM; bool m_HasDbgInfo; bool m_bIsLib; bool m_bLegalizationFailed; public: static char ID; // Pass identification, replacement for typeid explicit DxilLowerCreateHandleForLib() : ModulePass(ID) {} const char *getPassName() const override { return "DXIL Lower createHandleForLib"; } bool runOnModule(Module &M) override { DxilModule &DM = M.GetOrCreateDxilModule(); m_DM = &DM; // Clear llvm used to remove unused resource. m_DM->ClearLLVMUsed(); m_bIsLib = DM.GetShaderModel()->IsLib(); m_bLegalizationFailed = false; bool bChanged = false; unsigned numResources = DM.GetCBuffers().size() + DM.GetUAVs().size() + DM.GetSRVs().size() + DM.GetSamplers().size(); if (!numResources) return false; // Switch tbuffers to SRVs, as they have been treated as cbuffers up to this // point. if (DM.GetCBuffers().size()) bChanged = PatchTBuffers(DM) || bChanged; // Remove unused resource. DM.RemoveUnusedResourceSymbols(); unsigned newResources = DM.GetCBuffers().size() + DM.GetUAVs().size() + DM.GetSRVs().size() + DM.GetSamplers().size(); bChanged = bChanged || (numResources != newResources); if (0 == newResources) return bChanged; bChanged |= AllocateDxilResources(DM); if (m_bIsLib && DM.GetShaderModel()->GetMinor() == ShaderModel::kOfflineMinor) return bChanged; // Make sure no select on resource. bChanged |= RemovePhiOnResource(); if (m_bIsLib || m_bLegalizationFailed) return bChanged; bChanged = true; // Load up debug information, to cross-reference values and the instructions // used to load them. m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0; GenerateDxilResourceHandles(); if (DM.GetOP()->UseMinPrecision()) UpdateStructTypeForLegacyLayout(); // Change resource symbol into undef. UpdateResourceSymbols(); // Remove unused createHandleForLib functions. 
dxilutil::RemoveUnusedFunctions(M, DM.GetEntryFunction(), DM.GetPatchConstantFunction(), m_bIsLib); return bChanged; } private: bool RemovePhiOnResource(); void UpdateResourceSymbols(); void TranslateDxilResourceUses(DxilResourceBase &res); void GenerateDxilResourceHandles(); void UpdateStructTypeForLegacyLayout(); // Switch CBuffer for SRV for TBuffers. bool PatchTBuffers(DxilModule &DM); void PatchTBufferUse(Value *V, DxilModule &DM); }; } // namespace // Phi on resource. namespace { typedef std::unordered_map ValueToValueMap; typedef llvm::SetVector ValueSetVector; typedef llvm::SmallVector IndexVector; typedef std::unordered_map ValueToIdxMap; //#define SUPPORT_SELECT_ON_ALLOCA // Errors: class ResourceUseErrors { bool m_bErrorsReported; public: ResourceUseErrors() : m_bErrorsReported(false) {} enum ErrorCode { // Collision between use of one resource GV and another. // All uses must be guaranteed to resolve to only one GV. // Additionally, when writing resource to alloca, all uses // of that alloca are considered resolving to a single GV. GVConflicts, // static global resources are disallowed for libraries at this time. // for non-library targets, they should have been eliminated already. StaticGVUsed, // user function calls with resource params or return type are // are currently disallowed for libraries. UserCallsWithResources, // When searching up from store pointer looking for alloca, // we encountered an unexpted value type UnexpectedValuesFromStorePointer, // When remapping values to be replaced, we add them to RemappedValues // so we don't use dead values stored in other sets/maps. Circular // remaps that should not happen are aadded to RemappingCyclesDetected. RemappingCyclesDetected, // Without SUPPORT_SELECT_ON_ALLOCA, phi/select on alloca based // pointer is disallowed, since this scenario is still untested. // This error also covers any other unknown alloca pointer uses. // Supported: // alloca (-> gep)? -> load -> ... // alloca (-> gep)? -> store. 
// Unsupported without SUPPORT_SELECT_ON_ALLOCA: // alloca (-> gep)? -> phi/select -> ... AllocaUserDisallowed, #ifdef SUPPORT_SELECT_ON_ALLOCA // Conflict in select/phi between GV pointer and alloca pointer. This // algorithm can't handle this case. AllocaSelectConflict, #endif ErrorCodeCount }; const StringRef ErrorText[ErrorCodeCount] = { "local resource not guaranteed to map to unique global resource.", "static global resource use is disallowed for library functions.", "exported library functions cannot have resource parameters or return value.", "internal error: unexpected instruction type when looking for alloca from store.", "internal error: cycles detected in value remapping.", "phi/select disallowed on pointers to local resources." #ifdef SUPPORT_SELECT_ON_ALLOCA ,"unable to resolve merge of global and local resource pointers." #endif }; ValueSetVector ErrorSets[ErrorCodeCount]; // Ulitimately, the goal of ErrorUsers is to mark all create handles // so we don't try to report errors on them again later. 
std::unordered_set ErrorUsers; // users of error values bool AddErrorUsers(Value* V) { auto it = ErrorUsers.insert(V); if (!it.second) return false; // already there if (isa(V) || isa(V) || isa(V) || isa(V) || isa(V)) { for (auto U : V->users()) { AddErrorUsers(U); } } else if(isa(V)) { AddErrorUsers(cast(V)->getPointerOperand()); } // create handle will be marked, but users not followed return true; } void ReportError(ErrorCode ec, Value* V) { DXASSERT_NOMSG(ec < ErrorCodeCount); if (!ErrorSets[ec].insert(V)) return; // Error already reported AddErrorUsers(V); m_bErrorsReported = true; if (Instruction *I = dyn_cast(V)) { dxilutil::EmitErrorOnInstruction(I, ErrorText[ec]); } else { StringRef Name = V->getName(); std::string escName; if (isa(V)) { llvm::raw_string_ostream os(escName); dxilutil::PrintEscapedString(Name, os); os.flush(); Name = escName; } Twine msg = Twine(ErrorText[ec]) + " Value: " + Name; V->getContext().emitError(msg); } } bool ErrorsReported() { return m_bErrorsReported; } }; unsigned CountArrayDimensions(Type* Ty, // Optionally collect dimensions SmallVector *dims = nullptr) { if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); unsigned dim = 0; if (dims) dims->clear(); while (Ty->isArrayTy()) { if (dims) dims->push_back(Ty->getArrayNumElements()); dim++; Ty = Ty->getArrayElementType(); } return dim; } // Helper class for legalizing resource use // Convert select/phi on resources to select/phi on index to GEP on GV. // Convert resource alloca to index alloca. 
// Assumes createHandleForLib has no select/phi class LegalizeResourceUseHelper { // Change: // gep1 = GEP gRes, i1 // res1 = load gep1 // gep2 = GEP gRes, i2 // gep3 = GEP gRes, i3 // gep4 = phi gep2, gep3 <-- handle select/phi on GEP // res4 = load gep4 // res5 = phi res1, res4 // res6 = load GEP gRes, 23 <-- handle constant GepExpression // res = select cnd2, res5, res6 // handle = createHandleForLib(res) // To: // i4 = phi i2, i3 // i5 = phi i1, i4 // i6 = select cnd, i5, 23 // gep = GEP gRes, i6 // res = load gep // handle = createHandleForLib(res) // Also handles alloca // resArray = alloca [2 x Resource] // gep1 = GEP gRes, i1 // res1 = load gep1 // gep2 = GEP gRes, i2 // gep3 = GEP gRes, i3 // phi4 = phi gep2, gep3 // res4 = load phi4 // gep5 = GEP resArray, 0 // gep6 = GEP resArray, 1 // store gep5, res1 // store gep6, res4 // gep7 = GEP resArray, i7 <-- dynamically index array // res = load gep7 // handle = createHandleForLib(res) // Desired result: // idxArray = alloca [2 x i32] // phi4 = phi i2, i3 // gep5 = GEP idxArray, 0 // gep6 = GEP idxArray, 1 // store gep5, i1 // store gep6, phi4 // gep7 = GEP idxArray, i7 // gep8 = GEP gRes, gep7 // res = load gep8 // handle = createHandleForLib(res) // Also handles multi-dim resource index and multi-dim resource array allocas // Basic algorithm: // - recursively mark each GV user with GV (ValueToResourceGV) // - verify only one GV used for any given value // - handle allocas by searching up from store for alloca // - then recursively mark alloca users // - ResToIdxReplacement keeps track of vector of indices that // will be used to replace a given resource value or pointer // - Next, create selects/phis for indices corresponding to // selects/phis on resource pointers or values. 
// - leave incoming index values undef for now // - Create index allocas to replace resource allocas // - Create GEPs on index allocas to replace GEPs on resource allocas // - Create index loads on index allocas to replace loads on resource alloca GEP // - Fill in replacements for GEPs on resource GVs // - copy replacement index vectors to corresponding loads // - Create index stores to replace resource stores to alloca/GEPs // - Update selects/phis incoming index values // - SimplifyMerges: replace index phis/selects on same value with that value // - RemappedValues[phi/select] set to replacement value // - use LookupValue from now on when reading from ResToIdxReplacement // - Update handles by replacing load/GEP chains that go through select/phi // with direct GV GEP + load, with select/phi on GEP indices instead. public: ResourceUseErrors m_Errors; ValueToValueMap ValueToResourceGV; ValueToIdxMap ResToIdxReplacement; // Value sets we can use to iterate ValueSetVector Selects, GEPs, Stores, Handles; ValueSetVector Allocas, AllocaGEPs, AllocaLoads; #ifdef SUPPORT_SELECT_ON_ALLOCA ValueSetVector AllocaSelects; #endif std::unordered_set NonUniformSet; // New index selects created by pass, so we can try simplifying later ValueSetVector NewSelects; // Values that have been replaced with other values need remapping ValueToValueMap RemappedValues; // Things to clean up if no users: std::unordered_set CleanupInsts; GlobalVariable *LookupResourceGV(Value *V) { auto itGV = ValueToResourceGV.find(V); if (itGV == ValueToResourceGV.end()) return nullptr; return cast(itGV->second); } // Follow RemappedValues, return input if not remapped Value *LookupValue(Value *V) { auto it = RemappedValues.find(V); SmallPtrSet visited; while (it != RemappedValues.end()) { // Cycles should not happen, but are bad if they do. 
if (visited.count(it->second)) { DXASSERT(false, "otherwise, circular remapping"); m_Errors.ReportError(ResourceUseErrors::RemappingCyclesDetected, V); break; } V = it->second; it = RemappedValues.find(V); if (it != RemappedValues.end()) visited.insert(V); } return V; } bool AreLoadUsersTrivial(LoadInst *LI) { for (auto U : LI->users()) { if (CallInst *CI = dyn_cast(U)) { Function *F = CI->getCalledFunction(); DxilModule &DM = F->getParent()->GetDxilModule(); hlsl::OP *hlslOP = DM.GetOP(); if (hlslOP->IsDxilOpFunc(F)) { hlsl::OP::OpCodeClass opClass; if (hlslOP->GetOpCodeClass(F, opClass) && opClass == DXIL::OpCodeClass::CreateHandleForLib) { continue; } } } return false; } return true; } // This is used to quickly skip the common case where no work is needed bool AreGEPUsersTrivial(GEPOperator *GEP) { if (GlobalVariable *GV = LookupResourceGV(GEP)) { if (GEP->getPointerOperand() != LookupResourceGV(GEP)) return false; } for (auto U : GEP->users()) { if (LoadInst *LI = dyn_cast(U)) { if (AreLoadUsersTrivial(LI)) continue; } return false; } return true; } // AssignResourceGVFromStore is used on pointer being stored to. 
// Follow GEP/Phi/Select up to Alloca, then CollectResourceGVUsers on Alloca void AssignResourceGVFromStore(GlobalVariable *GV, Value *V, SmallPtrSet &visited, bool bNonUniform) { // Prevent cycles as we search up if (visited.count(V) != 0) return; // Verify and skip if already processed auto it = ValueToResourceGV.find(V); if (it != ValueToResourceGV.end()) { if (it->second != GV) { m_Errors.ReportError(ResourceUseErrors::GVConflicts, V); } return; } if (AllocaInst *AI = dyn_cast(V)) { CollectResourceGVUsers(GV, AI, /*bAlloca*/true, bNonUniform); return; } else if (GEPOperator *GEP = dyn_cast(V)) { // follow the pointer up AssignResourceGVFromStore(GV, GEP->getPointerOperand(), visited, bNonUniform); return; } else if (PHINode *Phi = dyn_cast(V)) { #ifdef SUPPORT_SELECT_ON_ALLOCA // follow all incoming values for (auto it : Phi->operand_values()) AssignResourceGVFromStore(GV, it, visited, bNonUniform); #else m_Errors.ReportError(ResourceUseErrors::AllocaUserDisallowed, V); #endif return; } else if (SelectInst *Sel = dyn_cast(V)) { #ifdef SUPPORT_SELECT_ON_ALLOCA // follow all incoming values AssignResourceGVFromStore(GV, Sel->getTrueValue(), visited, bNonUniform); AssignResourceGVFromStore(GV, Sel->getFalseValue(), visited, bNonUniform); #else m_Errors.ReportError(ResourceUseErrors::AllocaUserDisallowed, V); #endif return; } else if (isa(V) && cast(V)->getLinkage() == GlobalVariable::LinkageTypes::InternalLinkage) { // this is writing to global static, which is disallowed at this point. m_Errors.ReportError(ResourceUseErrors::StaticGVUsed, V); return; } else { // Most likely storing to output parameter m_Errors.ReportError(ResourceUseErrors::UserCallsWithResources, V); return; } return; } // Recursively mark values with GV, following users. // Starting value V should be GV itself. // Returns true if value/uses reference no other GV in map. 
//   GV          - resource global that V is derived from.
//   V           - value to tag; classified into one of the working sets below.
//   bAlloca     - true while walking users reached through an alloca; cleared
//                 again once a load (or GV itself) is crossed.
//   bNonUniform - becomes true after crossing a GEP instruction marked
//                 non-uniform; loads and handles seen afterwards are recorded
//                 in NonUniformSet.
void CollectResourceGVUsers(GlobalVariable *GV, Value *V, bool bAlloca = false, bool bNonUniform = false) {
  // Recursively tag value V and its users as using GV.
  auto it = ValueToResourceGV.find(V);
  if (it != ValueToResourceGV.end()) {
    // Already tagged: only verify consistency, never re-walk the users.
    if (it->second != GV) {
      m_Errors.ReportError(ResourceUseErrors::GVConflicts, V);
#ifdef SUPPORT_SELECT_ON_ALLOCA
    } else {
      // if select/phi, make sure bAlloca is consistent
      if (isa(V) || isa(V))
        if ((bAlloca && AllocaSelects.count(V) == 0) || (!bAlloca && Selects.count(V) == 0))
          m_Errors.ReportError(ResourceUseErrors::AllocaSelectConflict, V);
#endif
    }
    return;
  }
  ValueToResourceGV[V] = GV;
  if (GV == V) {
    // Just add and recurse users
    // make sure bAlloca is clear for users
    bAlloca = false;
  } else if (GEPOperator *GEP = dyn_cast(V)) {
    if (bAlloca)
      AllocaGEPs.insert(GEP);
    else if (!AreGEPUsersTrivial(GEP))
      GEPs.insert(GEP);
    else
      return; // Optimization: skip trivial GV->GEP->load->createHandle
    if (GetElementPtrInst *GEPInst = dyn_cast(GEP)) {
      if (DxilMDHelper::IsMarkedNonUniform(GEPInst))
        bNonUniform = true;
    }
  } else if (LoadInst *LI = dyn_cast(V)) {
    if (bAlloca)
      AllocaLoads.insert(LI);
    // clear bAlloca for users
    bAlloca = false;
    if (bNonUniform)
      NonUniformSet.insert(LI);
  } else if (StoreInst *SI = dyn_cast(V)) {
    Stores.insert(SI);
    if (!bAlloca) {
      // Find and mark allocas this store could be storing to
      SmallPtrSet visited;
      AssignResourceGVFromStore(GV, SI->getPointerOperand(), visited, bNonUniform);
    }
    // A store has no users to follow.
    return;
  } else if (PHINode *Phi = dyn_cast(V)) {
    if (bAlloca) {
#ifdef SUPPORT_SELECT_ON_ALLOCA
      AllocaSelects.insert(Phi);
#else
      m_Errors.ReportError(ResourceUseErrors::AllocaUserDisallowed, V);
#endif
    } else {
      Selects.insert(Phi);
    }
  } else if (SelectInst *Sel = dyn_cast(V)) {
    if (bAlloca) {
#ifdef SUPPORT_SELECT_ON_ALLOCA
      AllocaSelects.insert(Sel);
#else
      m_Errors.ReportError(ResourceUseErrors::AllocaUserDisallowed, V);
#endif
    } else {
      Selects.insert(Sel);
    }
  } else if (AllocaInst *AI = dyn_cast(V)) {
    Allocas.insert(AI);
    // set bAlloca for users
    bAlloca = true;
  } else if (Constant *C = dyn_cast(V)) {
    // skip @llvm.used entry
    // (C itself is not used; the branch only filters constants out.)
    return;
  } else if (bAlloca) {
    m_Errors.ReportError(ResourceUseErrors::AllocaUserDisallowed, V);
  } else {
    // Must be createHandleForLib or user function call.
    // NOTE(review): this is an unconditional cast, so it relies on every
    // remaining kind of user being a call instruction - confirm.
    CallInst *CI = cast(V);
    Function *F = CI->getCalledFunction();
    DxilModule &DM = GV->getParent()->GetDxilModule();
    hlsl::OP *hlslOP = DM.GetOP();
    if (hlslOP->IsDxilOpFunc(F)) {
      hlsl::OP::OpCodeClass opClass;
      if (hlslOP->GetOpCodeClass(F, opClass) && opClass == DXIL::OpCodeClass::CreateHandleForLib) {
        Handles.insert(CI);
        if (bNonUniform)
          NonUniformSet.insert(CI);
        return;
      }
    }
    // This could be user call with resource param, which is disallowed for lib_6_3
    m_Errors.ReportError(ResourceUseErrors::UserCallsWithResources, V);
    return;
  }
  // Recurse users
  for (auto U : V->users())
    CollectResourceGVUsers(GV, U, bAlloca, bNonUniform);
  return;
}

// Remove conflicting values from sets before
// transforming the remainder.
// V is removed from the working set matching its kind; when it was actually
// present, its users are processed the same way. Handles are removed without
// following their users.
void RemoveConflictingValue(Value* V) {
  bool bRemoved = false;
  if (isa(V)) {
    // Short-circuit: AllocaGEPs is only tried when V was not in GEPs.
    bRemoved = GEPs.remove(V) || AllocaGEPs.remove(V);
  } else if (isa(V)) {
    bRemoved = AllocaLoads.remove(V);
  } else if (isa(V)) {
    bRemoved = Stores.remove(V);
  } else if (isa(V) || isa(V)) {
    bRemoved = Selects.remove(V);
#ifdef SUPPORT_SELECT_ON_ALLOCA
    bRemoved |= AllocaSelects.remove(V);
#endif
  } else if (isa(V)) {
    bRemoved = Allocas.remove(V);
  } else if (isa(V)) {
    bRemoved = Handles.remove(V);
    return; // don't recurse
  }
  if (bRemoved) {
    // Recurse users
    for (auto U : V->users())
      RemoveConflictingValue(U);
  }
}

// Drops every value recorded under GVConflicts from the working sets and
// from ValueToResourceGV, so later steps skip them.
void RemoveConflicts() {
  for (auto V : m_Errors.ErrorSets[ResourceUseErrors::GVConflicts]) {
    RemoveConflictingValue(V);
    ValueToResourceGV.erase(V);
  }
}

// For each resource phi/select, creates replacement phi/select instructions
// (one per array dimension of the associated GV, as reported by
// CountArrayDimensions) and records them in ResToIdxReplacement and
// NewSelects. All operands start as undef; UpdateSelects later installs the
// real incoming values.
void CreateSelects() {
  if (Selects.empty()
#ifdef SUPPORT_SELECT_ON_ALLOCA
      && AllocaSelects.empty()
#endif
      )
    return;
  // Take the context from whichever set has an element.
  LLVMContext &Ctx =
#ifdef SUPPORT_SELECT_ON_ALLOCA
      Selects.empty() ? AllocaSelects[0]->getContext() :
#endif
      Selects[0]->getContext();
  Type *i32Ty = IntegerType::getInt32Ty(Ctx);
  // NOTE(review): a braced list in a range-for builds a std::initializer_list
  // whose elements are copies of the sets, so `&SelectSet == &Selects` below
  // looks like it can never be true (bAlloca would always be set) - confirm.
#ifdef SUPPORT_SELECT_ON_ALLOCA
  for (auto &SelectSet : {Selects, AllocaSelects}) {
    bool bAlloca = !(&SelectSet == &Selects);
#else
  for (auto &SelectSet : { Selects }) {
#endif
    for (auto pValue : SelectSet) {
      Type *SelectTy = i32Ty;
#ifdef SUPPORT_SELECT_ON_ALLOCA
      // For alloca case, type needs to match dimensionality of incoming value
      if (bAlloca) {
        // TODO: Not sure if this case will actually work
        // (or whether it can even be generated from HLSL)
        Type *Ty = pValue->getType();
        SmallVector dims;
        unsigned dim = CountArrayDimensions(Ty, &dims);
        // Wrap i32 in array types, walking dims from last to first.
        for (unsigned i = 0; i < dim; i++)
          SelectTy = ArrayType::get(SelectTy, (uint64_t)dims[dim - i - 1]);
        if (Ty->isPointerTy())
          SelectTy = PointerType::get(SelectTy, 0);
      }
#endif
      // Local placeholder value; intentionally shares its name with the class.
      Value *UndefValue = UndefValue::get(SelectTy);
      if (PHINode *Phi = dyn_cast(pValue)) {
        GlobalVariable *GV = LookupResourceGV(Phi);
        if (!GV)
          continue; // skip value removed due to conflict
        IRBuilder<> PhiBuilder(Phi);
        unsigned gvDim = CountArrayDimensions(GV->getType());
        IndexVector &idxVector = ResToIdxReplacement[Phi];
        idxVector.resize(gvDim, nullptr);
        unsigned numIncoming = Phi->getNumIncomingValues();
        for (unsigned i = 0; i < gvDim; i++) {
          PHINode *newPhi = PhiBuilder.CreatePHI(SelectTy, numIncoming);
          NewSelects.insert(newPhi);
          idxVector[i] = newPhi;
          for (unsigned j = 0; j < numIncoming; j++) {
            // Set incoming values to undef until next pass
            newPhi->addIncoming(UndefValue, Phi->getIncomingBlock(j));
          }
        }
      } else if (SelectInst *Sel = dyn_cast(pValue)) {
        GlobalVariable *GV = LookupResourceGV(Sel);
        if (!GV)
          continue; // skip value removed due to conflict
        IRBuilder<> Builder(Sel);
        unsigned gvDim = CountArrayDimensions(GV->getType());
        IndexVector &idxVector = ResToIdxReplacement[Sel];
        idxVector.resize(gvDim, nullptr);
        for (unsigned i = 0; i < gvDim; i++) {
          // Same condition as the original select; arms are filled in later.
          Value *newSel = Builder.CreateSelect(Sel->getCondition(), UndefValue, UndefValue);
          NewSelects.insert(newSel);
          idxVector[i] = newSel;
        }
      } else {
        DXASSERT(false, "otherwise, non-select/phi in Selects set");
      }
    }
  }
}

// Create index allocas to replace resource allocas
// One i32-based alloca is created per GV array dimension, immediately before
// the resource alloca, and stored in ResToIdxReplacement[alloca].
void CreateIndexAllocas() {
  if (Allocas.empty())
    return;
  Type *i32Ty = IntegerType::getInt32Ty(Allocas[0]->getContext());
  for (auto pValue : Allocas) {
    AllocaInst *pAlloca = cast(pValue);
    GlobalVariable *GV = LookupResourceGV(pAlloca);
    if (!GV)
      continue; // skip value removed due to conflict
    IRBuilder<> AllocaBuilder(pAlloca);
    unsigned gvDim = CountArrayDimensions(GV->getType());
    SmallVector dimVector;
    unsigned allocaTyDim = CountArrayDimensions(pAlloca->getType(), &dimVector);
    // Mirror the alloca's array nesting, with i32 as the innermost element.
    Type *pIndexType = i32Ty;
    for (unsigned i = 0; i < allocaTyDim; i++) {
      pIndexType = ArrayType::get(pIndexType, dimVector[allocaTyDim - i - 1]);
    }
    Value *arraySize = pAlloca->getArraySize();
    IndexVector &idxVector = ResToIdxReplacement[pAlloca];
    idxVector.resize(gvDim, nullptr);
    for (unsigned i = 0; i < gvDim; i++) {
      // Note: this pAlloca shadows the resource alloca of the outer scope.
      AllocaInst *pAlloca = AllocaBuilder.CreateAlloca(pIndexType, arraySize);
      pAlloca->setAlignment(4);
      idxVector[i] = pAlloca;
    }
  }
}

// Add corresponding GEPs for index allocas
// Returns the (memoized) replacement vector for GEP: one new in-bounds GEP,
// with the same indices, on each replacement of GEP's pointer operand.
// NOTE(review): idxVector is a reference into ResToIdxReplacement that is
// held across further operator[] lookups and the recursive call; this relies
// on the container keeping references valid on insertion - confirm against
// its declared type.
IndexVector &ReplaceAllocaGEP(GetElementPtrInst *GEP) {
  IndexVector &idxVector = ResToIdxReplacement[GEP];
  if (!idxVector.empty())
    return idxVector;
  Value *Ptr = GEP->getPointerOperand();
  // Recurse for partial GEPs
  IndexVector &ptrIndices = isa(Ptr) ? ReplaceAllocaGEP(cast(Ptr)) : ResToIdxReplacement[Ptr];
  IRBuilder<> Builder(GEP);
  SmallVector gepIndices;
  for (auto it = GEP->idx_begin(), idxEnd = GEP->idx_end(); it != idxEnd; it++)
    gepIndices.push_back(*it);
  idxVector.resize(ptrIndices.size(), nullptr);
  for (unsigned i = 0; i < ptrIndices.size(); i++) {
    idxVector[i] = Builder.CreateInBoundsGEP(ptrIndices[i], gepIndices);
  }
  return idxVector;
}

// Builds replacements for every GEP recorded in AllocaGEPs.
void ReplaceAllocaGEPs() {
  for (auto V : AllocaGEPs) {
    ReplaceAllocaGEP(cast(V));
  }
}

// For every load in AllocaLoads, emits one load per replacement of its
// pointer operand and records the results as the load's own replacement.
void ReplaceAllocaLoads() {
  for (auto V : AllocaLoads) {
    LoadInst *LI = cast(V);
    Value *Ptr = LI->getPointerOperand();
    IRBuilder<> Builder(LI);
    IndexVector &idxVector = ResToIdxReplacement[V];
    IndexVector &ptrIndices = ResToIdxReplacement[Ptr];
    idxVector.resize(ptrIndices.size(), nullptr);
    for (unsigned i = 0; i < ptrIndices.size(); i++) {
      idxVector[i] = Builder.CreateLoad(ptrIndices[i]);
    }
  }
}

// Add GEP to ResToIdxReplacement with indices from incoming + GEP
// The result has one entry per GV array dimension: leading entries are
// inherited from the pointer operand's replacement (when there is one), the
// rest come from this GEP's own indices after its first, and anything left
// stays at constant zero.
IndexVector &ReplaceGVGEPs(GEPOperator *GEP) {
  IndexVector &idxVector = ResToIdxReplacement[GEP];
  // Skip if already done
  // (we recurse into partial GEP and iterate all GEPs)
  if (!idxVector.empty())
    return idxVector;
  Type *i32Ty = IntegerType::getInt32Ty(GEP->getContext());
  Constant *Zero = Constant::getIntegerValue(i32Ty, APInt(32, 0));
  Value *Ptr = GEP->getPointerOperand();
  // Next slot of idxVector to be written by this GEP's own indices.
  unsigned idx = 0;
  if (GlobalVariable *GV = dyn_cast(Ptr)) {
    unsigned gvDim = CountArrayDimensions(GV->getType());
    idxVector.resize(gvDim, Zero);
  } else if (isa(Ptr) || isa(Ptr) || isa(Ptr)) {
    // Recurse for partial GEPs
    IndexVector &ptrIndices = isa(Ptr) ? ReplaceGVGEPs(cast(Ptr)) : ResToIdxReplacement[Ptr];
    unsigned ptrDim = CountArrayDimensions(Ptr->getType());
    unsigned gvDim = ptrIndices.size();
    DXASSERT(ptrDim <= gvDim, "otherwise incoming pointer has more dimensions than associated GV");
    // Dimensions already consumed by the pointer operand.
    unsigned gepStart = gvDim - ptrDim;
    // Copy indices and add ours
    idxVector.resize(ptrIndices.size(), Zero);
    for (; idx < gepStart; idx++)
      idxVector[idx] = ptrIndices[idx];
  }
  if (GEP->hasIndices()) {
    auto itIdx = GEP->idx_begin();
    ++itIdx; // Always skip leading zero (we don't support GV+n pointer arith)
    while (itIdx != GEP->idx_end())
      idxVector[idx++] = *itIdx++;
  }
  return idxVector;
}

// Add GEPs to ResToIdxReplacement and update loads
void ReplaceGVGEPs() {
  if (GEPs.empty())
    return;
  for (auto V : GEPs) {
    GEPOperator *GEP = cast(V);
    IndexVector &gepVector = ReplaceGVGEPs(GEP);
    for (auto U : GEP->users()) {
      if (LoadInst *LI = dyn_cast(U)) {
        // Just copy incoming indices
        ResToIdxReplacement[LI] = gepVector;
      }
    }
  }
}

// Create new index stores for incoming indices
void ReplaceStores() {
  // generate stores of incoming indices to corresponding index pointers
  if (Stores.empty())
    return;
  for (auto V : Stores) {
    StoreInst *SI = cast(V);
    IRBuilder<> Builder(SI);
    IndexVector &idxVector = ResToIdxReplacement[SI];
    Value *Ptr = SI->getPointerOperand();
    Value *Val = SI->getValueOperand();
    IndexVector &ptrIndices = ResToIdxReplacement[Ptr];
    IndexVector &valIndices = ResToIdxReplacement[Val];
    // Pointer and value replacements are paired element by element.
    DXASSERT_NOMSG(ptrIndices.size() == valIndices.size());
    idxVector.resize(ptrIndices.size(), nullptr);
    for (unsigned i = 0; i < idxVector.size(); i++) {
      idxVector[i] = Builder.CreateStore(valIndices[i], ptrIndices[i]);
    }
  }
}

// For each Phi/Select: update matching incoming values for new phis
// Operands are visited from index 0 or 1 depending on the instruction kind
// (startOp); each is copied onto the replacement instructions at the same
// operand position and then replaced by undef on the original.
void UpdateSelects() {
  for (auto V : Selects) {
    // update incoming index values corresponding to incoming resource values
    IndexVector &idxVector = ResToIdxReplacement[V];
    Instruction *I = cast(V);
    unsigned numOperands = I->getNumOperands();
    unsigned startOp = isa(V) ? 0 : 1;
    for (unsigned iOp = startOp; iOp < numOperands; iOp++) {
      IndexVector &incomingIndices = ResToIdxReplacement[I->getOperand(iOp)];
      DXASSERT_NOMSG(idxVector.size() == incomingIndices.size());
      for (unsigned i = 0; i < idxVector.size(); i++) {
        // must be instruction (phi/select)
        Instruction *indexI = cast(idxVector[i]);
        indexI->setOperand(iOp, incomingIndices[i]);
      }
      // Now clear incoming operand (adding to cleanup) to break cycles
      if (Instruction *OpI = dyn_cast(I->getOperand(iOp)))
        CleanupInsts.insert(OpI);
      I->setOperand(iOp, UndefValue::get(I->getType()));
    }
  }
}

// ReplaceHandles
//  - iterate handles
//  - insert GEP using new indices associated with resource value
//  - load resource from new GEP
//  - replace resource use in createHandleForLib with new load
// Assumes: no users of handle are phi/select or store
void ReplaceHandles() {
  if (Handles.empty())
    return;
  Type *i32Ty = IntegerType::getInt32Ty(Handles[0]->getContext());
  Constant *Zero = Constant::getIntegerValue(i32Ty, APInt(32, 0));
  for (auto V : Handles) {
    CallInst *CI = cast(V);
    DxilInst_CreateHandleForLib createHandle(CI);
    Value *res = createHandle.get_Resource();
    // Skip extra work if nothing between load and create handle
    if (LoadInst *LI = dyn_cast(res)) {
      Value *Ptr = LI->getPointerOperand();
      // Look through a single GEP only.
      if (GEPOperator *GEP = dyn_cast(Ptr))
        Ptr = GEP->getPointerOperand();
      if (isa(Ptr))
        continue;
    }
    GlobalVariable *GV = LookupResourceGV(res);
    if (!GV)
      continue; // skip value removed due to conflict
    IRBuilder<> Builder(CI);
    IndexVector &idxVector = ResToIdxReplacement[res];
    DXASSERT(idxVector.size() == CountArrayDimensions(GV->getType()), "replacements empty or invalid");
    SmallVector gepIndices;
    // Leading zero index, followed by one index per replacement entry.
    gepIndices.push_back(Zero);
    for (auto idxVal : idxVector)
      gepIndices.push_back(LookupValue(idxVal));
    Value *GEP = Builder.CreateInBoundsGEP(GV, gepIndices);
    // Mark new GEP instruction non-uniform if necessary
    if (NonUniformSet.count(res) != 0 || NonUniformSet.count(CI) != 0)
      if (GetElementPtrInst *GEPInst = dyn_cast(GEP))
        DxilMDHelper::MarkNonUniform(GEPInst);
    LoadInst *LI = Builder.CreateLoad(GEP);
    createHandle.set_Resource(LI);
    // The old resource value is now a cleanup candidate.
    if (Instruction *resI = dyn_cast(res))
      CleanupInsts.insert(resI);
  }
}

// Delete unused CleanupInsts, restarting when changed
// Return true if something was deleted
// NOTE(review): CleanupInsts is inserted into and erased from while `it` is
// live; whether `it` stays valid depends on the container's iterator
// invalidation rules - confirm against its declared type.
bool CleanupUnusedValues() {
  // - delete unused CleanupInsts, restarting when changed
  bool bAnyChanges = false;
  bool bChanged = false;
  do {
    bChanged = false;
    for (auto it = CleanupInsts.begin(); it != CleanupInsts.end();) {
      // Advance before a possible erase of the current element.
      Instruction *I = *(it++);
      if (I->user_empty()) {
        // Add instructions operands CleanupInsts
        for (unsigned iOp = 0; iOp < I->getNumOperands(); iOp++) {
          if (Instruction *opI = dyn_cast(I->getOperand(iOp)))
            CleanupInsts.insert(opI);
        }
        I->eraseFromParent();
        CleanupInsts.erase(I);
        bChanged = true;
      }
    }
    if (bChanged)
      bAnyChanges = true;
  } while (bChanged);
  return bAnyChanges;
}

// Repeatedly asks dxilutil::MergeSelectOnSameValue to simplify each new
// phi/select that has not been remapped yet; a non-null result is recorded
// in RemappedValues. Stops after a full pass with no new remapping.
void SimplifyMerges() {
  // Loop if changed
  bool bChanged = false;
  do {
    bChanged = false;
    for (auto V : NewSelects) {
      // Already remapped to something else.
      if (LookupValue(V) != V)
        continue;
      Instruction *I = cast(V);
      unsigned startOp = isa(I) ? 0 : 1;
      Value *newV = dxilutil::MergeSelectOnSameValue( cast(V), startOp, I->getNumOperands());
      if (newV) {
        RemappedValues[V] = newV;
        bChanged = true;
      }
    }
  } while (bChanged);
}

// Erases every recorded store, queues its instruction operands for cleanup,
// then deletes whatever has become unused.
void CleanupDeadInsts() {
  // Assuming everything was successful:
  // delete stores to allocas to remove cycles
  for (auto V : Stores) {
    StoreInst *SI = cast(V);
    if (Instruction *I = dyn_cast(SI->getValueOperand()))
      CleanupInsts.insert(I);
    if (Instruction *I = dyn_cast(SI->getPointerOperand()))
      CleanupInsts.insert(I);
    SI->eraseFromParent();
  }
  CleanupUnusedValues();
}

// Reports UserCallsWithResources for each CreateHandleForLib call whose
// resource operand does not pass the load (optionally through one GEP) check
// below, unless an error was already recorded for that call.
void VerifyComplete(DxilModule &DM) {
  // Check that all handles now resolve to a global variable, otherwise,
  // they are likely loading from resource function parameter, which
  // is disallowed.
  hlsl::OP *hlslOP = DM.GetOP();
  for (Function &F : DM.GetModule()->functions()) {
    if (hlslOP->IsDxilOpFunc(&F)) {
      hlsl::OP::OpCodeClass opClass;
      if (hlslOP->GetOpCodeClass(&F, opClass) && opClass == DXIL::OpCodeClass::CreateHandleForLib) {
        for (auto U : F.users()) {
          CallInst *CI = cast(U);
          if (m_Errors.ErrorUsers.count(CI))
            continue; // Error already reported
          DxilInst_CreateHandleForLib createHandle(CI);
          Value *res = createHandle.get_Resource();
          LoadInst *LI = dyn_cast(res);
          if (LI) {
            Value *Ptr = LI->getPointerOperand();
            if (GEPOperator *GEP = dyn_cast(Ptr))
              Ptr = GEP->getPointerOperand();
            if (isa(Ptr))
              continue;
          }
          // handle wasn't processed
          // Right now, the most likely cause is user call with resources, but
          // this should be updated if there are other reasons for this to happen.
          m_Errors.ReportError(ResourceUseErrors::UserCallsWithResources, U);
        }
      }
    }
  }
}

// Fix resource global variable properties to external constant
// Returns true when GV had to be modified (initializer dropped, marked
// constant, linkage set to external); false when it already matched.
bool SetExternalConstant(GlobalVariable *GV) {
  if (GV->hasInitializer() || !GV->isConstant() || GV->getLinkage() != GlobalVariable::LinkageTypes::ExternalLinkage) {
    GV->setInitializer(nullptr);
    GV->setConstant(true);
    GV->setLinkage(GlobalVariable::LinkageTypes::ExternalLinkage);
    return true;
  }
  return false;
}

// Normalizes the global symbol of every CBuffer, SRV, UAV and sampler in DM
// and collects the users of each. Returns true if any global was modified by
// SetExternalConstant.
bool CollectResources(DxilModule &DM) {
  bool bChanged = false;
  for (const auto &res : DM.GetCBuffers()) {
    if (GlobalVariable *GV = dyn_cast(res->GetGlobalSymbol())) {
      bChanged |= SetExternalConstant(GV);
      CollectResourceGVUsers(GV, GV);
    }
  }
  for (const auto &res : DM.GetSRVs()) {
    if (GlobalVariable *GV = dyn_cast(res->GetGlobalSymbol())) {
      bChanged |= SetExternalConstant(GV);
      CollectResourceGVUsers(GV, GV);
    }
  }
  for (const auto &res : DM.GetUAVs()) {
    if (GlobalVariable *GV = dyn_cast(res->GetGlobalSymbol())) {
      bChanged |= SetExternalConstant(GV);
      CollectResourceGVUsers(GV, GV);
    }
  }
  for (const auto &res : DM.GetSamplers()) {
    if (GlobalVariable *GV = dyn_cast(res->GetGlobalSymbol())) {
      bChanged |= SetExternalConstant(GV);
      CollectResourceGVUsers(GV, GV);
    }
  }
  return bChanged;
}

// Runs the rewrite steps in their required order. Dead-instruction cleanup
// is skipped when any error has been reported.
void DoTransform() {
  RemoveConflicts();
  CreateSelects();
  CreateIndexAllocas();
  ReplaceAllocaGEPs();
  ReplaceAllocaLoads();
  ReplaceGVGEPs();
  ReplaceStores();
  UpdateSelects();
  SimplifyMerges();
  ReplaceHandles();
  if (!m_Errors.ErrorsReported())
    CleanupDeadInsts();
}

// True if any resource-use error was recorded.
bool ErrorsReported() { return m_Errors.ErrorsReported(); }

// Entry point: collect resource users, then transform and verify if any
// select/phi or alloca was found. Returns whether the module was changed;
// once the transform runs this is always reported as true.
bool runOnModule(llvm::Module &M) {
  DxilModule &DM = M.GetOrCreateDxilModule();
  bool bChanged = CollectResources(DM);
  // If no selects or allocas are involved, there isn't anything to do
  if (Selects.empty() && Allocas.empty())
    return bChanged;
  DoTransform();
  VerifyComplete(DM);
  return true;
}
};

// Module pass wrapper: runs a fresh LegalizeResourceUseHelper on the module.
class DxilLegalizeResources : public ModulePass {
public:
  static char ID; // Pass identification, replacement for typeid
  explicit DxilLegalizeResources() : ModulePass(ID) {}
  const char *getPassName() const override { return "DXIL Legalize Resource Use"; }
  bool runOnModule(Module &M) override {
    LegalizeResourceUseHelper helper;
    return helper.runOnModule(M);
  }
private:
};
} // namespace

char DxilLegalizeResources::ID = 0;

// Factory for the pass; the caller receives the newly allocated pass object.
ModulePass *llvm::createDxilLegalizeResources() { return new DxilLegalizeResources(); }

INITIALIZE_PASS(DxilLegalizeResources, "hlsl-dxil-legalize-resources", "DXIL legalize resource use", false, false)

// Runs the same legalization from within DxilLowerCreateHandleForLib and
// latches m_bLegalizationFailed when the helper reported errors.
bool DxilLowerCreateHandleForLib::RemovePhiOnResource() {
  LegalizeResourceUseHelper helper;
  bool bChanged = helper.runOnModule(*m_DM->GetModule());
  if (helper.ErrorsReported())
    m_bLegalizationFailed = true;
  return bChanged;
}

// LegacyLayout.
namespace { StructType *UpdateStructTypeForLegacyLayout(StructType *ST, bool IsCBuf, DxilTypeSystem &TypeSys, Module &M); Type *UpdateFieldTypeForLegacyLayout(Type *Ty, bool IsCBuf, DxilFieldAnnotation &annotation, DxilTypeSystem &TypeSys, Module &M) { DXASSERT(!Ty->isPointerTy(), "struct field should not be a pointer"); if (Ty->isArrayTy()) { Type *EltTy = Ty->getArrayElementType(); Type *UpdatedTy = UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M); if (EltTy == UpdatedTy) return Ty; else return ArrayType::get(UpdatedTy, Ty->getArrayNumElements()); } else if (HLMatrixLower::IsMatrixType(Ty)) { DXASSERT(annotation.HasMatrixAnnotation(), "must a matrix"); unsigned rows, cols; Type *EltTy = HLMatrixLower::GetMatrixInfo(Ty, cols, rows); // Get cols and rows from annotation. const DxilMatrixAnnotation &matrix = annotation.GetMatrixAnnotation(); if (matrix.Orientation == MatrixOrientation::RowMajor) { rows = matrix.Rows; cols = matrix.Cols; } else { DXASSERT(matrix.Orientation == MatrixOrientation::ColumnMajor, ""); cols = matrix.Rows; rows = matrix.Cols; } // CBuffer matrix must 4 * 4 bytes align. if (IsCBuf) cols = 4; EltTy = UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M); Type *rowTy = VectorType::get(EltTy, cols); return ArrayType::get(rowTy, rows); } else if (StructType *ST = dyn_cast(Ty)) { return UpdateStructTypeForLegacyLayout(ST, IsCBuf, TypeSys, M); } else if (Ty->isVectorTy()) { Type *EltTy = Ty->getVectorElementType(); Type *UpdatedTy = UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M); if (EltTy == UpdatedTy) return Ty; else return VectorType::get(UpdatedTy, Ty->getVectorNumElements()); } else { Type *i32Ty = Type::getInt32Ty(Ty->getContext()); // Basic types. 
if (Ty->isHalfTy()) { return Type::getFloatTy(Ty->getContext()); } else if (IntegerType *ITy = dyn_cast(Ty)) { if (ITy->getBitWidth() < 32) return i32Ty; else return Ty; } else return Ty; } } StructType *UpdateStructTypeForLegacyLayout(StructType *ST, bool IsCBuf, DxilTypeSystem &TypeSys, Module &M) { bool bUpdated = false; unsigned fieldsCount = ST->getNumElements(); std::vector fieldTypes(fieldsCount); DxilStructAnnotation *SA = TypeSys.GetStructAnnotation(ST); DXASSERT(SA, "must have annotation for struct type"); for (unsigned i = 0; i < fieldsCount; i++) { Type *EltTy = ST->getElementType(i); Type *UpdatedTy = UpdateFieldTypeForLegacyLayout( EltTy, IsCBuf, SA->GetFieldAnnotation(i), TypeSys, M); fieldTypes[i] = UpdatedTy; if (EltTy != UpdatedTy) bUpdated = true; } if (!bUpdated) { return ST; } else { std::string legacyName = "dx.alignment.legacy." + ST->getName().str(); if (StructType *legacyST = M.getTypeByName(legacyName)) return legacyST; StructType *NewST = StructType::create(ST->getContext(), fieldTypes, legacyName); DxilStructAnnotation *NewSA = TypeSys.AddStructAnnotation(NewST); // Clone annotation. *NewSA = *SA; return NewST; } } void UpdateStructTypeForLegacyLayout(DxilResourceBase &Res, DxilTypeSystem &TypeSys, Module &M) { GlobalVariable *GV = cast(Res.GetGlobalSymbol()); Type *Ty = GV->getType()->getPointerElementType(); bool IsResourceArray = Res.GetRangeSize() != 1; if (IsResourceArray) { // Support Array of struct buffer. if (Ty->isArrayTy()) Ty = Ty->getArrayElementType(); } StructType *ST = cast(Ty); if (ST->isOpaque()) { DXASSERT(Res.GetClass() == DxilResourceBase::Class::CBuffer, "Only cbuffer can have opaque struct."); return; } Type *UpdatedST = UpdateStructTypeForLegacyLayout(ST, IsResourceArray, TypeSys, M); if (ST != UpdatedST) { Type *Ty = GV->getType()->getPointerElementType(); if (IsResourceArray) { // Support Array of struct buffer. 
if (Ty->isArrayTy()) { UpdatedST = ArrayType::get(UpdatedST, Ty->getArrayNumElements()); } } GlobalVariable *NewGV = cast( M.getOrInsertGlobal(GV->getName().str() + "_legacy", UpdatedST)); Res.SetGlobalSymbol(NewGV); // Delete old GV. for (auto UserIt = GV->user_begin(); UserIt != GV->user_end();) { Value *User = *(UserIt++); if (Instruction *I = dyn_cast(User)) { if (!User->user_empty()) I->replaceAllUsesWith(UndefValue::get(I->getType())); I->eraseFromParent(); } else { ConstantExpr *CE = cast(User); if (!CE->user_empty()) CE->replaceAllUsesWith(UndefValue::get(CE->getType())); } } GV->removeDeadConstantUsers(); GV->eraseFromParent(); } } void UpdateStructTypeForLegacyLayoutOnDM(DxilModule &DM) { DxilTypeSystem &TypeSys = DM.GetTypeSystem(); Module &M = *DM.GetModule(); for (auto &CBuf : DM.GetCBuffers()) { UpdateStructTypeForLegacyLayout(*CBuf.get(), TypeSys, M); } for (auto &UAV : DM.GetUAVs()) { if (UAV->GetKind() == DxilResourceBase::Kind::StructuredBuffer) UpdateStructTypeForLegacyLayout(*UAV.get(), TypeSys, M); } for (auto &SRV : DM.GetSRVs()) { if (SRV->GetKind() == DxilResourceBase::Kind::StructuredBuffer) UpdateStructTypeForLegacyLayout(*SRV.get(), TypeSys, M); } } } // namespace void DxilLowerCreateHandleForLib::UpdateStructTypeForLegacyLayout() { UpdateStructTypeForLegacyLayoutOnDM(*m_DM); } // Change ResourceSymbol to undef if don't need. 
void DxilLowerCreateHandleForLib::UpdateResourceSymbols() { std::vector &LLVMUsed = m_DM->GetLLVMUsed(); auto UpdateResourceSymbol = [&LLVMUsed, this](DxilResourceBase *res) { GlobalVariable *GV = cast(res->GetGlobalSymbol()); GV->removeDeadConstantUsers(); DXASSERT(GV->user_empty(), "else resource not lowered"); Type *Ty = GV->getType(); res->SetGlobalSymbol(UndefValue::get(Ty)); if (m_HasDbgInfo) LLVMUsed.emplace_back(GV); res->SetGlobalSymbol(UndefValue::get(Ty)); }; for (auto &&C : m_DM->GetCBuffers()) { UpdateResourceSymbol(C.get()); } for (auto &&Srv : m_DM->GetSRVs()) { UpdateResourceSymbol(Srv.get()); } for (auto &&Uav : m_DM->GetUAVs()) { UpdateResourceSymbol(Uav.get()); } for (auto &&S : m_DM->GetSamplers()) { UpdateResourceSymbol(S.get()); } } // Lower createHandleForLib namespace { void ReplaceResourceUserWithHandle( LoadInst *Res, Value *handle) { for (auto resUser = Res->user_begin(); resUser != Res->user_end();) { Value *V = *(resUser++); CallInst *CI = dyn_cast(V); DxilInst_CreateHandleForLib createHandle(CI); DXASSERT(createHandle, "must be createHandle"); CI->replaceAllUsesWith(handle); CI->eraseFromParent(); } Res->eraseFromParent(); } DIGlobalVariable *FindGlobalVariableDebugInfo(GlobalVariable *GV, DebugInfoFinder &DbgInfoFinder) { struct GlobalFinder { GlobalVariable *GV; bool operator()(llvm::DIGlobalVariable *const arg) const { return arg->getVariable() == GV; } }; GlobalFinder F = {GV}; DebugInfoFinder::global_variable_iterator Found = std::find_if(DbgInfoFinder.global_variables().begin(), DbgInfoFinder.global_variables().end(), F); if (Found != DbgInfoFinder.global_variables().end()) { return *Found; } return nullptr; } } // namespace void DxilLowerCreateHandleForLib::TranslateDxilResourceUses( DxilResourceBase &res) { OP *hlslOP = m_DM->GetOP(); Function *createHandle = hlslOP->GetOpFunc( OP::OpCode::CreateHandle, llvm::Type::getVoidTy(m_DM->GetCtx())); Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandle); bool 
isViewResource = res.GetClass() == DXIL::ResourceClass::SRV || res.GetClass() == DXIL::ResourceClass::UAV; bool isROV = isViewResource && static_cast(res).IsROV(); std::string handleName = (res.GetGlobalName() + Twine("_") + Twine(res.GetResClassName())).str(); if (isViewResource) handleName += (Twine("_") + Twine(res.GetResDimName())).str(); if (isROV) handleName += "_ROV"; Value *resClassArg = hlslOP->GetU8Const( static_cast::type>( res.GetClass())); Value *resIDArg = hlslOP->GetU32Const(res.GetID()); // resLowerBound will be added after allocation in DxilCondenseResources. Value *resLowerBound = hlslOP->GetU32Const(res.GetLowerBound()); Value *isUniformRes = hlslOP->GetI1Const(0); Value *GV = res.GetGlobalSymbol(); Module *pM = m_DM->GetModule(); // TODO: add debug info to create handle. DIVariable *DIV = nullptr; DILocation *DL = nullptr; if (m_HasDbgInfo) { DebugInfoFinder &Finder = m_DM->GetOrCreateDebugInfoFinder(); DIV = FindGlobalVariableDebugInfo(cast(GV), Finder); if (DIV) // TODO: how to get col? DL = DILocation::get(pM->getContext(), DIV->getLine(), 1, DIV->getScope()); } bool isResArray = res.GetRangeSize() > 1; std::unordered_map handleMapOnFunction; Value *createHandleArgs[] = {opArg, resClassArg, resIDArg, resLowerBound, isUniformRes}; for (iplist::iterator F : pM->getFunctionList()) { if (!F->isDeclaration()) { if (!isResArray) { IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F)); if (m_HasDbgInfo) { // TODO: set debug info. // Builder.SetCurrentDebugLocation(DL); } handleMapOnFunction[F] = Builder.CreateCall(createHandle, createHandleArgs, handleName); } } } for (auto U = GV->user_begin(), E = GV->user_end(); U != E;) { User *user = *(U++); // Skip unused user. 
if (user->user_empty()) continue; if (LoadInst *ldInst = dyn_cast(user)) { Function *userF = ldInst->getParent()->getParent(); DXASSERT(handleMapOnFunction.count(userF), "must exist"); Value *handle = handleMapOnFunction[userF]; ReplaceResourceUserWithHandle(ldInst, handle); } else { DXASSERT(dyn_cast(user) != nullptr, "else AddOpcodeParamForIntrinsic in CodeGen did not patch uses " "to only have ld/st refer to temp object"); GEPOperator *GEP = cast(user); Value *idx = nullptr; if (GEP->getNumIndices() == 2) { // one dim array of resource idx = (GEP->idx_begin() + 1)->get(); } else { gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP); // Must be instruction for multi dim array. std::unique_ptr > Builder; if (GetElementPtrInst *GEPInst = dyn_cast(GEP)) { Builder = llvm::make_unique >(GEPInst); } else { Builder = llvm::make_unique >(GV->getContext()); } for (; GEPIt != E; ++GEPIt) { if (GEPIt->isArrayTy()) { unsigned arraySize = GEPIt->getArrayNumElements(); Value * tmpIdx = GEPIt.getOperand(); if (idx == nullptr) idx = tmpIdx; else { idx = Builder->CreateMul(idx, Builder->getInt32(arraySize)); idx = Builder->CreateAdd(idx, tmpIdx); } } } } createHandleArgs[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] = idx; createHandleArgs[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] = isUniformRes; Value *handle = nullptr; if (GetElementPtrInst *GEPInst = dyn_cast(GEP)) { IRBuilder<> Builder = IRBuilder<>(GEPInst); if (DxilMDHelper::IsMarkedNonUniform(GEPInst)) { // Mark nonUniform. createHandleArgs[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] = hlslOP->GetI1Const(1); // Clear nonUniform on GEP. GEPInst->setMetadata(DxilMDHelper::kDxilNonUniformAttributeMDName, nullptr); } createHandleArgs[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] = Builder.CreateAdd(idx, resLowerBound); handle = Builder.CreateCall(createHandle, createHandleArgs, handleName); } for (auto GEPU = GEP->user_begin(), GEPE = GEP->user_end(); GEPU != GEPE;) { // Must be load inst. 
LoadInst *ldInst = cast(*(GEPU++)); if (handle) { ReplaceResourceUserWithHandle(ldInst, handle); } else { IRBuilder<> Builder = IRBuilder<>(ldInst); createHandleArgs[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] = Builder.CreateAdd(idx, resLowerBound); Value *localHandle = Builder.CreateCall(createHandle, createHandleArgs, handleName); ReplaceResourceUserWithHandle(ldInst, localHandle); } } if (Instruction *I = dyn_cast(GEP)) { I->eraseFromParent(); } } } // Erase unused handle. for (auto It : handleMapOnFunction) { Instruction *I = It.second; if (I->user_empty()) I->eraseFromParent(); } } void DxilLowerCreateHandleForLib::GenerateDxilResourceHandles() { for (size_t i = 0; i < m_DM->GetCBuffers().size(); i++) { DxilCBuffer &C = m_DM->GetCBuffer(i); TranslateDxilResourceUses(C); } // Create sampler handle first, may be used by SRV operations. for (size_t i = 0; i < m_DM->GetSamplers().size(); i++) { DxilSampler &S = m_DM->GetSampler(i); TranslateDxilResourceUses(S); } for (size_t i = 0; i < m_DM->GetSRVs().size(); i++) { DxilResource &SRV = m_DM->GetSRV(i); TranslateDxilResourceUses(SRV); } for (size_t i = 0; i < m_DM->GetUAVs().size(); i++) { DxilResource &UAV = m_DM->GetUAV(i); TranslateDxilResourceUses(UAV); } } // TBuffer. 
namespace { void InitTBuffer(const DxilCBuffer *pSource, DxilResource *pDest) { pDest->SetKind(pSource->GetKind()); pDest->SetCompType(DXIL::ComponentType::U32); pDest->SetSampleCount(0); pDest->SetElementStride(0); pDest->SetGloballyCoherent(false); pDest->SetHasCounter(false); pDest->SetRW(false); pDest->SetROV(false); pDest->SetID(pSource->GetID()); pDest->SetSpaceID(pSource->GetSpaceID()); pDest->SetLowerBound(pSource->GetLowerBound()); pDest->SetRangeSize(pSource->GetRangeSize()); pDest->SetGlobalSymbol(pSource->GetGlobalSymbol()); pDest->SetGlobalName(pSource->GetGlobalName()); pDest->SetHandle(pSource->GetHandle()); } void PatchTBufferLoad(CallInst *handle, DxilModule &DM) { hlsl::OP *hlslOP = DM.GetOP(); llvm::LLVMContext &Ctx = DM.GetCtx(); Type *doubleTy = Type::getDoubleTy(Ctx); Type *i64Ty = Type::getInt64Ty(Ctx); // Replace corresponding cbuffer loads with typed buffer loads for (auto U = handle->user_begin(); U != handle->user_end();) { CallInst *I = cast(*(U++)); DXASSERT(I && OP::IsDxilOpFuncCallInst(I), "otherwise unexpected user of CreateHandle value"); DXIL::OpCode opcode = OP::GetDxilOpFuncCallInst(I); if (opcode == DXIL::OpCode::CBufferLoadLegacy) { DxilInst_CBufferLoadLegacy cbLoad(I); // Replace with appropriate buffer load instruction IRBuilder<> Builder(I); opcode = OP::OpCode::BufferLoad; Type *Ty = Type::getInt32Ty(Ctx); Function *BufLoad = hlslOP->GetOpFunc(opcode, Ty); Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *undefI = UndefValue::get(Type::getInt32Ty(Ctx)); Value *offset = cbLoad.get_regIndex(); CallInst *load = Builder.CreateCall(BufLoad, {opArg, handle, offset, undefI}); // Find extractelement uses of cbuffer load and replace + generate bitcast // as necessary for (auto LU = I->user_begin(); LU != I->user_end();) { ExtractValueInst *evInst = dyn_cast(*(LU++)); DXASSERT(evInst && evInst->getNumIndices() == 1, "user of cbuffer load result should be extractvalue"); uint64_t idx = evInst->getIndices()[0]; Type 
*EltTy = evInst->getType(); IRBuilder<> EEBuilder(evInst); Value *result = nullptr; if (EltTy != Ty) { // extract two values and DXIL::OpCode::MakeDouble or construct i64 if ((EltTy == doubleTy) || (EltTy == i64Ty)) { DXASSERT(idx < 2, "64-bit component index out of range"); // This assumes big endian order in tbuffer elements (is this // correct?) Value *low = EEBuilder.CreateExtractValue(load, idx * 2); Value *high = EEBuilder.CreateExtractValue(load, idx * 2 + 1); if (EltTy == doubleTy) { opcode = OP::OpCode::MakeDouble; Function *MakeDouble = hlslOP->GetOpFunc(opcode, doubleTy); Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); result = EEBuilder.CreateCall(MakeDouble, {opArg, low, high}); } else { high = EEBuilder.CreateZExt(high, i64Ty); low = EEBuilder.CreateZExt(low, i64Ty); high = EEBuilder.CreateShl(high, hlslOP->GetU64Const(32)); result = EEBuilder.CreateOr(high, low); } } else { result = EEBuilder.CreateExtractValue(load, idx); result = EEBuilder.CreateBitCast(result, EltTy); } } else { result = EEBuilder.CreateExtractValue(load, idx); } evInst->replaceAllUsesWith(result); evInst->eraseFromParent(); } } else if (opcode == DXIL::OpCode::CBufferLoad) { // TODO: Handle this, or prevent this for tbuffer DXASSERT(false, "otherwise CBufferLoad used for tbuffer rather than " "CBufferLoadLegacy"); } else { DXASSERT(false, "otherwise unexpected user of CreateHandle value"); } I->eraseFromParent(); } } } // namespace void DxilLowerCreateHandleForLib::PatchTBufferUse(Value *V, DxilModule &DM) { for (User *U : V->users()) { if (CallInst *CI = dyn_cast(U)) { // Patch dxil call. 
if (hlsl::OP::IsDxilOpFuncCallInst(CI)) PatchTBufferLoad(CI, DM); } else { PatchTBufferUse(U, DM); } } } bool DxilLowerCreateHandleForLib::PatchTBuffers(DxilModule &DM) { bool bChanged = false; // move tbuffer resources to SRVs unsigned offset = DM.GetSRVs().size(); Module &M = *DM.GetModule(); for (auto it = DM.GetCBuffers().begin(); it != DM.GetCBuffers().end(); it++) { DxilCBuffer *CB = it->get(); if (CB->GetKind() == DXIL::ResourceKind::TBuffer) { auto srv = make_unique(); InitTBuffer(CB, srv.get()); srv->SetID(offset++); DM.AddSRV(std::move(srv)); GlobalVariable *GV = cast(CB->GetGlobalSymbol()); PatchTBufferUse(GV, DM); // Set global symbol for cbuffer to an unused value so it can be removed // in RemoveUnusedResourceSymbols. Type *Ty = GV->getType()->getElementType(); GlobalVariable *NewGV = new GlobalVariable( M, Ty, GV->isConstant(), GV->getLinkage(), /*Initializer*/ nullptr, GV->getName(), /*InsertBefore*/ nullptr, GV->getThreadLocalMode(), GV->getType()->getAddressSpace(), GV->isExternallyInitialized()); CB->SetGlobalSymbol(NewGV); bChanged = true; } } return bChanged; } char DxilLowerCreateHandleForLib::ID = 0; ModulePass *llvm::createDxilLowerCreateHandleForLibPass() { return new DxilLowerCreateHandleForLib(); } INITIALIZE_PASS(DxilLowerCreateHandleForLib, "hlsl-dxil-lower-handle-for-lib", "DXIL Lower createHandleForLib", false, false) class DxilAllocateResourcesForLib : public ModulePass { private: RemapEntryCollection m_rewrites; public: static char ID; // Pass identification, replacement for typeid explicit DxilAllocateResourcesForLib() : ModulePass(ID), m_AutoBindingSpace(UINT_MAX) {} void applyOptions(PassOptions O) override { GetPassOptionUInt32(O, "auto-binding-space", &m_AutoBindingSpace, UINT_MAX); } const char *getPassName() const override { return "DXIL Condense Resources"; } bool runOnModule(Module &M) override { DxilModule &DM = M.GetOrCreateDxilModule(); // Must specify a default space, and must apply to library. 
// Use DxilCondenseResources instead for shaders. if ((m_AutoBindingSpace == UINT_MAX) || !DM.GetShaderModel()->IsLib()) return false; bool hasResource = DM.GetCBuffers().size() || DM.GetUAVs().size() || DM.GetSRVs().size() || DM.GetSamplers().size(); if (hasResource) { DM.SetAutoBindingSpace(m_AutoBindingSpace); AllocateDxilResources(DM); } return true; } private: uint32_t m_AutoBindingSpace; }; char DxilAllocateResourcesForLib::ID = 0; ModulePass *llvm::createDxilAllocateResourcesForLibPass() { return new DxilAllocateResourcesForLib(); } INITIALIZE_PASS(DxilAllocateResourcesForLib, "hlsl-dxil-allocate-resources-for-lib", "DXIL Allocate Resources For Library", false, false)