Browse Source

Optimize compile times by not skipping allocas (#3168)

Instead of skipping past allocas whenever inserting a new instruction,
which ate up a lot of compilation time, they are inserted at the default
insertion point.

The result is that allocas that would have coalesced just after the
global load and input loads are dispersed throughout the commands. So as
part of dxil finalization, the allocas are moved to the beginning of the
entry block of each function. This results in some minor changes to a
couple tests due to the allocas preceding the loads.
Greg Roth 4 years ago
parent
commit
9459577e8f

+ 3 - 8
include/dxc/DXIL/DxilUtil.h

@@ -59,14 +59,9 @@ namespace dxilutil {
   bool HasDynamicIndexing(llvm::Value *V);
   bool HasDynamicIndexing(llvm::Value *V);
 
 
   // Find alloca insertion point, given instruction
   // Find alloca insertion point, given instruction
-  llvm::Instruction *FindAllocaInsertionPt(llvm::Instruction* I); // Considers entire parent function
-  llvm::Instruction *FindAllocaInsertionPt(llvm::BasicBlock* BB); // Only considers provided block
-  llvm::Instruction *FindAllocaInsertionPt(llvm::Function* F);
-  llvm::Instruction *SkipAllocas(llvm::Instruction *I);
-  // Get first non-alloca insertion point, to avoid inserting non-allocas before alloca
-  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Instruction* I); // Considers entire parent function
-  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::BasicBlock* BB); // Only considers provided block
-  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F);
+  llvm::Instruction *FindInsertionPt(llvm::Instruction* I); // Considers entire parent function
+  llvm::Instruction *FindInsertionPt(llvm::BasicBlock* BB); // Only considers provided block
+  llvm::Instruction *FindInsertionPt(llvm::Function* F);
 
 
   bool IsStaticGlobal(llvm::GlobalVariable *GV);
   bool IsStaticGlobal(llvm::GlobalVariable *GV);
   bool IsSharedMemoryGlobal(llvm::GlobalVariable *GV);
   bool IsSharedMemoryGlobal(llvm::GlobalVariable *GV);

+ 6 - 21
lib/DXIL/DxilUtil.cpp

@@ -540,33 +540,18 @@ Value *SelectOnOperation(llvm::Instruction *Inst, unsigned operandIdx) {
   return nullptr;
   return nullptr;
 }
 }
 
 
-llvm::Instruction *SkipAllocas(llvm::Instruction *I) {
-  // Step past any allocas:
-  while (I && (isa<AllocaInst>(I) || isa<DbgInfoIntrinsic>(I)))
-    I = I->getNextNode();
-  return I;
-}
-llvm::Instruction *FindAllocaInsertionPt(llvm::BasicBlock* BB) {
+llvm::Instruction *FindInsertionPt(llvm::BasicBlock* BB) {
   return &*BB->getFirstInsertionPt();
   return &*BB->getFirstInsertionPt();
 }
 }
-llvm::Instruction *FindAllocaInsertionPt(llvm::Function* F) {
-  return FindAllocaInsertionPt(&F->getEntryBlock());
+llvm::Instruction *FindInsertionPt(llvm::Function* F) {
+  return FindInsertionPt(&F->getEntryBlock());
 }
 }
-llvm::Instruction *FindAllocaInsertionPt(llvm::Instruction* I) {
+llvm::Instruction *FindInsertionPt(llvm::Instruction* I) {
   Function *F = I->getParent()->getParent();
   Function *F = I->getParent()->getParent();
   if (F)
   if (F)
-    return FindAllocaInsertionPt(F);
+    return FindInsertionPt(F);
   else // BB with no parent function
   else // BB with no parent function
-    return FindAllocaInsertionPt(I->getParent());
-}
-llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Instruction* I) {
-  return SkipAllocas(FindAllocaInsertionPt(I));
-}
-llvm::Instruction *FirstNonAllocaInsertionPt(llvm::BasicBlock* BB) {
-  return SkipAllocas(FindAllocaInsertionPt(BB));
-}
-llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F) {
-  return SkipAllocas(FindAllocaInsertionPt(F));
+    return FindInsertionPt(I->getParent());
 }
 }
 
 
 static bool ConsumePrefix(StringRef &Str, StringRef Prefix) {
 static bool ConsumePrefix(StringRef &Str, StringRef Prefix) {

+ 1 - 1
lib/DxilPIXPasses/DxilAddPixelHitInstrumentation.cpp

@@ -100,7 +100,7 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M) {
   CallInst *HandleForUAV;
   CallInst *HandleForUAV;
   {
   {
     IRBuilder<> Builder(
     IRBuilder<> Builder(
-        dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
+        dxilutil::FindInsertionPt(DM.GetEntryFunction()));
 
 
     unsigned int UAVResourceHandle =
     unsigned int UAVResourceHandle =
         static_cast<unsigned int>(DM.GetUAVs().size());
         static_cast<unsigned int>(DM.GetUAVs().size());

+ 1 - 1
lib/DxilPIXPasses/DxilDebugInstrumentation.cpp

@@ -945,7 +945,7 @@ bool DxilDebugInstrumentation::runOnModule(Module &M) {
   //
   //
 
 
   Instruction *firstInsertionPt =
   Instruction *firstInsertionPt =
-      dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
+      dxilutil::FindInsertionPt(DM.GetEntryFunction());
   IRBuilder<> Builder(firstInsertionPt);
   IRBuilder<> Builder(firstInsertionPt);
 
 
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};

+ 1 - 1
lib/DxilPIXPasses/DxilPIXMeshShaderOutputInstrumentation.cpp

@@ -268,7 +268,7 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M)
   OP *HlslOP = DM.GetOP();
   OP *HlslOP = DM.GetOP();
 
 
   Instruction *firstInsertionPt =
   Instruction *firstInsertionPt =
-      dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
+      dxilutil::FindInsertionPt(DM.GetEntryFunction());
   IRBuilder<> Builder(firstInsertionPt);
   IRBuilder<> Builder(firstInsertionPt);
 
 
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};

+ 1 - 1
lib/HLSL/DxilCondenseResources.cpp

@@ -1966,7 +1966,7 @@ void DxilLowerCreateHandleForLib::TranslateDxilResourceUses(
   for (iplist<Function>::iterator F : pM->getFunctionList()) {
   for (iplist<Function>::iterator F : pM->getFunctionList()) {
     if (!F->isDeclaration()) {
     if (!F->isDeclaration()) {
       if (!isResArray) {
       if (!isResArray) {
-        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
+        IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
         if (m_HasDbgInfo) {
         if (m_HasDbgInfo) {
           // TODO: set debug info.
           // TODO: set debug info.
           // Builder.SetCurrentDebugLocation(DL);
           // Builder.SetCurrentDebugLocation(DL);

+ 1 - 1
lib/HLSL/DxilEliminateOutputDynamicIndexing.cpp

@@ -122,7 +122,7 @@ bool DxilEliminateOutputDynamicIndexing::EliminateDynamicOutput(
   if (dynamicSigSet.empty())
   if (dynamicSigSet.empty())
     return false;
     return false;
 
 
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Entry));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Entry));
 
 
   Value *opcodeV = AllocaBuilder.getInt32(static_cast<unsigned>(opcode));
   Value *opcodeV = AllocaBuilder.getInt32(static_cast<unsigned>(opcode));
   Value *zero = AllocaBuilder.getInt32(0);
   Value *zero = AllocaBuilder.getInt32(0);

+ 3 - 3
lib/HLSL/DxilGenerationPass.cpp

@@ -64,7 +64,7 @@ void SimplifyGlobalSymbol(GlobalVariable *GV) {
     for (auto it : handleMapOnFunction) {
     for (auto it : handleMapOnFunction) {
       Function *F = it.first;
       Function *F = it.first;
       Instruction *I = it.second;
       Instruction *I = it.second;
-      IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
+      IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
       Value *headLI = Builder.CreateLoad(GV);
       Value *headLI = Builder.CreateLoad(GV);
       I->replaceAllUsesWith(headLI);
       I->replaceAllUsesWith(headLI);
     }
     }
@@ -537,7 +537,7 @@ void DxilGenerationPass::GenerateDxilCBufferHandles() {
         // Must HLCreateHandle.
         // Must HLCreateHandle.
         CallInst *CI = cast<CallInst>(*(U++));
         CallInst *CI = cast<CallInst>(*(U++));
         // Put createHandle to entry block.
         // Put createHandle to entry block.
-        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(CI));
+        IRBuilder<> Builder(dxilutil::FindInsertionPt(CI));
         Value *V = Builder.CreateLoad(GV);
         Value *V = Builder.CreateLoad(GV);
         CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
         CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
         if (m_HasDbgInfo) {
         if (m_HasDbgInfo) {
@@ -562,7 +562,7 @@ void DxilGenerationPass::GenerateDxilCBufferHandles() {
         Value *CBIndex = CI->getArgOperand(HLOperandIndex::kCreateHandleIndexOpIdx);
         Value *CBIndex = CI->getArgOperand(HLOperandIndex::kCreateHandleIndexOpIdx);
         if (isa<ConstantInt>(CBIndex)) {
         if (isa<ConstantInt>(CBIndex)) {
           // Put createHandle to entry block for const index.
           // Put createHandle to entry block for const index.
-          Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(CI));
+          Builder.SetInsertPoint(dxilutil::FindInsertionPt(CI));
         }
         }
         // Add GEP for cbv array use.
         // Add GEP for cbv array use.
         Value *GEP = Builder.CreateGEP(GV, {zeroIdx, CBIndex});
         Value *GEP = Builder.CreateGEP(GV, {zeroIdx, CBIndex});

+ 1 - 1
lib/HLSL/DxilLinker.cpp

@@ -792,7 +792,7 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
   CloneFunctions(vmap);
   CloneFunctions(vmap);
 
 
   // Call global constrctor.
   // Call global constrctor.
-  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
+  IRBuilder<> Builder(dxilutil::FindInsertionPt(DM.GetEntryFunction()));
   for (auto &it : m_functionDefs) {
   for (auto &it : m_functionDefs) {
     DxilFunctionLinkInfo *linkInfo = it.first;
     DxilFunctionLinkInfo *linkInfo = it.first;
     DxilLib *pLib = it.second;
     DxilLib *pLib = it.second;

+ 20 - 1
lib/HLSL/DxilPreparePasses.cpp

@@ -388,6 +388,9 @@ public:
       unsigned DxilMinor = 0;
       unsigned DxilMinor = 0;
       M.GetDxilModule().GetDxilVersion(DxilMajor, DxilMinor);
       M.GetDxilModule().GetDxilVersion(DxilMajor, DxilMinor);
 
 
+      // Move all allocas to the top of the entry block
+      ConsolidateAllocas(M);
+
       bool IsLib = DM.GetShaderModel()->IsLib();
       bool IsLib = DM.GetShaderModel()->IsLib();
       // Skip validation patch for lib.
       // Skip validation patch for lib.
       if (!IsLib) {
       if (!IsLib) {
@@ -446,6 +449,22 @@ public:
   }
   }
 
 
 private:
 private:
+  void ConsolidateAllocas(Module &M) { // Gathers the allocas of every defined function in M in front of its first non-alloca instruction.
+    for (Function &F : M) {
+      if (F.isDeclaration()) // Declarations have no body to rearrange.
+        continue;
+      Instruction *insertPt = nullptr; // Stays null until the first non-alloca, non-debug-intrinsic instruction is seen.
+      for (llvm::Instruction &I : llvm::inst_range(&F)) {
+        if (!insertPt) {
+          if (!isa<AllocaInst>(I) && !isa<DbgInfoIntrinsic>(I)) // Leading allocas and debug intrinsics are left where they are.
+            insertPt = &I; // Every alloca found after this point is moved in front of this instruction.
+        } else if (isa<AllocaInst>(I)) {
+          I.moveBefore(insertPt); // NOTE(review): I is relinked while inst_range(&F) is still iterating over it - confirm the traversal stays valid and does not rescan from insertPt.
+        }
+      }
+    }
+  }
+
   void RemoveUnusedStaticGlobal(Module &M) {
   void RemoveUnusedStaticGlobal(Module &M) {
     // Remove unused internal global.
     // Remove unused internal global.
     std::vector<GlobalVariable *> staticGVs;
     std::vector<GlobalVariable *> staticGVs;
@@ -652,7 +671,7 @@ private:
           Function *F = CI->getParent()->getParent();
           Function *F = CI->getParent()->getParent();
           ICmpInst *Cmp = DxBreakCmpMap.lookup(F);
           ICmpInst *Cmp = DxBreakCmpMap.lookup(F);
           if (!Cmp) {
           if (!Cmp) {
-            Instruction *IP = dxilutil::FirstNonAllocaInsertionPt(F);
+            Instruction *IP = dxilutil::FindInsertionPt(F);
             LoadInst *LI = new LoadInst(Gep, nullptr, false, IP);
             LoadInst *LI = new LoadInst(Gep, nullptr, false, IP);
             Cmp = new ICmpInst(IP, ICmpInst::ICMP_EQ, LI, llvm::ConstantInt::get(i32Ty,0));
             Cmp = new ICmpInst(IP, ICmpInst::ICMP_EQ, LI, llvm::ConstantInt::get(i32Ty,0));
             DxBreakCmpMap[F] = Cmp;
             DxBreakCmpMap[F] = Cmp;

+ 3 - 3
lib/HLSL/HLMatrixLowerPass.cpp

@@ -429,7 +429,7 @@ Value *HLMatrixLowerPass::bitCastValue(Value *SrcVal, Type* DstTy, bool DstTyAll
 
 
   // We store and load from a temporary alloca, bitcasting either on the store pointer
   // We store and load from a temporary alloca, bitcasting either on the store pointer
   // or on the load pointer.
   // or on the load pointer.
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
   Value *Alloca = AllocaBuilder.CreateAlloca(DstTyAlloca ? DstTy : SrcTy);
   Value *Alloca = AllocaBuilder.CreateAlloca(DstTyAlloca ? DstTy : SrcTy);
   Value *BitCastedAlloca = Builder.CreateBitCast(Alloca, (DstTyAlloca ? SrcTy : DstTy)->getPointerTo());
   Value *BitCastedAlloca = Builder.CreateBitCast(Alloca, (DstTyAlloca ? SrcTy : DstTy)->getPointerTo());
   Builder.CreateStore(SrcVal, DstTyAlloca ? BitCastedAlloca : Alloca);
   Builder.CreateStore(SrcVal, DstTyAlloca ? BitCastedAlloca : Alloca);
@@ -476,7 +476,7 @@ void HLMatrixLowerPass::replaceAllUsesByLoweredValue(Instruction* MatInst, Value
       Instruction *PrevInst = dyn_cast<Instruction>(VecVal);
       Instruction *PrevInst = dyn_cast<Instruction>(VecVal);
       if (PrevInst == nullptr) PrevInst = MatInst;
       if (PrevInst == nullptr) PrevInst = MatInst;
 
 
-      IRBuilder<> Builder(dxilutil::SkipAllocas(PrevInst->getNextNode()));
+      IRBuilder<> Builder(PrevInst->getNextNode());
       VecToMatStub = Builder.CreateCall(TranslationStub, { VecVal });
       VecToMatStub = Builder.CreateCall(TranslationStub, { VecVal });
     }
     }
 
 
@@ -743,7 +743,7 @@ Value *HLMatrixLowerPass::lowerNonHLCall(CallInst *Call) {
   // The callee returns a matrix, and we don't lower signatures in this pass.
   // The callee returns a matrix, and we don't lower signatures in this pass.
   // We perform a sketchy bitcast to the lowered register-representation type,
   // We perform a sketchy bitcast to the lowered register-representation type,
   // which the later HLMatrixBitcastLower pass knows how to eliminate.
   // which the later HLMatrixBitcastLower pass knows how to eliminate.
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Call));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Call));
   Value *LoweredAlloca = AllocaBuilder.CreateAlloca(RetMatTy.getLoweredVectorTypeForReg());
   Value *LoweredAlloca = AllocaBuilder.CreateAlloca(RetMatTy.getLoweredVectorTypeForReg());
   
   
   IRBuilder<> PostCallBuilder(Call->getNextNode());
   IRBuilder<> PostCallBuilder(Call->getNextNode());

+ 2 - 2
lib/HLSL/HLMatrixSubscriptUseReplacer.cpp

@@ -139,7 +139,7 @@ Value *HLMatrixSubscriptUseReplacer::tryGetScalarIndex(Value *SubIdxVal, IRBuild
   // We need to dynamically index into the level 1 element indices
   // We need to dynamically index into the level 1 element indices
   if (LazyTempElemIndicesArrayAlloca == nullptr) {
   if (LazyTempElemIndicesArrayAlloca == nullptr) {
     // The level 2 index is dynamic, use it to index a temporary array of the level 1 indices.
     // The level 2 index is dynamic, use it to index a temporary array of the level 1 indices.
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
     ArrayType *ArrayTy = ArrayType::get(AllocaBuilder.getInt32Ty(), ElemIndices.size());
     ArrayType *ArrayTy = ArrayType::get(AllocaBuilder.getInt32Ty(), ElemIndices.size());
     LazyTempElemIndicesArrayAlloca = AllocaBuilder.CreateAlloca(ArrayTy);
     LazyTempElemIndicesArrayAlloca = AllocaBuilder.CreateAlloca(ArrayTy);
   }
   }
@@ -180,7 +180,7 @@ void HLMatrixSubscriptUseReplacer::cacheLoweredMatrix(bool ForDynamicIndexing, I
   // Lazily create the temporary array alloca
   // Lazily create the temporary array alloca
   if (LazyTempElemArrayAlloca == nullptr) {
   if (LazyTempElemArrayAlloca == nullptr) {
     ArrayType *TempElemArrayTy = ArrayType::get(MatVecTy->getElementType(), MatVecTy->getNumElements());
     ArrayType *TempElemArrayTy = ArrayType::get(MatVecTy->getElementType(), MatVecTy->getNumElements());
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
     LazyTempElemArrayAlloca = AllocaBuilder.CreateAlloca(TempElemArrayTy);
     LazyTempElemArrayAlloca = AllocaBuilder.CreateAlloca(TempElemArrayTy);
   }
   }
 
 

+ 2 - 2
lib/HLSL/HLOperationLower.cpp

@@ -6532,7 +6532,7 @@ static ResRetValueArray GenerateTypedBufferLoad(
 
 
 static AllocaInst* SpillValuesToArrayAlloca(ArrayRef<Value*> Values, IRBuilder<>& Builder) {
 static AllocaInst* SpillValuesToArrayAlloca(ArrayRef<Value*> Values, IRBuilder<>& Builder) {
   DXASSERT_NOMSG(!Values.empty());
   DXASSERT_NOMSG(!Values.empty());
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
   AllocaInst* ArrayAlloca = AllocaBuilder.CreateAlloca(ArrayType::get(Values[0]->getType(), Values.size()));
   AllocaInst* ArrayAlloca = AllocaBuilder.CreateAlloca(ArrayType::get(Values[0]->getType(), Values.size()));
   for (unsigned i = 0; i < Values.size(); ++i) {
   for (unsigned i = 0; i < Values.size(); ++i) {
     Value* ArrayElemPtr = Builder.CreateGEP(ArrayAlloca, { Builder.getInt32(0), Builder.getInt32(i) });
     Value* ArrayElemPtr = Builder.CreateGEP(ArrayAlloca, { Builder.getInt32(0), Builder.getInt32(i) });
@@ -7583,7 +7583,7 @@ static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, b
     return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
     return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
   } else {
   } else {
     // If value, we have to alloca, store to bitcast ptr, and load
     // If value, we have to alloca, store to bitcast ptr, and load
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Insert));
     Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
     Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
     Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
     Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
     Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);
     Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);

+ 1 - 1
lib/HLSL/HLSignatureLower.cpp

@@ -593,7 +593,7 @@ Value *replaceLdWithLdInput(Function *loadInput, LoadInst *ldInst,
                             unsigned cols, MutableArrayRef<Value *> args,
                             unsigned cols, MutableArrayRef<Value *> args,
                             bool bCast) {
                             bool bCast) {
   IRBuilder<> Builder(ldInst);
   IRBuilder<> Builder(ldInst);
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(ldInst));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(ldInst));
   Type *Ty = ldInst->getType();
   Type *Ty = ldInst->getType();
   Type *EltTy = Ty->getScalarType();
   Type *EltTy = Ty->getScalarType();
   // Change i1 to i32 for load input.
   // Change i1 to i32 for load input.

+ 19 - 19
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -1748,7 +1748,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
       // separate elements.
       // separate elements.
       if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
       if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
         std::vector<Value *> Elts;
         std::vector<Value *> Elts;
-        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(AI));
+        IRBuilder<> Builder(dxilutil::FindInsertionPt(AI));
         bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
         bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
 
 
         Type *BrokenUpTy = nullptr;
         Type *BrokenUpTy = nullptr;
@@ -2490,7 +2490,7 @@ void SROA_Helper::RewriteBitCast(BitCastInst *BCI) {
 void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
 void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
                                  bool bOut) {
                                  bool bOut) {
   Function *F = CI->getParent()->getParent();
   Function *F = CI->getParent()->getParent();
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
   const DataLayout &DL = F->getParent()->getDataLayout();
   const DataLayout &DL = F->getParent()->getDataLayout();
 
 
   Value *userTyV = CI->getArgOperand(ArgIdx);
   Value *userTyV = CI->getArgOperand(ArgIdx);
@@ -2757,7 +2757,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
   if (HLMatrixType::isa(Ty))
   if (HLMatrixType::isa(Ty))
     return false;
     return false;
 
 
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
 
 
   if (StructType *ST = dyn_cast<StructType>(Ty)) {
   if (StructType *ST = dyn_cast<StructType>(Ty)) {
     // Skip HLSL object types and RayQuery.
     // Skip HLSL object types and RayQuery.
@@ -3810,7 +3810,7 @@ void SROA_Parameter_HLSL::RewriteBitcastWithIdenticalStructs(BitCastInst *BCI) {
   StructType *srcStTy = cast<StructType>(BCI->getSrcTy()->getPointerElementType());
   StructType *srcStTy = cast<StructType>(BCI->getSrcTy()->getPointerElementType());
   StructType *destStTy = cast<StructType>(BCI->getDestTy()->getPointerElementType());
   StructType *destStTy = cast<StructType>(BCI->getDestTy()->getPointerElementType());
   Value* srcPtr = BCI->getOperand(0);
   Value* srcPtr = BCI->getOperand(0);
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(BCI->getParent()->getParent()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(BCI->getParent()->getParent()));
   AllocaInst *destPtr = AllocaBuilder.CreateAlloca(destStTy);
   AllocaInst *destPtr = AllocaBuilder.CreateAlloca(destStTy);
   IRBuilder<> InstBuilder(BCI);
   IRBuilder<> InstBuilder(BCI);
   std::vector<unsigned> idxlist = { 0 };
   std::vector<unsigned> idxlist = { 0 };
@@ -4311,7 +4311,7 @@ void SROA_Parameter_HLSL::replaceCastParameter(
   if (isa<Argument>(OldParam) && OldTy->isPointerTy()) {
   if (isa<Argument>(OldParam) && OldTy->isPointerTy()) {
     // OldParam will be removed with Old function.
     // OldParam will be removed with Old function.
     // Create alloca to replace it.
     // Create alloca to replace it.
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(&F));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(&F));
     Value *AllocParam = AllocaBuilder.CreateAlloca(OldTy->getPointerElementType());
     Value *AllocParam = AllocaBuilder.CreateAlloca(OldTy->getPointerElementType());
     OldParam->replaceAllUsesWith(AllocParam);
     OldParam->replaceAllUsesWith(AllocParam);
     OldParam = AllocParam;
     OldParam = AllocParam;
@@ -4394,7 +4394,7 @@ Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
     IRBuilder<> &Builder) {
     IRBuilder<> &Builder) {
   Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
   Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
   Module &M = *m_pHLModule->GetModule();
   Module &M = *m_pHLModule->GetModule();
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
 
 
   // Lower resource type to handle ty.
   // Lower resource type to handle ty.
   if (dxilutil::IsHLSLResourceType(Ty)) {
   if (dxilutil::IsHLSLResourceType(Ty)) {
@@ -4433,7 +4433,7 @@ Value *SROA_Parameter_HLSL::castArgumentIfRequired(
     IRBuilder<> &Builder,
     IRBuilder<> &Builder,
     DxilTypeSystem &TypeSys) {
     DxilTypeSystem &TypeSys) {
   Module &M = *m_pHLModule->GetModule();
   Module &M = *m_pHLModule->GetModule();
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
 
 
   if (inputQual == DxilParamInputQual::InPayload) {
   if (inputQual == DxilParamInputQual::InPayload) {
     DXASSERT_NOMSG(isa<StructType>(Ty));
     DXASSERT_NOMSG(isa<StructType>(Ty));
@@ -4600,8 +4600,8 @@ void SROA_Parameter_HLSL::flattenArgument(
 
 
     // Now is safe to create the IRBuilders.
     // Now is safe to create the IRBuilders.
     // If we create it before LowerMemcpy, the insertion pointer instruction may get deleted
     // If we create it before LowerMemcpy, the insertion pointer instruction may get deleted
-    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(EntryBlock));
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(EntryBlock));
+    IRBuilder<> Builder(dxilutil::FindInsertionPt(EntryBlock));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(EntryBlock));
 
 
     std::vector<Value *> Elts;
     std::vector<Value *> Elts;
 
 
@@ -4924,8 +4924,8 @@ void SROA_Parameter_HLSL::preprocessArgUsedInCall(Function *F) {
   DxilFunctionAnnotation *pFuncAnnot = typeSys.GetFunctionAnnotation(F);
   DxilFunctionAnnotation *pFuncAnnot = typeSys.GetFunctionAnnotation(F);
   DXASSERT(pFuncAnnot, "else invalid function");
   DXASSERT(pFuncAnnot, "else invalid function");
 
 
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
-  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
+  IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
 
 
   SmallVector<ReturnInst*, 2> retList;
   SmallVector<ReturnInst*, 2> retList;
   for (BasicBlock &bb : F->getBasicBlockList()) {
   for (BasicBlock &bb : F->getBasicBlockList()) {
@@ -5118,7 +5118,7 @@ static void LegalizeDxilInputOutputs(Function *F,
         // DxilGenerationPass.
         // DxilGenerationPass.
         isColMajor = paramAnnotation.GetMatrixAnnotation().Orientation ==
         isColMajor = paramAnnotation.GetMatrixAnnotation().Orientation ==
                      MatrixOrientation::ColumnMajor;
                      MatrixOrientation::ColumnMajor;
-        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
+        IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
 
 
         HLCastOpcode opcode = isColMajor ? HLCastOpcode::ColMatrixToVecCast
         HLCastOpcode opcode = isColMajor ? HLCastOpcode::ColMatrixToVecCast
                                          : HLCastOpcode::RowMatrixToVecCast;
                                          : HLCastOpcode::RowMatrixToVecCast;
@@ -5180,7 +5180,7 @@ static void LegalizeDxilInputOutputs(Function *F,
 
 
     if (bStoreInputToTemp || bLoadOutputFromTemp) {
     if (bStoreInputToTemp || bLoadOutputFromTemp) {
       IRBuilder<> AllocaBuilder(EntryBlk.getFirstInsertionPt());
       IRBuilder<> AllocaBuilder(EntryBlk.getFirstInsertionPt());
-      IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(&EntryBlk));
+      IRBuilder<> Builder(dxilutil::FindInsertionPt(&EntryBlk));
 
 
       AllocaInst *temp = AllocaBuilder.CreateAlloca(Ty);
       AllocaInst *temp = AllocaBuilder.CreateAlloca(Ty);
       // Replace all uses with temp.
       // Replace all uses with temp.
@@ -5296,8 +5296,8 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
   std::vector<DxilParameterAnnotation> FlatRetAnnotationList;
   std::vector<DxilParameterAnnotation> FlatRetAnnotationList;
   // Split and change to out parameter.
   // Split and change to out parameter.
   if (!retType->isVoidTy()) {
   if (!retType->isVoidTy()) {
-    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(EntryBlock));
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(EntryBlock));
+    IRBuilder<> Builder(dxilutil::FindInsertionPt(EntryBlock));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(EntryBlock));
     Value *retValAddr = AllocaBuilder.CreateAlloca(retType);
     Value *retValAddr = AllocaBuilder.CreateAlloca(retType);
     DxilParameterAnnotation &retAnnotation =
     DxilParameterAnnotation &retAnnotation =
         funcAnnotation->GetRetTypeAnnotation();
         funcAnnotation->GetRetTypeAnnotation();
@@ -5510,8 +5510,8 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
     LLVMContext &Context = F->getContext();
     LLVMContext &Context = F->getContext();
 
 
     // Parameter cast come from begining of entry block.
     // Parameter cast come from begining of entry block.
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(flatF));
-    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(flatF));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(flatF));
+    IRBuilder<> Builder(dxilutil::FindInsertionPt(flatF));
 
 
     while (argIter != flatF->arg_end()) {
     while (argIter != flatF->arg_end()) {
       Argument *Arg = argIter++;
       Argument *Arg = argIter++;
@@ -5765,10 +5765,10 @@ bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV
     return false;
     return false;
 
 
   Function *F = const_cast<Function*>(PS.AccessingFunction);
   Function *F = const_cast<Function*>(PS.AccessingFunction);
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
   AllocaInst *AI = AllocaBuilder.CreateAlloca(GV->getType()->getElementType());
   AllocaInst *AI = AllocaBuilder.CreateAlloca(GV->getType()->getElementType());
 
 
-  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
+  IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
 
 
   // Store initializer is exist.
   // Store initializer is exist.
   if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
   if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {

+ 1 - 1
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -5637,7 +5637,7 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     Function *F = InsertBlock->getParent();
     Function *F = InsertBlock->getParent();
 
 
     // Make sure the alloca is in entry block to stop inline create stacksave.
     // Make sure the alloca is in entry block to stop inline create stacksave.
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
     tmpArgAddr = AllocaBuilder.CreateAlloca(CGF.ConvertTypeForMem(ParamTy));
     tmpArgAddr = AllocaBuilder.CreateAlloca(CGF.ConvertTypeForMem(ParamTy));
 
 
     // add it to local decl map
     // add it to local decl map

+ 1 - 1
tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp

@@ -733,7 +733,7 @@ void CreateWriteEnabledStaticGlobals(llvm::Module *M, llvm::Function *EF) {
   }
   }
 
 
   IRBuilder<> Builder(
   IRBuilder<> Builder(
-      dxilutil::FirstNonAllocaInsertionPt(&EF->getEntryBlock()));
+      dxilutil::FindInsertionPt(&EF->getEntryBlock()));
   for (GlobalVariable *GV : worklist) {
   for (GlobalVariable *GV : worklist) {
     GlobalVariable *NGV = CreateStaticGlobal(M, GV);
     GlobalVariable *NGV = CreateStaticGlobal(M, GV);
     GV->replaceAllUsesWith(NGV);
     GV->replaceAllUsesWith(NGV);

+ 1 - 1
tools/clang/test/HLSLFileCheck/hlsl/objects/RayQuery/rayquery-array-2d-dynamic.hlsl

@@ -1,5 +1,6 @@
 // RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
 // RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
 
 
+// CHECK: %[[array:[^ ]+]] = alloca [6 x i32]
 // CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
 // CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
 RaytracingAccelerationStructure RTAS;
 RaytracingAccelerationStructure RTAS;
 
 
@@ -10,7 +11,6 @@ void DoTrace(RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES>
 int C;
 int C;
 
 
 float main(RayDesc rayDesc : RAYDESC) : OUT {
 float main(RayDesc rayDesc : RAYDESC) : OUT {
-  // CHECK: %[[array:[^ ]+]] = alloca [6 x i32]
   // Ideally, one for [1][2] statically indexed, and 3 for [0][C] dynamically indexed sub-array.
   // Ideally, one for [1][2] statically indexed, and 3 for [0][C] dynamically indexed sub-array.
   // But that would require 2d array optimization when one index is constant.
   // But that would require 2d array optimization when one index is constant.
   // CHECK: %[[RQ00:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
   // CHECK: %[[RQ00:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)

+ 2 - 2
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -1489,8 +1489,8 @@ TEST_F(ValidationTest, UnusedMetadata) {
 
 
 TEST_F(ValidationTest, MemoryOutOfBound) {
 TEST_F(ValidationTest, MemoryOutOfBound) {
   RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\targetArray.hlsl", "ps_6_0",
   RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\targetArray.hlsl", "ps_6_0",
-                          "getelementptr [4 x float], [4 x float]* %7, i32 0, i32 3",
-                          "getelementptr [4 x float], [4 x float]* %7, i32 0, i32 10",
+                          "getelementptr [4 x float], [4 x float]* %3, i32 0, i32 3",
+                          "getelementptr [4 x float], [4 x float]* %3, i32 0, i32 10",
                           "Access to out-of-bounds memory is disallowed");
                           "Access to out-of-bounds memory is disallowed");
 }
 }