Browse Source

Optimize compile times by not skipping allocas (#3168)

Instead of skipping past allocas whenever inserting a new instruction,
which ate up a lot of compilation time, new instructions are inserted at
the default insertion point.

The result is that allocas that would have coalesced just after the
global loads and input loads are dispersed throughout the commands. So as
part of DXIL finalization, the allocas are moved to the beginning of the
entry block of each function. This results in some minor changes to a
couple of tests due to the allocas preceding the loads.
Greg Roth 4 years ago
parent
commit
9459577e8f

+ 3 - 8
include/dxc/DXIL/DxilUtil.h

@@ -59,14 +59,9 @@ namespace dxilutil {
   bool HasDynamicIndexing(llvm::Value *V);
 
   // Find alloca insertion point, given instruction
-  llvm::Instruction *FindAllocaInsertionPt(llvm::Instruction* I); // Considers entire parent function
-  llvm::Instruction *FindAllocaInsertionPt(llvm::BasicBlock* BB); // Only considers provided block
-  llvm::Instruction *FindAllocaInsertionPt(llvm::Function* F);
-  llvm::Instruction *SkipAllocas(llvm::Instruction *I);
-  // Get first non-alloca insertion point, to avoid inserting non-allocas before alloca
-  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Instruction* I); // Considers entire parent function
-  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::BasicBlock* BB); // Only considers provided block
-  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F);
+  llvm::Instruction *FindInsertionPt(llvm::Instruction* I); // Considers entire parent function
+  llvm::Instruction *FindInsertionPt(llvm::BasicBlock* BB); // Only considers provided block
+  llvm::Instruction *FindInsertionPt(llvm::Function* F);
 
   bool IsStaticGlobal(llvm::GlobalVariable *GV);
   bool IsSharedMemoryGlobal(llvm::GlobalVariable *GV);

+ 6 - 21
lib/DXIL/DxilUtil.cpp

@@ -540,33 +540,18 @@ Value *SelectOnOperation(llvm::Instruction *Inst, unsigned operandIdx) {
   return nullptr;
 }
 
-llvm::Instruction *SkipAllocas(llvm::Instruction *I) {
-  // Step past any allocas:
-  while (I && (isa<AllocaInst>(I) || isa<DbgInfoIntrinsic>(I)))
-    I = I->getNextNode();
-  return I;
-}
-llvm::Instruction *FindAllocaInsertionPt(llvm::BasicBlock* BB) {
+llvm::Instruction *FindInsertionPt(llvm::BasicBlock* BB) {
   return &*BB->getFirstInsertionPt();
 }
-llvm::Instruction *FindAllocaInsertionPt(llvm::Function* F) {
-  return FindAllocaInsertionPt(&F->getEntryBlock());
+llvm::Instruction *FindInsertionPt(llvm::Function* F) {
+  return FindInsertionPt(&F->getEntryBlock());
 }
-llvm::Instruction *FindAllocaInsertionPt(llvm::Instruction* I) {
+llvm::Instruction *FindInsertionPt(llvm::Instruction* I) {
   Function *F = I->getParent()->getParent();
   if (F)
-    return FindAllocaInsertionPt(F);
+    return FindInsertionPt(F);
   else // BB with no parent function
-    return FindAllocaInsertionPt(I->getParent());
-}
-llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Instruction* I) {
-  return SkipAllocas(FindAllocaInsertionPt(I));
-}
-llvm::Instruction *FirstNonAllocaInsertionPt(llvm::BasicBlock* BB) {
-  return SkipAllocas(FindAllocaInsertionPt(BB));
-}
-llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F) {
-  return SkipAllocas(FindAllocaInsertionPt(F));
+    return FindInsertionPt(I->getParent());
 }
 
 static bool ConsumePrefix(StringRef &Str, StringRef Prefix) {

+ 1 - 1
lib/DxilPIXPasses/DxilAddPixelHitInstrumentation.cpp

@@ -100,7 +100,7 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M) {
   CallInst *HandleForUAV;
   {
     IRBuilder<> Builder(
-        dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
+        dxilutil::FindInsertionPt(DM.GetEntryFunction()));
 
     unsigned int UAVResourceHandle =
         static_cast<unsigned int>(DM.GetUAVs().size());

+ 1 - 1
lib/DxilPIXPasses/DxilDebugInstrumentation.cpp

@@ -945,7 +945,7 @@ bool DxilDebugInstrumentation::runOnModule(Module &M) {
   //
 
   Instruction *firstInsertionPt =
-      dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
+      dxilutil::FindInsertionPt(DM.GetEntryFunction());
   IRBuilder<> Builder(firstInsertionPt);
 
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};

+ 1 - 1
lib/DxilPIXPasses/DxilPIXMeshShaderOutputInstrumentation.cpp

@@ -268,7 +268,7 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M)
   OP *HlslOP = DM.GetOP();
 
   Instruction *firstInsertionPt =
-      dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
+      dxilutil::FindInsertionPt(DM.GetEntryFunction());
   IRBuilder<> Builder(firstInsertionPt);
 
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};

+ 1 - 1
lib/HLSL/DxilCondenseResources.cpp

@@ -1966,7 +1966,7 @@ void DxilLowerCreateHandleForLib::TranslateDxilResourceUses(
   for (iplist<Function>::iterator F : pM->getFunctionList()) {
     if (!F->isDeclaration()) {
       if (!isResArray) {
-        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
+        IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
         if (m_HasDbgInfo) {
           // TODO: set debug info.
           // Builder.SetCurrentDebugLocation(DL);

+ 1 - 1
lib/HLSL/DxilEliminateOutputDynamicIndexing.cpp

@@ -122,7 +122,7 @@ bool DxilEliminateOutputDynamicIndexing::EliminateDynamicOutput(
   if (dynamicSigSet.empty())
     return false;
 
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Entry));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Entry));
 
   Value *opcodeV = AllocaBuilder.getInt32(static_cast<unsigned>(opcode));
   Value *zero = AllocaBuilder.getInt32(0);

+ 3 - 3
lib/HLSL/DxilGenerationPass.cpp

@@ -64,7 +64,7 @@ void SimplifyGlobalSymbol(GlobalVariable *GV) {
     for (auto it : handleMapOnFunction) {
       Function *F = it.first;
       Instruction *I = it.second;
-      IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
+      IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
       Value *headLI = Builder.CreateLoad(GV);
       I->replaceAllUsesWith(headLI);
     }
@@ -537,7 +537,7 @@ void DxilGenerationPass::GenerateDxilCBufferHandles() {
         // Must HLCreateHandle.
         CallInst *CI = cast<CallInst>(*(U++));
         // Put createHandle to entry block.
-        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(CI));
+        IRBuilder<> Builder(dxilutil::FindInsertionPt(CI));
         Value *V = Builder.CreateLoad(GV);
         CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
         if (m_HasDbgInfo) {
@@ -562,7 +562,7 @@ void DxilGenerationPass::GenerateDxilCBufferHandles() {
         Value *CBIndex = CI->getArgOperand(HLOperandIndex::kCreateHandleIndexOpIdx);
         if (isa<ConstantInt>(CBIndex)) {
           // Put createHandle to entry block for const index.
-          Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(CI));
+          Builder.SetInsertPoint(dxilutil::FindInsertionPt(CI));
         }
         // Add GEP for cbv array use.
         Value *GEP = Builder.CreateGEP(GV, {zeroIdx, CBIndex});

+ 1 - 1
lib/HLSL/DxilLinker.cpp

@@ -792,7 +792,7 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
   CloneFunctions(vmap);
 
   // Call global constrctor.
-  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
+  IRBuilder<> Builder(dxilutil::FindInsertionPt(DM.GetEntryFunction()));
   for (auto &it : m_functionDefs) {
     DxilFunctionLinkInfo *linkInfo = it.first;
     DxilLib *pLib = it.second;

+ 20 - 1
lib/HLSL/DxilPreparePasses.cpp

@@ -388,6 +388,9 @@ public:
       unsigned DxilMinor = 0;
       M.GetDxilModule().GetDxilVersion(DxilMajor, DxilMinor);
 
+      // Move all allocas to the top of the entry block
+      ConsolidateAllocas(M);
+
       bool IsLib = DM.GetShaderModel()->IsLib();
       // Skip validation patch for lib.
       if (!IsLib) {
@@ -446,6 +449,22 @@ public:
   }
 
 private:
+  void ConsolidateAllocas(Module &M) { // For each defined function in M, moves every alloca found after the first non-alloca, non-debug-intrinsic instruction to just before that instruction.
+    for (Function &F : M) {
+      if (F.isDeclaration()) // Declarations are skipped; only defined functions are scanned.
+        continue;
+      Instruction *insertPt = nullptr; // First instruction that is neither an alloca nor a debug intrinsic; null until one is seen.
+      for (llvm::Instruction &I : llvm::inst_range(&F)) { // NOTE(review): inst_range(&F) presumably walks every block of F, not only the entry block - confirm allocas outside the entry block are intended to be moved too.
+        if (!insertPt) {
+          if (!isa<AllocaInst>(I) && !isa<DbgInfoIntrinsic>(I)) // Leading allocas and debug intrinsics are left where they are.
+            insertPt = &I;
+        } else if (isa<AllocaInst>(I)) {
+          I.moveBefore(insertPt); // NOTE(review): I is relinked while the range-for is positioned on it - confirm the iterator stays valid and resumes where intended.
+        }
+      }
+    }
+  }
+
   void RemoveUnusedStaticGlobal(Module &M) {
     // Remove unused internal global.
     std::vector<GlobalVariable *> staticGVs;
@@ -652,7 +671,7 @@ private:
           Function *F = CI->getParent()->getParent();
           ICmpInst *Cmp = DxBreakCmpMap.lookup(F);
           if (!Cmp) {
-            Instruction *IP = dxilutil::FirstNonAllocaInsertionPt(F);
+            Instruction *IP = dxilutil::FindInsertionPt(F);
             LoadInst *LI = new LoadInst(Gep, nullptr, false, IP);
             Cmp = new ICmpInst(IP, ICmpInst::ICMP_EQ, LI, llvm::ConstantInt::get(i32Ty,0));
             DxBreakCmpMap[F] = Cmp;

+ 3 - 3
lib/HLSL/HLMatrixLowerPass.cpp

@@ -429,7 +429,7 @@ Value *HLMatrixLowerPass::bitCastValue(Value *SrcVal, Type* DstTy, bool DstTyAll
 
   // We store and load from a temporary alloca, bitcasting either on the store pointer
   // or on the load pointer.
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
   Value *Alloca = AllocaBuilder.CreateAlloca(DstTyAlloca ? DstTy : SrcTy);
   Value *BitCastedAlloca = Builder.CreateBitCast(Alloca, (DstTyAlloca ? SrcTy : DstTy)->getPointerTo());
   Builder.CreateStore(SrcVal, DstTyAlloca ? BitCastedAlloca : Alloca);
@@ -476,7 +476,7 @@ void HLMatrixLowerPass::replaceAllUsesByLoweredValue(Instruction* MatInst, Value
       Instruction *PrevInst = dyn_cast<Instruction>(VecVal);
       if (PrevInst == nullptr) PrevInst = MatInst;
 
-      IRBuilder<> Builder(dxilutil::SkipAllocas(PrevInst->getNextNode()));
+      IRBuilder<> Builder(PrevInst->getNextNode());
       VecToMatStub = Builder.CreateCall(TranslationStub, { VecVal });
     }
 
@@ -743,7 +743,7 @@ Value *HLMatrixLowerPass::lowerNonHLCall(CallInst *Call) {
   // The callee returns a matrix, and we don't lower signatures in this pass.
   // We perform a sketchy bitcast to the lowered register-representation type,
   // which the later HLMatrixBitcastLower pass knows how to eliminate.
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Call));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Call));
   Value *LoweredAlloca = AllocaBuilder.CreateAlloca(RetMatTy.getLoweredVectorTypeForReg());
   
   IRBuilder<> PostCallBuilder(Call->getNextNode());

+ 2 - 2
lib/HLSL/HLMatrixSubscriptUseReplacer.cpp

@@ -139,7 +139,7 @@ Value *HLMatrixSubscriptUseReplacer::tryGetScalarIndex(Value *SubIdxVal, IRBuild
   // We need to dynamically index into the level 1 element indices
   if (LazyTempElemIndicesArrayAlloca == nullptr) {
     // The level 2 index is dynamic, use it to index a temporary array of the level 1 indices.
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
     ArrayType *ArrayTy = ArrayType::get(AllocaBuilder.getInt32Ty(), ElemIndices.size());
     LazyTempElemIndicesArrayAlloca = AllocaBuilder.CreateAlloca(ArrayTy);
   }
@@ -180,7 +180,7 @@ void HLMatrixSubscriptUseReplacer::cacheLoweredMatrix(bool ForDynamicIndexing, I
   // Lazily create the temporary array alloca
   if (LazyTempElemArrayAlloca == nullptr) {
     ArrayType *TempElemArrayTy = ArrayType::get(MatVecTy->getElementType(), MatVecTy->getNumElements());
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
     LazyTempElemArrayAlloca = AllocaBuilder.CreateAlloca(TempElemArrayTy);
   }
 

+ 2 - 2
lib/HLSL/HLOperationLower.cpp

@@ -6532,7 +6532,7 @@ static ResRetValueArray GenerateTypedBufferLoad(
 
 static AllocaInst* SpillValuesToArrayAlloca(ArrayRef<Value*> Values, IRBuilder<>& Builder) {
   DXASSERT_NOMSG(!Values.empty());
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
   AllocaInst* ArrayAlloca = AllocaBuilder.CreateAlloca(ArrayType::get(Values[0]->getType(), Values.size()));
   for (unsigned i = 0; i < Values.size(); ++i) {
     Value* ArrayElemPtr = Builder.CreateGEP(ArrayAlloca, { Builder.getInt32(0), Builder.getInt32(i) });
@@ -7583,7 +7583,7 @@ static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, b
     return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
   } else {
     // If value, we have to alloca, store to bitcast ptr, and load
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Insert));
     Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
     Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
     Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);

+ 1 - 1
lib/HLSL/HLSignatureLower.cpp

@@ -593,7 +593,7 @@ Value *replaceLdWithLdInput(Function *loadInput, LoadInst *ldInst,
                             unsigned cols, MutableArrayRef<Value *> args,
                             bool bCast) {
   IRBuilder<> Builder(ldInst);
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(ldInst));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(ldInst));
   Type *Ty = ldInst->getType();
   Type *EltTy = Ty->getScalarType();
   // Change i1 to i32 for load input.

+ 19 - 19
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -1748,7 +1748,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
       // separate elements.
       if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
         std::vector<Value *> Elts;
-        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(AI));
+        IRBuilder<> Builder(dxilutil::FindInsertionPt(AI));
         bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
 
         Type *BrokenUpTy = nullptr;
@@ -2490,7 +2490,7 @@ void SROA_Helper::RewriteBitCast(BitCastInst *BCI) {
 void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
                                  bool bOut) {
   Function *F = CI->getParent()->getParent();
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
   const DataLayout &DL = F->getParent()->getDataLayout();
 
   Value *userTyV = CI->getArgOperand(ArgIdx);
@@ -2757,7 +2757,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
   if (HLMatrixType::isa(Ty))
     return false;
 
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
 
   if (StructType *ST = dyn_cast<StructType>(Ty)) {
     // Skip HLSL object types and RayQuery.
@@ -3810,7 +3810,7 @@ void SROA_Parameter_HLSL::RewriteBitcastWithIdenticalStructs(BitCastInst *BCI) {
   StructType *srcStTy = cast<StructType>(BCI->getSrcTy()->getPointerElementType());
   StructType *destStTy = cast<StructType>(BCI->getDestTy()->getPointerElementType());
   Value* srcPtr = BCI->getOperand(0);
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(BCI->getParent()->getParent()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(BCI->getParent()->getParent()));
   AllocaInst *destPtr = AllocaBuilder.CreateAlloca(destStTy);
   IRBuilder<> InstBuilder(BCI);
   std::vector<unsigned> idxlist = { 0 };
@@ -4311,7 +4311,7 @@ void SROA_Parameter_HLSL::replaceCastParameter(
   if (isa<Argument>(OldParam) && OldTy->isPointerTy()) {
     // OldParam will be removed with Old function.
     // Create alloca to replace it.
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(&F));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(&F));
     Value *AllocParam = AllocaBuilder.CreateAlloca(OldTy->getPointerElementType());
     OldParam->replaceAllUsesWith(AllocParam);
     OldParam = AllocParam;
@@ -4394,7 +4394,7 @@ Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
     IRBuilder<> &Builder) {
   Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
   Module &M = *m_pHLModule->GetModule();
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
 
   // Lower resource type to handle ty.
   if (dxilutil::IsHLSLResourceType(Ty)) {
@@ -4433,7 +4433,7 @@ Value *SROA_Parameter_HLSL::castArgumentIfRequired(
     IRBuilder<> &Builder,
     DxilTypeSystem &TypeSys) {
   Module &M = *m_pHLModule->GetModule();
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
 
   if (inputQual == DxilParamInputQual::InPayload) {
     DXASSERT_NOMSG(isa<StructType>(Ty));
@@ -4600,8 +4600,8 @@ void SROA_Parameter_HLSL::flattenArgument(
 
     // Now is safe to create the IRBuilders.
     // If we create it before LowerMemcpy, the insertion pointer instruction may get deleted
-    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(EntryBlock));
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(EntryBlock));
+    IRBuilder<> Builder(dxilutil::FindInsertionPt(EntryBlock));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(EntryBlock));
 
     std::vector<Value *> Elts;
 
@@ -4924,8 +4924,8 @@ void SROA_Parameter_HLSL::preprocessArgUsedInCall(Function *F) {
   DxilFunctionAnnotation *pFuncAnnot = typeSys.GetFunctionAnnotation(F);
   DXASSERT(pFuncAnnot, "else invalid function");
 
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
-  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
+  IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
 
   SmallVector<ReturnInst*, 2> retList;
   for (BasicBlock &bb : F->getBasicBlockList()) {
@@ -5118,7 +5118,7 @@ static void LegalizeDxilInputOutputs(Function *F,
         // DxilGenerationPass.
         isColMajor = paramAnnotation.GetMatrixAnnotation().Orientation ==
                      MatrixOrientation::ColumnMajor;
-        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
+        IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
 
         HLCastOpcode opcode = isColMajor ? HLCastOpcode::ColMatrixToVecCast
                                          : HLCastOpcode::RowMatrixToVecCast;
@@ -5180,7 +5180,7 @@ static void LegalizeDxilInputOutputs(Function *F,
 
     if (bStoreInputToTemp || bLoadOutputFromTemp) {
       IRBuilder<> AllocaBuilder(EntryBlk.getFirstInsertionPt());
-      IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(&EntryBlk));
+      IRBuilder<> Builder(dxilutil::FindInsertionPt(&EntryBlk));
 
       AllocaInst *temp = AllocaBuilder.CreateAlloca(Ty);
       // Replace all uses with temp.
@@ -5296,8 +5296,8 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
   std::vector<DxilParameterAnnotation> FlatRetAnnotationList;
   // Split and change to out parameter.
   if (!retType->isVoidTy()) {
-    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(EntryBlock));
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(EntryBlock));
+    IRBuilder<> Builder(dxilutil::FindInsertionPt(EntryBlock));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(EntryBlock));
     Value *retValAddr = AllocaBuilder.CreateAlloca(retType);
     DxilParameterAnnotation &retAnnotation =
         funcAnnotation->GetRetTypeAnnotation();
@@ -5510,8 +5510,8 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
     LLVMContext &Context = F->getContext();
 
     // Parameter cast come from begining of entry block.
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(flatF));
-    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(flatF));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(flatF));
+    IRBuilder<> Builder(dxilutil::FindInsertionPt(flatF));
 
     while (argIter != flatF->arg_end()) {
       Argument *Arg = argIter++;
@@ -5765,10 +5765,10 @@ bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV
     return false;
 
   Function *F = const_cast<Function*>(PS.AccessingFunction);
-  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
+  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
   AllocaInst *AI = AllocaBuilder.CreateAlloca(GV->getType()->getElementType());
 
-  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
+  IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
 
   // Store initializer is exist.
   if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {

+ 1 - 1
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -5637,7 +5637,7 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     Function *F = InsertBlock->getParent();
 
     // Make sure the alloca is in entry block to stop inline create stacksave.
-    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
+    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
     tmpArgAddr = AllocaBuilder.CreateAlloca(CGF.ConvertTypeForMem(ParamTy));
 
     // add it to local decl map

+ 1 - 1
tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp

@@ -733,7 +733,7 @@ void CreateWriteEnabledStaticGlobals(llvm::Module *M, llvm::Function *EF) {
   }
 
   IRBuilder<> Builder(
-      dxilutil::FirstNonAllocaInsertionPt(&EF->getEntryBlock()));
+      dxilutil::FindInsertionPt(&EF->getEntryBlock()));
   for (GlobalVariable *GV : worklist) {
     GlobalVariable *NGV = CreateStaticGlobal(M, GV);
     GV->replaceAllUsesWith(NGV);

+ 1 - 1
tools/clang/test/HLSLFileCheck/hlsl/objects/RayQuery/rayquery-array-2d-dynamic.hlsl

@@ -1,5 +1,6 @@
 // RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
 
+// CHECK: %[[array:[^ ]+]] = alloca [6 x i32]
 // CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
 RaytracingAccelerationStructure RTAS;
 
@@ -10,7 +11,6 @@ void DoTrace(RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES>
 int C;
 
 float main(RayDesc rayDesc : RAYDESC) : OUT {
-  // CHECK: %[[array:[^ ]+]] = alloca [6 x i32]
   // Ideally, one for [1][2] statically indexed, and 3 for [0][C] dynamically indexed sub-array.
   // But that would require 2d array optimization when one index is constant.
   // CHECK: %[[RQ00:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)

+ 2 - 2
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -1489,8 +1489,8 @@ TEST_F(ValidationTest, UnusedMetadata) {
 
 TEST_F(ValidationTest, MemoryOutOfBound) {
   RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\targetArray.hlsl", "ps_6_0",
-                          "getelementptr [4 x float], [4 x float]* %7, i32 0, i32 3",
-                          "getelementptr [4 x float], [4 x float]* %7, i32 0, i32 10",
+                          "getelementptr [4 x float], [4 x float]* %3, i32 0, i32 3",
+                          "getelementptr [4 x float], [4 x float]* %3, i32 0, i32 10",
                           "Access to out-of-bounds memory is disallowed");
 }