Browse Source

Revert "Optimize compile times by not skipping allocas (#3168)" (#3183)

This reverts commit 9459577e8f33d3b1cfab9ee0c1ba5fe2f9b6c286.
Greg Roth 4 years ago
parent
commit
754e99b405

+ 8 - 3
include/dxc/DXIL/DxilUtil.h

@@ -59,9 +59,14 @@ namespace dxilutil {
   bool HasDynamicIndexing(llvm::Value *V);
   bool HasDynamicIndexing(llvm::Value *V);
 
 
   // Find alloca insertion point, given instruction
   // Find alloca insertion point, given instruction
-  llvm::Instruction *FindInsertionPt(llvm::Instruction* I); // Considers entire parent function
-  llvm::Instruction *FindInsertionPt(llvm::BasicBlock* BB); // Only considers provided block
-  llvm::Instruction *FindInsertionPt(llvm::Function* F);
+  llvm::Instruction *FindAllocaInsertionPt(llvm::Instruction* I); // Considers entire parent function
+  llvm::Instruction *FindAllocaInsertionPt(llvm::BasicBlock* BB); // Only considers provided block
+  llvm::Instruction *FindAllocaInsertionPt(llvm::Function* F);
+  llvm::Instruction *SkipAllocas(llvm::Instruction *I);
+  // Get first non-alloca insertion point, to avoid inserting non-allocas before alloca
+  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Instruction* I); // Considers entire parent function
+  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::BasicBlock* BB); // Only considers provided block
+  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F);
 
 
   bool IsStaticGlobal(llvm::GlobalVariable *GV);
   bool IsStaticGlobal(llvm::GlobalVariable *GV);
   bool IsSharedMemoryGlobal(llvm::GlobalVariable *GV);
   bool IsSharedMemoryGlobal(llvm::GlobalVariable *GV);

+ 21 - 6
lib/DXIL/DxilUtil.cpp

@@ -540,18 +540,33 @@ Value *SelectOnOperation(llvm::Instruction *Inst, unsigned operandIdx) {
   return nullptr;
   return nullptr;
 }
 }
 
 
-llvm::Instruction *FindInsertionPt(llvm::BasicBlock* BB) {
+llvm::Instruction *SkipAllocas(llvm::Instruction *I) {
+  // Step past any allocas:
+  while (I && (isa<AllocaInst>(I) || isa<DbgInfoIntrinsic>(I)))
+    I = I->getNextNode();
+  return I;
+}
+llvm::Instruction *FindAllocaInsertionPt(llvm::BasicBlock* BB) {
   return &*BB->getFirstInsertionPt();
   return &*BB->getFirstInsertionPt();
 }
 }
-llvm::Instruction *FindInsertionPt(llvm::Function* F) {
-  return FindInsertionPt(&F->getEntryBlock());
+llvm::Instruction *FindAllocaInsertionPt(llvm::Function* F) {
+  return FindAllocaInsertionPt(&F->getEntryBlock());
 }
 }
-llvm::Instruction *FindInsertionPt(llvm::Instruction* I) {
+llvm::Instruction *FindAllocaInsertionPt(llvm::Instruction* I) {
   Function *F = I->getParent()->getParent();
   Function *F = I->getParent()->getParent();
   if (F)
   if (F)
-    return FindInsertionPt(F);
+    return FindAllocaInsertionPt(F);
   else // BB with no parent function
   else // BB with no parent function
-    return FindInsertionPt(I->getParent());
+    return FindAllocaInsertionPt(I->getParent());
+}
+llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Instruction* I) {
+  return SkipAllocas(FindAllocaInsertionPt(I));
+}
+llvm::Instruction *FirstNonAllocaInsertionPt(llvm::BasicBlock* BB) {
+  return SkipAllocas(FindAllocaInsertionPt(BB));
+}
+llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F) {
+  return SkipAllocas(FindAllocaInsertionPt(F));
 }
 }
 
 
 static bool ConsumePrefix(StringRef &Str, StringRef Prefix) {
 static bool ConsumePrefix(StringRef &Str, StringRef Prefix) {

+ 1 - 1
lib/DxilPIXPasses/DxilAddPixelHitInstrumentation.cpp

@@ -100,7 +100,7 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M) {
   CallInst *HandleForUAV;
   CallInst *HandleForUAV;
   {
   {
     IRBuilder<> Builder(
     IRBuilder<> Builder(
-        dxilutil::FindInsertionPt(DM.GetEntryFunction()));
+        dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
 
 
     unsigned int UAVResourceHandle =
     unsigned int UAVResourceHandle =
         static_cast<unsigned int>(DM.GetUAVs().size());
         static_cast<unsigned int>(DM.GetUAVs().size());

+ 1 - 1
lib/DxilPIXPasses/DxilDebugInstrumentation.cpp

@@ -957,7 +957,7 @@ bool DxilDebugInstrumentation::runOnModule(Module &M) {
   //
   //
 
 
   Instruction *firstInsertionPt =
   Instruction *firstInsertionPt =
-      dxilutil::FindInsertionPt(DM.GetEntryFunction());
+      dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
   IRBuilder<> Builder(firstInsertionPt);
   IRBuilder<> Builder(firstInsertionPt);
 
 
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};

+ 1 - 1
lib/DxilPIXPasses/DxilPIXMeshShaderOutputInstrumentation.cpp

@@ -268,7 +268,7 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M)
   OP *HlslOP = DM.GetOP();
   OP *HlslOP = DM.GetOP();
 
 
   Instruction *firstInsertionPt =
   Instruction *firstInsertionPt =
-      dxilutil::FindInsertionPt(DM.GetEntryFunction());
+      dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
   IRBuilder<> Builder(firstInsertionPt);
   IRBuilder<> Builder(firstInsertionPt);
 
 
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};

+ 1 - 1
lib/HLSL/DxilCondenseResources.cpp

@@ -1966,7 +1966,7 @@ void DxilLowerCreateHandleForLib::TranslateDxilResourceUses(
   for (iplist<Function>::iterator F : pM->getFunctionList()) {
   for (iplist<Function>::iterator F : pM->getFunctionList()) {
     if (!F->isDeclaration()) {
     if (!F->isDeclaration()) {
       if (!isResArray) {
       if (!isResArray) {
-        IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
+        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
         if (m_HasDbgInfo) {
         if (m_HasDbgInfo) {
           // TODO: set debug info.
           // TODO: set debug info.
           // Builder.SetCurrentDebugLocation(DL);
           // Builder.SetCurrentDebugLocation(DL);

+ 1 - 1
lib/HLSL/DxilEliminateOutputDynamicIndexing.cpp

@@ -122,7 +122,7 @@ bool DxilEliminateOutputDynamicIndexing::EliminateDynamicOutput(
   if (dynamicSigSet.empty())
   if (dynamicSigSet.empty())
     return false;
     return false;
 
 
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Entry));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Entry));
 
 
   Value *opcodeV = AllocaBuilder.getInt32(static_cast<unsigned>(opcode));
   Value *opcodeV = AllocaBuilder.getInt32(static_cast<unsigned>(opcode));
   Value *zero = AllocaBuilder.getInt32(0);
   Value *zero = AllocaBuilder.getInt32(0);

+ 3 - 3
lib/HLSL/DxilGenerationPass.cpp

@@ -64,7 +64,7 @@ void SimplifyGlobalSymbol(GlobalVariable *GV) {
     for (auto it : handleMapOnFunction) {
     for (auto it : handleMapOnFunction) {
       Function *F = it.first;
       Function *F = it.first;
       Instruction *I = it.second;
       Instruction *I = it.second;
-      IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
+      IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
       Value *headLI = Builder.CreateLoad(GV);
       Value *headLI = Builder.CreateLoad(GV);
       I->replaceAllUsesWith(headLI);
       I->replaceAllUsesWith(headLI);
     }
     }
@@ -537,7 +537,7 @@ void DxilGenerationPass::GenerateDxilCBufferHandles() {
         // Must HLCreateHandle.
         // Must HLCreateHandle.
         CallInst *CI = cast<CallInst>(*(U++));
         CallInst *CI = cast<CallInst>(*(U++));
         // Put createHandle to entry block.
         // Put createHandle to entry block.
-        IRBuilder<> Builder(dxilutil::FindInsertionPt(CI));
+        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(CI));
         Value *V = Builder.CreateLoad(GV);
         Value *V = Builder.CreateLoad(GV);
         CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
         CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
         if (m_HasDbgInfo) {
         if (m_HasDbgInfo) {
@@ -562,7 +562,7 @@ void DxilGenerationPass::GenerateDxilCBufferHandles() {
         Value *CBIndex = CI->getArgOperand(HLOperandIndex::kCreateHandleIndexOpIdx);
         Value *CBIndex = CI->getArgOperand(HLOperandIndex::kCreateHandleIndexOpIdx);
         if (isa<ConstantInt>(CBIndex)) {
         if (isa<ConstantInt>(CBIndex)) {
           // Put createHandle to entry block for const index.
           // Put createHandle to entry block for const index.
-          Builder.SetInsertPoint(dxilutil::FindInsertionPt(CI));
+          Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(CI));
         }
         }
         // Add GEP for cbv array use.
         // Add GEP for cbv array use.
         Value *GEP = Builder.CreateGEP(GV, {zeroIdx, CBIndex});
         Value *GEP = Builder.CreateGEP(GV, {zeroIdx, CBIndex});

+ 1 - 1
lib/HLSL/DxilLinker.cpp

@@ -792,7 +792,7 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
   CloneFunctions(vmap);
   CloneFunctions(vmap);
 
 
   // Call global constructor.
   // Call global constructor.
-  IRBuilder<> Builder(dxilutil::FindInsertionPt(DM.GetEntryFunction()));
+  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
   for (auto &it : m_functionDefs) {
   for (auto &it : m_functionDefs) {
     DxilFunctionLinkInfo *linkInfo = it.first;
     DxilFunctionLinkInfo *linkInfo = it.first;
     DxilLib *pLib = it.second;
     DxilLib *pLib = it.second;

+ 1 - 20
lib/HLSL/DxilPreparePasses.cpp

@@ -388,9 +388,6 @@ public:
       unsigned DxilMinor = 0;
       unsigned DxilMinor = 0;
       M.GetDxilModule().GetDxilVersion(DxilMajor, DxilMinor);
       M.GetDxilModule().GetDxilVersion(DxilMajor, DxilMinor);
 
 
-      // Move all allocas to the top of the entry block
-      ConsolidateAllocas(M);
-
       bool IsLib = DM.GetShaderModel()->IsLib();
       bool IsLib = DM.GetShaderModel()->IsLib();
       // Skip validation patch for lib.
       // Skip validation patch for lib.
       if (!IsLib) {
       if (!IsLib) {
@@ -449,22 +446,6 @@ public:
   }
   }
 
 
 private:
 private:
-  void ConsolidateAllocas(Module &M) {
-    for (Function &F : M) {
-      if (F.isDeclaration())
-        continue;
-      Instruction *insertPt = nullptr;
-      for (llvm::Instruction &I : llvm::inst_range(&F)) {
-        if (!insertPt) {
-          if (!isa<AllocaInst>(I) && !isa<DbgInfoIntrinsic>(I))
-            insertPt = &I;
-        } else if (isa<AllocaInst>(I)) {
-          I.moveBefore(insertPt);
-        }
-      }
-    }
-  }
-
   void RemoveUnusedStaticGlobal(Module &M) {
   void RemoveUnusedStaticGlobal(Module &M) {
     // Remove unused internal global.
     // Remove unused internal global.
     std::vector<GlobalVariable *> staticGVs;
     std::vector<GlobalVariable *> staticGVs;
@@ -671,7 +652,7 @@ private:
           Function *F = CI->getParent()->getParent();
           Function *F = CI->getParent()->getParent();
           ICmpInst *Cmp = DxBreakCmpMap.lookup(F);
           ICmpInst *Cmp = DxBreakCmpMap.lookup(F);
           if (!Cmp) {
           if (!Cmp) {
-            Instruction *IP = dxilutil::FindInsertionPt(F);
+            Instruction *IP = dxilutil::FirstNonAllocaInsertionPt(F);
             LoadInst *LI = new LoadInst(Gep, nullptr, false, IP);
             LoadInst *LI = new LoadInst(Gep, nullptr, false, IP);
             Cmp = new ICmpInst(IP, ICmpInst::ICMP_EQ, LI, llvm::ConstantInt::get(i32Ty,0));
             Cmp = new ICmpInst(IP, ICmpInst::ICMP_EQ, LI, llvm::ConstantInt::get(i32Ty,0));
             DxBreakCmpMap[F] = Cmp;
             DxBreakCmpMap[F] = Cmp;

+ 3 - 3
lib/HLSL/HLMatrixLowerPass.cpp

@@ -429,7 +429,7 @@ Value *HLMatrixLowerPass::bitCastValue(Value *SrcVal, Type* DstTy, bool DstTyAll
 
 
   // We store and load from a temporary alloca, bitcasting either on the store pointer
   // We store and load from a temporary alloca, bitcasting either on the store pointer
   // or on the load pointer.
   // or on the load pointer.
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
   Value *Alloca = AllocaBuilder.CreateAlloca(DstTyAlloca ? DstTy : SrcTy);
   Value *Alloca = AllocaBuilder.CreateAlloca(DstTyAlloca ? DstTy : SrcTy);
   Value *BitCastedAlloca = Builder.CreateBitCast(Alloca, (DstTyAlloca ? SrcTy : DstTy)->getPointerTo());
   Value *BitCastedAlloca = Builder.CreateBitCast(Alloca, (DstTyAlloca ? SrcTy : DstTy)->getPointerTo());
   Builder.CreateStore(SrcVal, DstTyAlloca ? BitCastedAlloca : Alloca);
   Builder.CreateStore(SrcVal, DstTyAlloca ? BitCastedAlloca : Alloca);
@@ -476,7 +476,7 @@ void HLMatrixLowerPass::replaceAllUsesByLoweredValue(Instruction* MatInst, Value
       Instruction *PrevInst = dyn_cast<Instruction>(VecVal);
       Instruction *PrevInst = dyn_cast<Instruction>(VecVal);
       if (PrevInst == nullptr) PrevInst = MatInst;
       if (PrevInst == nullptr) PrevInst = MatInst;
 
 
-      IRBuilder<> Builder(PrevInst->getNextNode());
+      IRBuilder<> Builder(dxilutil::SkipAllocas(PrevInst->getNextNode()));
       VecToMatStub = Builder.CreateCall(TranslationStub, { VecVal });
       VecToMatStub = Builder.CreateCall(TranslationStub, { VecVal });
     }
     }
 
 
@@ -743,7 +743,7 @@ Value *HLMatrixLowerPass::lowerNonHLCall(CallInst *Call) {
   // The callee returns a matrix, and we don't lower signatures in this pass.
   // The callee returns a matrix, and we don't lower signatures in this pass.
   // We perform a sketchy bitcast to the lowered register-representation type,
   // We perform a sketchy bitcast to the lowered register-representation type,
   // which the later HLMatrixBitcastLower pass knows how to eliminate.
   // which the later HLMatrixBitcastLower pass knows how to eliminate.
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Call));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Call));
   Value *LoweredAlloca = AllocaBuilder.CreateAlloca(RetMatTy.getLoweredVectorTypeForReg());
   Value *LoweredAlloca = AllocaBuilder.CreateAlloca(RetMatTy.getLoweredVectorTypeForReg());
   
   
   IRBuilder<> PostCallBuilder(Call->getNextNode());
   IRBuilder<> PostCallBuilder(Call->getNextNode());

+ 2 - 2
lib/HLSL/HLMatrixSubscriptUseReplacer.cpp

@@ -139,7 +139,7 @@ Value *HLMatrixSubscriptUseReplacer::tryGetScalarIndex(Value *SubIdxVal, IRBuild
   // We need to dynamically index into the level 1 element indices
   // We need to dynamically index into the level 1 element indices
   if (LazyTempElemIndicesArrayAlloca == nullptr) {
   if (LazyTempElemIndicesArrayAlloca == nullptr) {
     // The level 2 index is dynamic, use it to index a temporary array of the level 1 indices.
     // The level 2 index is dynamic, use it to index a temporary array of the level 1 indices.
-    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
     ArrayType *ArrayTy = ArrayType::get(AllocaBuilder.getInt32Ty(), ElemIndices.size());
     ArrayType *ArrayTy = ArrayType::get(AllocaBuilder.getInt32Ty(), ElemIndices.size());
     LazyTempElemIndicesArrayAlloca = AllocaBuilder.CreateAlloca(ArrayTy);
     LazyTempElemIndicesArrayAlloca = AllocaBuilder.CreateAlloca(ArrayTy);
   }
   }
@@ -180,7 +180,7 @@ void HLMatrixSubscriptUseReplacer::cacheLoweredMatrix(bool ForDynamicIndexing, I
   // Lazily create the temporary array alloca
   // Lazily create the temporary array alloca
   if (LazyTempElemArrayAlloca == nullptr) {
   if (LazyTempElemArrayAlloca == nullptr) {
     ArrayType *TempElemArrayTy = ArrayType::get(MatVecTy->getElementType(), MatVecTy->getNumElements());
     ArrayType *TempElemArrayTy = ArrayType::get(MatVecTy->getElementType(), MatVecTy->getNumElements());
-    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
     LazyTempElemArrayAlloca = AllocaBuilder.CreateAlloca(TempElemArrayTy);
     LazyTempElemArrayAlloca = AllocaBuilder.CreateAlloca(TempElemArrayTy);
   }
   }
 
 

+ 2 - 2
lib/HLSL/HLOperationLower.cpp

@@ -6532,7 +6532,7 @@ static ResRetValueArray GenerateTypedBufferLoad(
 
 
 static AllocaInst* SpillValuesToArrayAlloca(ArrayRef<Value*> Values, IRBuilder<>& Builder) {
 static AllocaInst* SpillValuesToArrayAlloca(ArrayRef<Value*> Values, IRBuilder<>& Builder) {
   DXASSERT_NOMSG(!Values.empty());
   DXASSERT_NOMSG(!Values.empty());
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
   AllocaInst* ArrayAlloca = AllocaBuilder.CreateAlloca(ArrayType::get(Values[0]->getType(), Values.size()));
   AllocaInst* ArrayAlloca = AllocaBuilder.CreateAlloca(ArrayType::get(Values[0]->getType(), Values.size()));
   for (unsigned i = 0; i < Values.size(); ++i) {
   for (unsigned i = 0; i < Values.size(); ++i) {
     Value* ArrayElemPtr = Builder.CreateGEP(ArrayAlloca, { Builder.getInt32(0), Builder.getInt32(i) });
     Value* ArrayElemPtr = Builder.CreateGEP(ArrayAlloca, { Builder.getInt32(0), Builder.getInt32(i) });
@@ -7583,7 +7583,7 @@ static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, b
     return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
     return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
   } else {
   } else {
     // If value, we have to alloca, store to bitcast ptr, and load
     // If value, we have to alloca, store to bitcast ptr, and load
-    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Insert));
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
     Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
     Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
     Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
     Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
     Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);
     Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);

+ 1 - 1
lib/HLSL/HLSignatureLower.cpp

@@ -593,7 +593,7 @@ Value *replaceLdWithLdInput(Function *loadInput, LoadInst *ldInst,
                             unsigned cols, MutableArrayRef<Value *> args,
                             unsigned cols, MutableArrayRef<Value *> args,
                             bool bCast) {
                             bool bCast) {
   IRBuilder<> Builder(ldInst);
   IRBuilder<> Builder(ldInst);
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(ldInst));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(ldInst));
   Type *Ty = ldInst->getType();
   Type *Ty = ldInst->getType();
   Type *EltTy = Ty->getScalarType();
   Type *EltTy = Ty->getScalarType();
   // Change i1 to i32 for load input.
   // Change i1 to i32 for load input.

+ 19 - 19
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -1748,7 +1748,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
       // separate elements.
       // separate elements.
       if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
       if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
         std::vector<Value *> Elts;
         std::vector<Value *> Elts;
-        IRBuilder<> Builder(dxilutil::FindInsertionPt(AI));
+        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(AI));
         bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
         bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
 
 
         Type *BrokenUpTy = nullptr;
         Type *BrokenUpTy = nullptr;
@@ -2490,7 +2490,7 @@ void SROA_Helper::RewriteBitCast(BitCastInst *BCI) {
 void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
 void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
                                  bool bOut) {
                                  bool bOut) {
   Function *F = CI->getParent()->getParent();
   Function *F = CI->getParent()->getParent();
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
   const DataLayout &DL = F->getParent()->getDataLayout();
   const DataLayout &DL = F->getParent()->getDataLayout();
 
 
   Value *userTyV = CI->getArgOperand(ArgIdx);
   Value *userTyV = CI->getArgOperand(ArgIdx);
@@ -2757,7 +2757,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
   if (HLMatrixType::isa(Ty))
   if (HLMatrixType::isa(Ty))
     return false;
     return false;
 
 
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
 
 
   if (StructType *ST = dyn_cast<StructType>(Ty)) {
   if (StructType *ST = dyn_cast<StructType>(Ty)) {
     // Skip HLSL object types and RayQuery.
     // Skip HLSL object types and RayQuery.
@@ -3810,7 +3810,7 @@ void SROA_Parameter_HLSL::RewriteBitcastWithIdenticalStructs(BitCastInst *BCI) {
   StructType *srcStTy = cast<StructType>(BCI->getSrcTy()->getPointerElementType());
   StructType *srcStTy = cast<StructType>(BCI->getSrcTy()->getPointerElementType());
   StructType *destStTy = cast<StructType>(BCI->getDestTy()->getPointerElementType());
   StructType *destStTy = cast<StructType>(BCI->getDestTy()->getPointerElementType());
   Value* srcPtr = BCI->getOperand(0);
   Value* srcPtr = BCI->getOperand(0);
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(BCI->getParent()->getParent()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(BCI->getParent()->getParent()));
   AllocaInst *destPtr = AllocaBuilder.CreateAlloca(destStTy);
   AllocaInst *destPtr = AllocaBuilder.CreateAlloca(destStTy);
   IRBuilder<> InstBuilder(BCI);
   IRBuilder<> InstBuilder(BCI);
   std::vector<unsigned> idxlist = { 0 };
   std::vector<unsigned> idxlist = { 0 };
@@ -4311,7 +4311,7 @@ void SROA_Parameter_HLSL::replaceCastParameter(
   if (isa<Argument>(OldParam) && OldTy->isPointerTy()) {
   if (isa<Argument>(OldParam) && OldTy->isPointerTy()) {
     // OldParam will be removed with Old function.
     // OldParam will be removed with Old function.
     // Create alloca to replace it.
     // Create alloca to replace it.
-    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(&F));
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(&F));
     Value *AllocParam = AllocaBuilder.CreateAlloca(OldTy->getPointerElementType());
     Value *AllocParam = AllocaBuilder.CreateAlloca(OldTy->getPointerElementType());
     OldParam->replaceAllUsesWith(AllocParam);
     OldParam->replaceAllUsesWith(AllocParam);
     OldParam = AllocParam;
     OldParam = AllocParam;
@@ -4394,7 +4394,7 @@ Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
     IRBuilder<> &Builder) {
     IRBuilder<> &Builder) {
   Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
   Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
   Module &M = *m_pHLModule->GetModule();
   Module &M = *m_pHLModule->GetModule();
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
 
 
   // Lower resource type to handle ty.
   // Lower resource type to handle ty.
   if (dxilutil::IsHLSLResourceType(Ty)) {
   if (dxilutil::IsHLSLResourceType(Ty)) {
@@ -4433,7 +4433,7 @@ Value *SROA_Parameter_HLSL::castArgumentIfRequired(
     IRBuilder<> &Builder,
     IRBuilder<> &Builder,
     DxilTypeSystem &TypeSys) {
     DxilTypeSystem &TypeSys) {
   Module &M = *m_pHLModule->GetModule();
   Module &M = *m_pHLModule->GetModule();
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(Builder.GetInsertPoint()));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
 
 
   if (inputQual == DxilParamInputQual::InPayload) {
   if (inputQual == DxilParamInputQual::InPayload) {
     DXASSERT_NOMSG(isa<StructType>(Ty));
     DXASSERT_NOMSG(isa<StructType>(Ty));
@@ -4600,8 +4600,8 @@ void SROA_Parameter_HLSL::flattenArgument(
 
 
     // Now is safe to create the IRBuilders.
     // Now is safe to create the IRBuilders.
     // If we create it before LowerMemcpy, the insertion pointer instruction may get deleted
     // If we create it before LowerMemcpy, the insertion pointer instruction may get deleted
-    IRBuilder<> Builder(dxilutil::FindInsertionPt(EntryBlock));
-    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(EntryBlock));
+    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(EntryBlock));
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(EntryBlock));
 
 
     std::vector<Value *> Elts;
     std::vector<Value *> Elts;
 
 
@@ -4924,8 +4924,8 @@ void SROA_Parameter_HLSL::preprocessArgUsedInCall(Function *F) {
   DxilFunctionAnnotation *pFuncAnnot = typeSys.GetFunctionAnnotation(F);
   DxilFunctionAnnotation *pFuncAnnot = typeSys.GetFunctionAnnotation(F);
   DXASSERT(pFuncAnnot, "else invalid function");
   DXASSERT(pFuncAnnot, "else invalid function");
 
 
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
-  IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
+  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
 
 
   SmallVector<ReturnInst*, 2> retList;
   SmallVector<ReturnInst*, 2> retList;
   for (BasicBlock &bb : F->getBasicBlockList()) {
   for (BasicBlock &bb : F->getBasicBlockList()) {
@@ -5118,7 +5118,7 @@ static void LegalizeDxilInputOutputs(Function *F,
         // DxilGenerationPass.
         // DxilGenerationPass.
         isColMajor = paramAnnotation.GetMatrixAnnotation().Orientation ==
         isColMajor = paramAnnotation.GetMatrixAnnotation().Orientation ==
                      MatrixOrientation::ColumnMajor;
                      MatrixOrientation::ColumnMajor;
-        IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
+        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
 
 
         HLCastOpcode opcode = isColMajor ? HLCastOpcode::ColMatrixToVecCast
         HLCastOpcode opcode = isColMajor ? HLCastOpcode::ColMatrixToVecCast
                                          : HLCastOpcode::RowMatrixToVecCast;
                                          : HLCastOpcode::RowMatrixToVecCast;
@@ -5180,7 +5180,7 @@ static void LegalizeDxilInputOutputs(Function *F,
 
 
     if (bStoreInputToTemp || bLoadOutputFromTemp) {
     if (bStoreInputToTemp || bLoadOutputFromTemp) {
       IRBuilder<> AllocaBuilder(EntryBlk.getFirstInsertionPt());
       IRBuilder<> AllocaBuilder(EntryBlk.getFirstInsertionPt());
-      IRBuilder<> Builder(dxilutil::FindInsertionPt(&EntryBlk));
+      IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(&EntryBlk));
 
 
       AllocaInst *temp = AllocaBuilder.CreateAlloca(Ty);
       AllocaInst *temp = AllocaBuilder.CreateAlloca(Ty);
       // Replace all uses with temp.
       // Replace all uses with temp.
@@ -5296,8 +5296,8 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
   std::vector<DxilParameterAnnotation> FlatRetAnnotationList;
   std::vector<DxilParameterAnnotation> FlatRetAnnotationList;
   // Split and change to out parameter.
   // Split and change to out parameter.
   if (!retType->isVoidTy()) {
   if (!retType->isVoidTy()) {
-    IRBuilder<> Builder(dxilutil::FindInsertionPt(EntryBlock));
-    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(EntryBlock));
+    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(EntryBlock));
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(EntryBlock));
     Value *retValAddr = AllocaBuilder.CreateAlloca(retType);
     Value *retValAddr = AllocaBuilder.CreateAlloca(retType);
     DxilParameterAnnotation &retAnnotation =
     DxilParameterAnnotation &retAnnotation =
         funcAnnotation->GetRetTypeAnnotation();
         funcAnnotation->GetRetTypeAnnotation();
@@ -5510,8 +5510,8 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
     LLVMContext &Context = F->getContext();
     LLVMContext &Context = F->getContext();
 
 
     // Parameter casts come from the beginning of the entry block.
     // Parameter casts come from the beginning of the entry block.
-    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(flatF));
-    IRBuilder<> Builder(dxilutil::FindInsertionPt(flatF));
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(flatF));
+    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(flatF));
 
 
     while (argIter != flatF->arg_end()) {
     while (argIter != flatF->arg_end()) {
       Argument *Arg = argIter++;
       Argument *Arg = argIter++;
@@ -5765,10 +5765,10 @@ bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV
     return false;
     return false;
 
 
   Function *F = const_cast<Function*>(PS.AccessingFunction);
   Function *F = const_cast<Function*>(PS.AccessingFunction);
-  IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
   AllocaInst *AI = AllocaBuilder.CreateAlloca(GV->getType()->getElementType());
   AllocaInst *AI = AllocaBuilder.CreateAlloca(GV->getType()->getElementType());
 
 
-  IRBuilder<> Builder(dxilutil::FindInsertionPt(F));
+  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
 
 
   // Store the initializer if it exists.
   // Store the initializer if it exists.
   if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
   if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {

+ 1 - 1
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -5637,7 +5637,7 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     Function *F = InsertBlock->getParent();
     Function *F = InsertBlock->getParent();
 
 
     // Make sure the alloca is in entry block to stop inline create stacksave.
     // Make sure the alloca is in entry block to stop inline create stacksave.
-    IRBuilder<> AllocaBuilder(dxilutil::FindInsertionPt(F));
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
     tmpArgAddr = AllocaBuilder.CreateAlloca(CGF.ConvertTypeForMem(ParamTy));
     tmpArgAddr = AllocaBuilder.CreateAlloca(CGF.ConvertTypeForMem(ParamTy));
 
 
     // add it to local decl map
     // add it to local decl map

+ 1 - 1
tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp

@@ -733,7 +733,7 @@ void CreateWriteEnabledStaticGlobals(llvm::Module *M, llvm::Function *EF) {
   }
   }
 
 
   IRBuilder<> Builder(
   IRBuilder<> Builder(
-      dxilutil::FindInsertionPt(&EF->getEntryBlock()));
+      dxilutil::FirstNonAllocaInsertionPt(&EF->getEntryBlock()));
   for (GlobalVariable *GV : worklist) {
   for (GlobalVariable *GV : worklist) {
     GlobalVariable *NGV = CreateStaticGlobal(M, GV);
     GlobalVariable *NGV = CreateStaticGlobal(M, GV);
     GV->replaceAllUsesWith(NGV);
     GV->replaceAllUsesWith(NGV);

+ 1 - 1
tools/clang/test/HLSLFileCheck/hlsl/objects/RayQuery/rayquery-array-2d-dynamic.hlsl

@@ -1,6 +1,5 @@
 // RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
 // RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
 
 
-// CHECK: %[[array:[^ ]+]] = alloca [6 x i32]
 // CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
 // CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
 RaytracingAccelerationStructure RTAS;
 RaytracingAccelerationStructure RTAS;
 
 
@@ -11,6 +10,7 @@ void DoTrace(RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES>
 int C;
 int C;
 
 
 float main(RayDesc rayDesc : RAYDESC) : OUT {
 float main(RayDesc rayDesc : RAYDESC) : OUT {
+  // CHECK: %[[array:[^ ]+]] = alloca [6 x i32]
   // Ideally, one for [1][2] statically indexed, and 3 for [0][C] dynamically indexed sub-array.
   // Ideally, one for [1][2] statically indexed, and 3 for [0][C] dynamically indexed sub-array.
   // But that would require 2d array optimization when one index is constant.
   // But that would require 2d array optimization when one index is constant.
   // CHECK: %[[RQ00:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
   // CHECK: %[[RQ00:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)

+ 2 - 2
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -1489,8 +1489,8 @@ TEST_F(ValidationTest, UnusedMetadata) {
 
 
 TEST_F(ValidationTest, MemoryOutOfBound) {
 TEST_F(ValidationTest, MemoryOutOfBound) {
   RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\targetArray.hlsl", "ps_6_0",
   RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\targetArray.hlsl", "ps_6_0",
-                          "getelementptr [4 x float], [4 x float]* %3, i32 0, i32 3",
-                          "getelementptr [4 x float], [4 x float]* %3, i32 0, i32 10",
+                          "getelementptr [4 x float], [4 x float]* %7, i32 0, i32 3",
+                          "getelementptr [4 x float], [4 x float]* %7, i32 0, i32 10",
                           "Access to out-of-bounds memory is disallowed");
                           "Access to out-of-bounds memory is disallowed");
 }
 }