Browse Source

Avoid inserting other insts before allocas in several passes.

- Improve code that creates allocas/non-allocas to keep allocas first
- Add helper functions to dxilutil
- Use name AllocaBuilder for CreateAlloca for clarity
Tex Riddell 7 years ago
parent
commit
81f8b8c3a2

+ 10 - 0
include/dxc/HLSL/DxilUtil.h

@@ -22,6 +22,7 @@ class LLVMContext;
 class DiagnosticInfo;
 class DiagnosticInfo;
 class Value;
 class Value;
 class Instruction;
 class Instruction;
+class BasicBlock;
 class StringRef;
 class StringRef;
 }
 }
 
 
@@ -37,6 +38,15 @@ namespace dxilutil {
   llvm::Type *GetArrayEltTy(llvm::Type *Ty);
   llvm::Type *GetArrayEltTy(llvm::Type *Ty);
   bool HasDynamicIndexing(llvm::Value *V);
   bool HasDynamicIndexing(llvm::Value *V);
 
 
+  // Find alloca insertion point (first insertion point of the entry block), given an instruction or a function
+  llvm::Instruction *FindAllocaInsertionPt(llvm::Instruction* I);
+  llvm::Instruction *FindAllocaInsertionPt(llvm::Function* F);
+  llvm::Instruction *SkipAllocas(llvm::Instruction *I);
+  // Get first non-alloca insertion point, to avoid inserting non-allocas before alloca
+  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Instruction* I);
+  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::BasicBlock* BB);
+  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F);
+
   bool IsStaticGlobal(llvm::GlobalVariable *GV);
   bool IsStaticGlobal(llvm::GlobalVariable *GV);
   bool IsSharedMemoryGlobal(llvm::GlobalVariable *GV);
   bool IsSharedMemoryGlobal(llvm::GlobalVariable *GV);
   bool RemoveUnusedFunctions(llvm::Module &M, llvm::Function *EntryFunc,
   bool RemoveUnusedFunctions(llvm::Module &M, llvm::Function *EntryFunc,

+ 4 - 11
lib/HLSL/DxilGenerationPass.cpp

@@ -100,7 +100,7 @@ void SimplifyGlobalSymbol(GlobalVariable *GV) {
     for (auto it : handleMapOnFunction) {
     for (auto it : handleMapOnFunction) {
       Function *F = it.first;
       Function *F = it.first;
       Instruction *I = it.second;
       Instruction *I = it.second;
-      IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt());
+      IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
       Value *headLI = Builder.CreateLoad(GV);
       Value *headLI = Builder.CreateLoad(GV);
       I->replaceAllUsesWith(headLI);
       I->replaceAllUsesWith(headLI);
     }
     }
@@ -613,11 +613,10 @@ void DxilGenerationPass::RemoveLocalDxilResourceAllocas(Function *F) {
     Insts.clear();
     Insts.clear();
   }
   }
 }
 }
-
 void DxilGenerationPass::TranslateParamDxilResourceHandles(Function *F, std::unordered_map<Instruction *, Value *> &handleMap) {
 void DxilGenerationPass::TranslateParamDxilResourceHandles(Function *F, std::unordered_map<Instruction *, Value *> &handleMap) {
   Type *handleTy = m_pHLModule->GetOP()->GetHandleType();
   Type *handleTy = m_pHLModule->GetOP()->GetHandleType();
 
 
-  IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
   for (Argument &arg : F->args()) {
   for (Argument &arg : F->args()) {
     Type *Ty = arg.getType();
     Type *Ty = arg.getType();
 
 
@@ -770,9 +769,7 @@ void DxilGenerationPass::GenerateDxilCBufferHandles(
         // Must HLCreateHandle.
         // Must HLCreateHandle.
         CallInst *CI = cast<CallInst>(*(U++));
         CallInst *CI = cast<CallInst>(*(U++));
         // Put createHandle to entry block.
         // Put createHandle to entry block.
-        auto InsertPt =
-            CI->getParent()->getParent()->getEntryBlock().getFirstInsertionPt();
-        IRBuilder<> Builder(InsertPt);
+        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(CI));
         Value *V = Builder.CreateLoad(GV);
         Value *V = Builder.CreateLoad(GV);
         CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
         CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
         if (m_HasDbgInfo) {
         if (m_HasDbgInfo) {
@@ -796,11 +793,7 @@ void DxilGenerationPass::GenerateDxilCBufferHandles(
         Value *CBIndex = CI->getArgOperand(HLOperandIndex::kCreateHandleIndexOpIdx);
         Value *CBIndex = CI->getArgOperand(HLOperandIndex::kCreateHandleIndexOpIdx);
         if (isa<ConstantInt>(CBIndex)) {
         if (isa<ConstantInt>(CBIndex)) {
           // Put createHandle to entry block for const index.
           // Put createHandle to entry block for const index.
-          auto InsertPt = CI->getParent()
-                              ->getParent()
-                              ->getEntryBlock()
-                              .getFirstInsertionPt();
-          Builder.SetInsertPoint(InsertPt);
+          Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(CI));
         }
         }
         // Add GEP for cbv array use.
         // Add GEP for cbv array use.
         Value *GEP = Builder.CreateGEP(GV, {zeroIdx, CBIndex});
         Value *GEP = Builder.CreateGEP(GV, {zeroIdx, CBIndex});

+ 28 - 0
lib/HLSL/DxilUtil.cpp

@@ -265,5 +265,33 @@ Value *SelectOnOperation(llvm::Instruction *Inst, unsigned operandIdx) {
   }
   }
   return nullptr;
   return nullptr;
 }
 }
+
+llvm::Instruction *SkipAllocas(llvm::Instruction *I) { // Returns the first instruction at or after I that is not an AllocaInst.
+  // Step past any allocas; the loop also stops if I becomes null, in which case null is returned:
+  while (I && isa<AllocaInst>(I))
+    I = I->getNextNode(); // NOTE(review): presumably yields null after the last instruction of the block - confirm
+  return I;
+}
+llvm::Instruction *FindAllocaInsertionPt(llvm::Instruction* I) { // Alloca insertion point for code near I: first insertion point of the entry block of I's function.
+  Function *F = I->getParent()->getParent(); // I->getParent() is dereferenced unchecked, so I must already be inside a basic block
+  if (F)
+    return F->getEntryBlock().getFirstInsertionPt();
+  else // BB with no parent function: fall back to the first insertion point of I's own block
+    return I->getParent()->getFirstInsertionPt();
+}
+llvm::Instruction *FindAllocaInsertionPt(llvm::Function* F) { // Alloca insertion point for F: first insertion point of F's entry block.
+  return F->getEntryBlock().getFirstInsertionPt(); // NOTE(review): presumably F must have a body (not be a declaration) - confirm against callers
+}
+llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Instruction* I) { // Insertion point for non-alloca code: the alloca insertion point found for I, advanced past the run of allocas that starts there.
+  return SkipAllocas(FindAllocaInsertionPt(I));
+}
+llvm::Instruction *FirstNonAllocaInsertionPt(llvm::BasicBlock* BB) { // Same, but for a specific block: starts at BB's own first insertion point rather than the function's entry block.
+  return SkipAllocas(
+    BB->getFirstInsertionPt());
+}
+llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F) { // Insertion point for non-alloca code in F: first insertion point of F's entry block, advanced past the run of allocas that starts there.
+  return SkipAllocas(
+    F->getEntryBlock().getFirstInsertionPt());
+}
 }
 }
 }
 }

+ 4 - 6
lib/HLSL/HLMatrixLowerPass.cpp

@@ -789,18 +789,16 @@ Instruction *HLMatrixLowerPass::TrivialMatBinOpToVec(CallInst *CI) {
 // Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast of alloca, and return load from alloca
 // Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast of alloca, and return load from alloca
 // If bOrigAllocaTy is true: create alloca of old type instead, write to alloca, and return load from bitcast of alloca
 // If bOrigAllocaTy is true: create alloca of old type instead, write to alloca, and return load from bitcast of alloca
 static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, bool bOrigAllocaTy = false, const Twine &Name = "") {
 static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, bool bOrigAllocaTy = false, const Twine &Name = "") {
+  IRBuilder<> Builder(Insert);
   if (Ty->isPointerTy()) {
   if (Ty->isPointerTy()) {
     // If pointer, we can bitcast directly
     // If pointer, we can bitcast directly
-    IRBuilder<> Builder(Insert);
     return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
     return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
-  }
-  else {
+  } else {
     // If value, we have to alloca, store to bitcast ptr, and load
     // If value, we have to alloca, store to bitcast ptr, and load
-    IRBuilder<> EntryBuilder(Insert->getParent()->getParent()->getEntryBlock().begin());
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
     Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
     Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
     Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
     Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
-    Instruction *allocaInst = EntryBuilder.CreateAlloca(allocaTy);
-    IRBuilder<> Builder(Insert);
+    Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);
     Instruction *bitCast = cast<Instruction>(Builder.CreateBitCast(allocaInst, otherTy->getPointerTo()));
     Instruction *bitCast = cast<Instruction>(Builder.CreateBitCast(allocaInst, otherTy->getPointerTo()));
     Builder.CreateStore(V, bOrigAllocaTy ? allocaInst : bitCast);
     Builder.CreateStore(V, bOrigAllocaTy ? allocaInst : bitCast);
     return Builder.CreateLoad(bOrigAllocaTy ? bitCast : allocaInst, Name);
     return Builder.CreateLoad(bOrigAllocaTy ? bitCast : allocaInst, Name);

+ 4 - 6
lib/HLSL/HLOperationLower.cpp

@@ -6727,18 +6727,16 @@ void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper,  H
 // Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast of alloca, and return load from alloca
 // Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast of alloca, and return load from alloca
 // If bOrigAllocaTy is true: create alloca of old type instead, write to alloca, and return load from bitcast of alloca
 // If bOrigAllocaTy is true: create alloca of old type instead, write to alloca, and return load from bitcast of alloca
 static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, bool bOrigAllocaTy = false, const Twine &Name = "") {
 static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, bool bOrigAllocaTy = false, const Twine &Name = "") {
+  IRBuilder<> Builder(Insert);
   if (Ty->isPointerTy()) {
   if (Ty->isPointerTy()) {
     // If pointer, we can bitcast directly
     // If pointer, we can bitcast directly
-    IRBuilder<> Builder(Insert);
     return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
     return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
-  }
-  else {
+  } else {
     // If value, we have to alloca, store to bitcast ptr, and load
     // If value, we have to alloca, store to bitcast ptr, and load
-    IRBuilder<> EntryBuilder(Insert->getParent()->getParent()->getEntryBlock().begin());
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
     Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
     Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
     Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
     Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
-    Instruction *allocaInst = EntryBuilder.CreateAlloca(allocaTy);
-    IRBuilder<> Builder(Insert);
+    Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);
     Instruction *bitCast = cast<Instruction>(Builder.CreateBitCast(allocaInst, otherTy->getPointerTo()));
     Instruction *bitCast = cast<Instruction>(Builder.CreateBitCast(allocaInst, otherTy->getPointerTo()));
     Builder.CreateStore(V, bOrigAllocaTy ? allocaInst : bitCast);
     Builder.CreateStore(V, bOrigAllocaTy ? allocaInst : bitCast);
     return Builder.CreateLoad(bOrigAllocaTy ? bitCast : allocaInst, Name);
     return Builder.CreateLoad(bOrigAllocaTy ? bitCast : allocaInst, Name);

+ 46 - 30
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -1605,7 +1605,7 @@ bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
       // separate elements.
       // separate elements.
       if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
       if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
         std::vector<Value *> Elts;
         std::vector<Value *> Elts;
-        IRBuilder<> Builder(AI);
+        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(AI));
         bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
         bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
 
 
         bool SROAed = SROA_Helper::DoScalarReplacement(
         bool SROAed = SROA_Helper::DoScalarReplacement(
@@ -3068,7 +3068,7 @@ void SROA_Helper::RewriteBitCast(BitCastInst *BCI) {
 void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
 void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
                                  bool bOut) {
                                  bool bOut) {
   Function *F = CI->getParent()->getParent();
   Function *F = CI->getParent()->getParent();
-  IRBuilder<> AllocaBuilder(F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
   const DataLayout &DL = F->getParent()->getDataLayout();
   const DataLayout &DL = F->getParent()->getDataLayout();
 
 
   Value *userTyV = CI->getArgOperand(ArgIdx);
   Value *userTyV = CI->getArgOperand(ArgIdx);
@@ -3269,7 +3269,9 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
   // Skip matrix types.
   // Skip matrix types.
   if (HLMatrixLower::IsMatrixType(Ty))
   if (HLMatrixLower::IsMatrixType(Ty))
     return false;
     return false;
-  
+
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+
   if (StructType *ST = dyn_cast<StructType>(Ty)) {
   if (StructType *ST = dyn_cast<StructType>(Ty)) {
     // Skip HLSL object types.
     // Skip HLSL object types.
     if (HLModule::IsHLSLObjectType(ST)) {
     if (HLModule::IsHLSLObjectType(ST)) {
@@ -3283,7 +3285,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
     if (SA && SA->IsEmptyStruct())
     if (SA && SA->IsEmptyStruct())
       return true;
       return true;
     for (int i = 0, e = numTypes; i != e; ++i) {
     for (int i = 0, e = numTypes; i != e; ++i) {
-      AllocaInst *NA = Builder.CreateAlloca(ST->getContainedType(i), nullptr, V->getName() + "." + Twine(i));
+      AllocaInst *NA = AllocaBuilder.CreateAlloca(ST->getContainedType(i), nullptr, V->getName() + "." + Twine(i));
       bool markPrecise = hasPrecise;
       bool markPrecise = hasPrecise;
       if (SA) {
       if (SA) {
         DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
         DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
@@ -3324,7 +3326,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
         if (SA && SA->IsEmptyStruct())
         if (SA && SA->IsEmptyStruct())
           return true;
           return true;
         for (int i = 0, e = numTypes; i != e; ++i) {
         for (int i = 0, e = numTypes; i != e; ++i) {
-          AllocaInst *NA = Builder.CreateAlloca(
+          AllocaInst *NA = AllocaBuilder.CreateAlloca(
               CreateNestArrayTy(ElST->getContainedType(i), nestArrayTys),
               CreateNestArrayTy(ElST->getContainedType(i), nestArrayTys),
               nullptr, V->getName() + "." + Twine(i));
               nullptr, V->getName() + "." + Twine(i));
           bool markPrecise = hasPrecise;
           bool markPrecise = hasPrecise;
@@ -3344,7 +3346,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
             nestArrayTys.size() > 1)
             nestArrayTys.size() > 1)
           return false;
           return false;
         for (int i = 0, e = AT->getNumElements(); i != e; ++i) {
         for (int i = 0, e = AT->getNumElements(); i != e; ++i) {
-          AllocaInst *NA = Builder.CreateAlloca(ElTy, nullptr,
+          AllocaInst *NA = AllocaBuilder.CreateAlloca(ElTy, nullptr,
                                                 V->getName() + "." + Twine(i));
                                                 V->getName() + "." + Twine(i));
           Elts.push_back(NA);
           Elts.push_back(NA);
         }
         }
@@ -3362,7 +3364,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
       ArrayType *scalarArrayTy = CreateNestArrayTy(ElVT->getElementType(), nestArrayTys);
       ArrayType *scalarArrayTy = CreateNestArrayTy(ElVT->getElementType(), nestArrayTys);
 
 
       for (int i = 0, e = ElVT->getNumElements(); i != e; ++i) {
       for (int i = 0, e = ElVT->getNumElements(); i != e; ++i) {
-        AllocaInst *NA = Builder.CreateAlloca(scalarArrayTy, nullptr, 
+        AllocaInst *NA = AllocaBuilder.CreateAlloca(scalarArrayTy, nullptr,
                            V->getName() + "." + Twine(i));
                            V->getName() + "." + Twine(i));
         if (hasPrecise)
         if (hasPrecise)
           HLModule::MarkPreciseAttributeWithMetadata(NA);
           HLModule::MarkPreciseAttributeWithMetadata(NA);
@@ -4774,7 +4776,8 @@ void SROA_Parameter_HLSL::replaceCastParameter(
   if (isa<Argument>(OldParam) && OldTy->isPointerTy()) {
   if (isa<Argument>(OldParam) && OldTy->isPointerTy()) {
     // OldParam will be removed with Old function.
     // OldParam will be removed with Old function.
     // Create alloca to replace it.
     // Create alloca to replace it.
-    Value *AllocParam = Builder.CreateAlloca(OldTy->getPointerElementType());
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(&F));
+    Value *AllocParam = AllocaBuilder.CreateAlloca(OldTy->getPointerElementType());
     OldParam->replaceAllUsesWith(AllocParam);
     OldParam->replaceAllUsesWith(AllocParam);
     OldParam = AllocParam;
     OldParam = AllocParam;
   }
   }
@@ -4870,6 +4873,8 @@ Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
     IRBuilder<> &Builder) {
     IRBuilder<> &Builder) {
   Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
   Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
   Module &M = *m_pHLModule->GetModule();
   Module &M = *m_pHLModule->GetModule();
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+
   // Lower resource type to handle ty.
   // Lower resource type to handle ty.
   if (HLModule::IsHLSLObjectType(Ty) &&
   if (HLModule::IsHLSLObjectType(Ty) &&
     !HLModule::IsStreamOutputPtrType(V->getType())) {
     !HLModule::IsStreamOutputPtrType(V->getType())) {
@@ -4881,7 +4886,7 @@ Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
         /*opcode*/ 0, HandleTy, { LdRes }, M);
         /*opcode*/ 0, HandleTy, { LdRes }, M);
     }
     }
     else {
     else {
-      V = Builder.CreateAlloca(HandleTy);
+      V = AllocaBuilder.CreateAlloca(HandleTy);
     }
     }
     castParamMap[V] = std::make_pair(Res, inputQual);
     castParamMap[V] = std::make_pair(Res, inputQual);
   }
   }
@@ -4895,7 +4900,7 @@ Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
     if (HLModule::IsHLSLObjectType(AT)) {
     if (HLModule::IsHLSLObjectType(AT)) {
       Value *Res = V;
       Value *Res = V;
       Type *Ty = ArrayType::get(HandleTy, arraySize);
       Type *Ty = ArrayType::get(HandleTy, arraySize);
-      V = Builder.CreateAlloca(Ty);
+      V = AllocaBuilder.CreateAlloca(Ty);
       castParamMap[V] = std::make_pair(Res, inputQual);
       castParamMap[V] = std::make_pair(Res, inputQual);
     }
     }
   }
   }
@@ -4907,9 +4912,11 @@ Value *SROA_Parameter_HLSL::castArgumentIfRequired(
     DxilParamInputQual inputQual, DxilFieldAnnotation &annotation,
     DxilParamInputQual inputQual, DxilFieldAnnotation &annotation,
     std::deque<Value *> &WorkList, IRBuilder<> &Builder) {
     std::deque<Value *> &WorkList, IRBuilder<> &Builder) {
   Module &M = *m_pHLModule->GetModule();
   Module &M = *m_pHLModule->GetModule();
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
+
   // Remove pointer for vector/scalar which is not out.
   // Remove pointer for vector/scalar which is not out.
   if (V->getType()->isPointerTy() && !Ty->isAggregateType() && !bOut) {
   if (V->getType()->isPointerTy() && !Ty->isAggregateType() && !bOut) {
-    Value *Ptr = Builder.CreateAlloca(Ty);
+    Value *Ptr = AllocaBuilder.CreateAlloca(Ty);
     V->replaceAllUsesWith(Ptr);
     V->replaceAllUsesWith(Ptr);
     // Create load here to make correct type.
     // Create load here to make correct type.
     // The Ptr will be store with correct value in replaceCastParameter.
     // The Ptr will be store with correct value in replaceCastParameter.
@@ -4995,6 +5002,7 @@ void SROA_Parameter_HLSL::flattenArgument(
     std::vector<Value *> &FlatParamList,
     std::vector<Value *> &FlatParamList,
     std::vector<DxilParameterAnnotation> &FlatAnnotationList,
     std::vector<DxilParameterAnnotation> &FlatAnnotationList,
     IRBuilder<> &Builder, DbgDeclareInst *DDI) {
     IRBuilder<> &Builder, DbgDeclareInst *DDI) {
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
   std::deque<Value *> WorkList;
   std::deque<Value *> WorkList;
   WorkList.push_back(Arg);
   WorkList.push_back(Arg);
 
 
@@ -5130,7 +5138,7 @@ void SROA_Parameter_HLSL::flattenArgument(
         unsigned  targetIndex;
         unsigned  targetIndex;
         Semantic::DecomposeNameAndIndex(semanticStr, &targetStr, &targetIndex);
         Semantic::DecomposeNameAndIndex(semanticStr, &targetStr, &targetIndex);
         // Replace target parameter with local target.
         // Replace target parameter with local target.
-        AllocaInst *localTarget = Builder.CreateAlloca(Ty);
+        AllocaInst *localTarget = AllocaBuilder.CreateAlloca(Ty);
         V->replaceAllUsesWith(localTarget);
         V->replaceAllUsesWith(localTarget);
         unsigned arraySize = 1;
         unsigned arraySize = 1;
         std::vector<unsigned> arraySizeList;
         std::vector<unsigned> arraySizeList;
@@ -5147,7 +5155,7 @@ void SROA_Parameter_HLSL::flattenArgument(
         // Create flattened target.
         // Create flattened target.
         DxilFieldAnnotation EltAnnotation = annotation;
         DxilFieldAnnotation EltAnnotation = annotation;
         for (unsigned i=0;i<arraySize;i++) {
         for (unsigned i=0;i<arraySize;i++) {
-          Value *Elt = Builder.CreateAlloca(Ty);
+          Value *Elt = AllocaBuilder.CreateAlloca(Ty);
           EltAnnotation.SetSemanticString(targetStr.str()+std::to_string(targetIndex+i));
           EltAnnotation.SetSemanticString(targetStr.str()+std::to_string(targetIndex+i));
 
 
           // Add semantic type.
           // Add semantic type.
@@ -5241,7 +5249,7 @@ void SROA_Parameter_HLSL::flattenArgument(
         // For stream output objects.
         // For stream output objects.
         // Create a value as output value.
         // Create a value as output value.
         Type *outputType = V->getType()->getPointerElementType()->getStructElementType(0);
         Type *outputType = V->getType()->getPointerElementType()->getStructElementType(0);
-        Value *outputVal = Builder.CreateAlloca(outputType);
+        Value *outputVal = AllocaBuilder.CreateAlloca(outputType);
         // For each stream.Append(data)
         // For each stream.Append(data)
         // transform into
         // transform into
         //   d = load data
         //   d = load data
@@ -5362,7 +5370,8 @@ void SROA_Parameter_HLSL::preprocessArgUsedInCall(Function *F) {
   DxilFunctionAnnotation *pFuncAnnot = typeSys.GetFunctionAnnotation(F);
   DxilFunctionAnnotation *pFuncAnnot = typeSys.GetFunctionAnnotation(F);
   DXASSERT(pFuncAnnot, "else invalid function");
   DXASSERT(pFuncAnnot, "else invalid function");
 
 
-  IRBuilder<> AllocaBuilder(F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
+  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
 
 
   SmallVector<ReturnInst*, 2> retList;
   SmallVector<ReturnInst*, 2> retList;
   for (BasicBlock &bb : F->getBasicBlockList()) {
   for (BasicBlock &bb : F->getBasicBlockList()) {
@@ -5397,7 +5406,7 @@ void SROA_Parameter_HLSL::preprocessArgUsedInCall(Function *F) {
       if (inputQual == DxilParamInputQual::In ||
       if (inputQual == DxilParamInputQual::In ||
           inputQual == DxilParamInputQual::Inout) {
           inputQual == DxilParamInputQual::Inout) {
         // copy arg to tmp.
         // copy arg to tmp.
-        CallInst *argToTmp = AllocaBuilder.CreateMemCpy(TmpArg, &arg, size, 0);
+        CallInst *argToTmp = Builder.CreateMemCpy(TmpArg, &arg, size, 0);
         // Split the memcpy.
         // Split the memcpy.
         MemcpySplitter::SplitMemCpy(cast<MemCpyInst>(argToTmp), DL, nullptr,
         MemcpySplitter::SplitMemCpy(cast<MemCpyInst>(argToTmp), DL, nullptr,
                                     typeSys);
                                     typeSys);
@@ -5515,7 +5524,7 @@ static void LegalizeDxilInputOutputs(Function *F,
         // DxilGenerationPass.
         // DxilGenerationPass.
         isColMajor = paramAnnotation.GetMatrixAnnotation().Orientation ==
         isColMajor = paramAnnotation.GetMatrixAnnotation().Orientation ==
                      MatrixOrientation::ColumnMajor;
                      MatrixOrientation::ColumnMajor;
-        IRBuilder<> Builder(EntryBlk.getFirstInsertionPt());
+        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
 
 
         HLCastOpcode opcode = isColMajor ? HLCastOpcode::ColMatrixToVecCast
         HLCastOpcode opcode = isColMajor ? HLCastOpcode::ColMatrixToVecCast
                                          : HLCastOpcode::RowMatrixToVecCast;
                                          : HLCastOpcode::RowMatrixToVecCast;
@@ -5581,9 +5590,10 @@ static void LegalizeDxilInputOutputs(Function *F,
     }
     }
 
 
     if (bNeedTemp) {
     if (bNeedTemp) {
-      IRBuilder<> Builder(EntryBlk.getFirstInsertionPt());
+      IRBuilder<> AllocaBuilder(EntryBlk.getFirstInsertionPt());
+      IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(&EntryBlk));
 
 
-      AllocaInst *temp = Builder.CreateAlloca(Ty);
+      AllocaInst *temp = AllocaBuilder.CreateAlloca(Ty);
       // Replace all uses with temp.
       // Replace all uses with temp.
       arg.replaceAllUsesWith(temp);
       arg.replaceAllUsesWith(temp);
 
 
@@ -5674,9 +5684,9 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
     // Insert point may be removed. So recreate builder every time.
     // Insert point may be removed. So recreate builder every time.
     IRBuilder<> Builder(Ctx);
     IRBuilder<> Builder(Ctx);
     if (!F->isDeclaration()) {
     if (!F->isDeclaration()) {
-      Builder.SetInsertPoint(F->getEntryBlock().getFirstInsertionPt());
+      Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(F));
     } else {
     } else {
-      Builder.SetInsertPoint(TmpBlockForFuncDecl->getFirstInsertionPt());
+      Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(TmpBlockForFuncDecl.get()));
     }
     }
 
 
     unsigned prevFlatParamCount = FlatParamList.size();
     unsigned prevFlatParamCount = FlatParamList.size();
@@ -5700,12 +5710,15 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
   // Split and change to out parameter.
   // Split and change to out parameter.
   if (!retType->isVoidTy()) {
   if (!retType->isVoidTy()) {
     IRBuilder<> Builder(Ctx);
     IRBuilder<> Builder(Ctx);
+    IRBuilder<> AllocaBuilder(Ctx);
     if (!F->isDeclaration()) {
     if (!F->isDeclaration()) {
-      Builder.SetInsertPoint(F->getEntryBlock().getFirstInsertionPt());
+      Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(F));
+      AllocaBuilder.SetInsertPoint(dxilutil::FindAllocaInsertionPt(F));
     } else {
     } else {
-      Builder.SetInsertPoint(TmpBlockForFuncDecl->getFirstInsertionPt());
+      Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(TmpBlockForFuncDecl.get()));
+      AllocaBuilder.SetInsertPoint(TmpBlockForFuncDecl->getFirstInsertionPt());
     }
     }
-    Value *retValAddr = Builder.CreateAlloca(retType);
+    Value *retValAddr = AllocaBuilder.CreateAlloca(retType);
     DxilParameterAnnotation &retAnnotation =
     DxilParameterAnnotation &retAnnotation =
         funcAnnotation->GetRetTypeAnnotation();
         funcAnnotation->GetRetTypeAnnotation();
     Module &M = *m_pHLModule->GetModule();
     Module &M = *m_pHLModule->GetModule();
@@ -5915,7 +5928,8 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
     LLVMContext &Context = F->getContext();
     LLVMContext &Context = F->getContext();
 
 
     // Parameter casts are inserted at the beginning of the entry block.
     // Parameter casts are inserted at the beginning of the entry block.
-    IRBuilder<> Builder(flatF->getEntryBlock().getFirstInsertionPt());
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(flatF));
+    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(flatF));
 
 
     while (argIter != flatF->arg_end()) {
     while (argIter != flatF->arg_end()) {
       Argument *Arg = argIter++;
       Argument *Arg = argIter++;
@@ -5940,7 +5954,7 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
             StoreInst *SI = cast<StoreInst>(*flatArg->user_begin());
             StoreInst *SI = cast<StoreInst>(*flatArg->user_begin());
             allocaArg = SI->getPointerOperand();
             allocaArg = SI->getPointerOperand();
           } else {
           } else {
-            allocaArg = Builder.CreateAlloca(flatArg->getType());
+            allocaArg = AllocaBuilder.CreateAlloca(flatArg->getType());
             StoreInst *initArg = Builder.CreateStore(flatArg, allocaArg);
             StoreInst *initArg = Builder.CreateStore(flatArg, allocaArg);
             Value *ldArg = Builder.CreateLoad(allocaArg);
             Value *ldArg = Builder.CreateLoad(allocaArg);
             flatArg->replaceAllUsesWith(ldArg);
             flatArg->replaceAllUsesWith(ldArg);
@@ -6052,8 +6066,10 @@ bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV
     return false;
     return false;
 
 
   Function *F = const_cast<Function*>(PS.AccessingFunction);
   Function *F = const_cast<Function*>(PS.AccessingFunction);
-  IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt());
-  AllocaInst *AI = Builder.CreateAlloca(GV->getType()->getElementType());
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
+  AllocaInst *AI = AllocaBuilder.CreateAlloca(GV->getType()->getElementType());
+
+  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
 
 
   // Store the initializer if it exists.
   // Store the initializer if it exists.
   if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
   if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
@@ -6100,9 +6116,9 @@ protected:
 };
 };
 
 
 AllocaInst *LowerTypePass::lowerAlloca(AllocaInst *A) {
 AllocaInst *LowerTypePass::lowerAlloca(AllocaInst *A) {
-  IRBuilder<> Builder(A);
+  IRBuilder<> AllocaBuilder(A);
   Type *NewTy = lowerType(A->getAllocatedType());
   Type *NewTy = lowerType(A->getAllocatedType());
-  return Builder.CreateAlloca(NewTy);
+  return AllocaBuilder.CreateAlloca(NewTy);
 }
 }
 
 
 GlobalVariable *LowerTypePass::lowerInternalGlobal(GlobalVariable *GV) {
 GlobalVariable *LowerTypePass::lowerInternalGlobal(GlobalVariable *GV) {