Sfoglia il codice sorgente

Split DxilGenerationPass.cpp into several files for its several passes (#2180)

Tristan Labelle 6 anni fa
parent
commit
347c04d51c

+ 1 - 0
include/dxc/HLSL/HLModule.h

@@ -28,6 +28,7 @@
 #include <unordered_set>
 
 namespace llvm {
+template<typename T> class ArrayRef;
 class LLVMContext;
 class Module;
 class Function;

+ 6 - 0
lib/HLSL/CMakeLists.txt

@@ -10,9 +10,12 @@ add_llvm_library(LLVMHLSL
   DxilEliminateOutputDynamicIndexing.cpp
   DxilExpandTrigIntrinsics.cpp
   DxilGenerationPass.cpp
+  DxilLegalizeEvalOperations.cpp
   DxilLegalizeSampleOffsetPass.cpp
   DxilLinker.cpp
+  DxilPrecisePropagatePass.cpp
   DxilPreparePasses.cpp
+  DxilPromoteResourcePasses.cpp
   DxilPackSignatureElement.cpp
   DxilPatchShaderRecordBindings.cpp
   DxilPreserveAllOutputs.cpp
@@ -20,14 +23,17 @@ add_llvm_library(LLVMHLSL
   DxilSignatureValidation.cpp
   DxilTargetLowering.cpp
   DxilTargetTransformInfo.cpp
+  DxilTranslateRawBuffer.cpp
   DxilExportMap.cpp
   DxilValidation.cpp
   DxcOptimizer.cpp
+  HLDeadFunctionElimination.cpp
   HLExpandStoreIntrinsics.cpp
   HLMatrixBitcastLowerPass.cpp
   HLMatrixLowerPass.cpp
   HLMatrixSubscriptUseReplacer.cpp
   HLMatrixType.cpp
+  HLMetadataPasses.cpp
   HLModule.cpp
   HLOperations.cpp
   HLOperationLower.cpp

+ 19 - 1166
lib/HLSL/DxilGenerationPass.cpp

@@ -9,39 +9,33 @@
 //                                                                           //
 ///////////////////////////////////////////////////////////////////////////////
 
-#include "dxc/HLSL/DxilGenerationPass.h"
-#include "dxc/DXIL/DxilOperations.h"
+#include "HLSignatureLower.h"
+#include "dxc/DXIL/DxilEntryProps.h"
 #include "dxc/DXIL/DxilModule.h"
+#include "dxc/DXIL/DxilOperations.h"
+#include "dxc/DXIL/DxilUtil.h"
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/HLSLExtensionsCodegenHelper.h"
 #include "dxc/HLSL/HLModule.h"
+#include "dxc/HLSL/HLOperationLower.h"
 #include "dxc/HLSL/HLOperations.h"
-#include "dxc/DXIL/DxilInstructions.h"
-#include "dxc/HlslIntrinsicOp.h"
 #include "dxc/Support/Global.h"
-#include "dxc/DXIL/DxilTypeSystem.h"
-#include "dxc/HLSL/HLOperationLower.h"
-#include "HLSignatureLower.h"
-#include "dxc/DXIL/DxilUtil.h"
-#include "dxc/Support/exception.h"
-#include "dxc/DXIL/DxilEntryProps.h"
-
-#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/IR/Dominators.h"
-#include <memory>
+#include <unordered_map>
 #include <unordered_set>
-#include <iterator>
+#include <vector>
 
 using namespace llvm;
 using namespace hlsl;
@@ -50,40 +44,6 @@ using namespace hlsl;
 
 namespace {
 
-// Collect unused phi of resources and remove them.
-class ResourceRemover : public LoadAndStorePromoter {
-  AllocaInst *AI;
-  mutable std::unordered_set<PHINode *> unusedPhis;
-
-public:
-  ResourceRemover(ArrayRef<Instruction *> Insts, SSAUpdater &S)
-      : LoadAndStorePromoter(Insts, S), AI(nullptr) {}
-
-  void run(AllocaInst *AI, const SmallVectorImpl<Instruction *> &Insts) {
-    // Remember which alloca we're promoting (for isInstInList).
-    this->AI = AI;
-    LoadAndStorePromoter::run(Insts);
-    for (PHINode *P : unusedPhis) {
-      P->eraseFromParent();
-    }
-  }
-  bool
-  isInstInList(Instruction *I,
-               const SmallVectorImpl<Instruction *> &Insts) const override {
-    if (LoadInst *LI = dyn_cast<LoadInst>(I))
-      return LI->getOperand(0) == AI;
-    return cast<StoreInst>(I)->getPointerOperand() == AI;
-  }
-
-  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
-    if (PHINode *PHI = dyn_cast<PHINode>(V)) {
-      if (PHI->user_empty())
-        unusedPhis.insert(PHI);
-    }
-    LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
-  }
-};
-
 void SimplifyGlobalSymbol(GlobalVariable *GV) {
   Type *Ty = GV->getType()->getElementType();
   if (!Ty->isArrayTy()) {
@@ -629,1110 +589,3 @@ ModulePass *llvm::createDxilGenerationPass(bool NotOptimized, hlsl::HLSLExtensio
 }
 
 INITIALIZE_PASS(DxilGenerationPass, "dxilgen", "HLSL DXIL Generation", false, false)
-
-
-///////////////////////////////////////////////////////////////////////////////
-
-namespace {
-class HLEmitMetadata : public ModulePass {
-public:
-  static char ID; // Pass identification, replacement for typeid
-  explicit HLEmitMetadata() : ModulePass(ID) {}
-
-  const char *getPassName() const override { return "HLSL High-Level Metadata Emit"; }
-
-  bool runOnModule(Module &M) override {
-    if (M.HasHLModule()) {
-      HLModule::ClearHLMetadata(M);
-      M.GetHLModule().EmitHLMetadata();
-      return true;
-    }
-
-    return false;
-  }
-};
-}
-
-char HLEmitMetadata::ID = 0;
-
-ModulePass *llvm::createHLEmitMetadataPass() {
-  return new HLEmitMetadata();
-}
-
-INITIALIZE_PASS(HLEmitMetadata, "hlsl-hlemit", "HLSL High-Level Metadata Emit", false, false)
-
-///////////////////////////////////////////////////////////////////////////////
-
-namespace {
-class HLEnsureMetadata : public ModulePass {
-public:
-  static char ID; // Pass identification, replacement for typeid
-  explicit HLEnsureMetadata() : ModulePass(ID) {}
-
-  const char *getPassName() const override { return "HLSL High-Level Metadata Ensure"; }
-
-  bool runOnModule(Module &M) override {
-    if (!M.HasHLModule()) {
-      M.GetOrCreateHLModule();
-      return true;
-    }
-
-    return false;
-  }
-};
-}
-
-char HLEnsureMetadata::ID = 0;
-
-ModulePass *llvm::createHLEnsureMetadataPass() {
-  return new HLEnsureMetadata();
-}
-
-INITIALIZE_PASS(HLEnsureMetadata, "hlsl-hlensure", "HLSL High-Level Metadata Ensure", false, false)
-
-///////////////////////////////////////////////////////////////////////////////
-// Precise propagate.
-
-namespace {
-class DxilPrecisePropagatePass : public ModulePass {
-public:
-  static char ID; // Pass identification, replacement for typeid
-  explicit DxilPrecisePropagatePass() : ModulePass(ID) {}
-
-  const char *getPassName() const override { return "DXIL Precise Propagate"; }
-
-  bool runOnModule(Module &M) override {
-    DxilModule &dxilModule = M.GetOrCreateDxilModule();
-    DxilTypeSystem &typeSys = dxilModule.GetTypeSystem();
-    std::unordered_set<Instruction*> processedSet;
-    std::vector<Function*> deadList;
-    for (Function &F : M.functions()) {
-      if (HLModule::HasPreciseAttribute(&F)) {
-        PropagatePreciseOnFunctionUser(F, typeSys, processedSet);
-        deadList.emplace_back(&F);
-      }
-    }
-    for (Function *F : deadList)
-      F->eraseFromParent();
-    return true;
-  }
-
-private:
-  void PropagatePreciseOnFunctionUser(
-      Function &F, DxilTypeSystem &typeSys,
-      std::unordered_set<Instruction *> &processedSet);
-};
-
-char DxilPrecisePropagatePass::ID = 0;
-
-}
-
-static void PropagatePreciseAttribute(Instruction *I, DxilTypeSystem &typeSys,
-    std::unordered_set<Instruction *> &processedSet);
-
-static void PropagatePreciseAttributeOnOperand(
-    Value *V, DxilTypeSystem &typeSys, LLVMContext &Context,
-    std::unordered_set<Instruction *> &processedSet) {
-  Instruction *I = dyn_cast<Instruction>(V);
-  // Skip none inst.
-  if (!I)
-    return;
-
-  FPMathOperator *FPMath = dyn_cast<FPMathOperator>(I);
-  // Skip none FPMath
-  if (!FPMath)
-    return;
-
-  // Skip inst already marked.
-  if (processedSet.count(I) > 0)
-    return;
-  // TODO: skip precise on integer type, sample instruction...
-  processedSet.insert(I);
-  // Set precise fast math on those instructions that support it.
-  if (DxilModule::PreservesFastMathFlags(I))
-    DxilModule::SetPreciseFastMathFlags(I);
-
-  // Fast math not work on call, use metadata.
-  if (CallInst *CI = dyn_cast<CallInst>(I))
-    HLModule::MarkPreciseAttributeWithMetadata(CI);
-  PropagatePreciseAttribute(I, typeSys, processedSet);
-}
-
-static void PropagatePreciseAttributeOnPointer(
-    Value *Ptr, DxilTypeSystem &typeSys, LLVMContext &Context,
-    std::unordered_set<Instruction *> &processedSet) {
-  // Find all store and propagate on the val operand of store.
-  // For CallInst, if Ptr is used as out parameter, mark it.
-  for (User *U : Ptr->users()) {
-    Instruction *user = cast<Instruction>(U);
-    if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
-      Value *val = stInst->getValueOperand();
-      PropagatePreciseAttributeOnOperand(val, typeSys, Context, processedSet);
-    } else if (CallInst *CI = dyn_cast<CallInst>(user)) {
-      bool bReadOnly = true;
-
-      Function *F = CI->getCalledFunction();
-      const DxilFunctionAnnotation *funcAnnotation =
-          typeSys.GetFunctionAnnotation(F);
-      for (unsigned i = 0; i < CI->getNumArgOperands(); ++i) {
-        if (Ptr != CI->getArgOperand(i))
-          continue;
-
-        const DxilParameterAnnotation &paramAnnotation =
-            funcAnnotation->GetParameterAnnotation(i);
-        // OutputPatch and OutputStream will be checked after scalar repl.
-        // Here only check out/inout
-        if (paramAnnotation.GetParamInputQual() == DxilParamInputQual::Out ||
-            paramAnnotation.GetParamInputQual() == DxilParamInputQual::Inout) {
-          bReadOnly = false;
-          break;
-        }
-      }
-
-      if (!bReadOnly)
-        PropagatePreciseAttributeOnOperand(CI, typeSys, Context, processedSet);
-    }
-  }
-}
-
-static void
-PropagatePreciseAttribute(Instruction *I, DxilTypeSystem &typeSys,
-                          std::unordered_set<Instruction *> &processedSet) {
-  LLVMContext &Context = I->getContext();
-  if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
-    PropagatePreciseAttributeOnPointer(AI, typeSys, Context, processedSet);
-  } else if (dyn_cast<CallInst>(I)) {
-    // Propagate every argument.
-    // TODO: only propagate precise argument.
-    for (Value *src : I->operands())
-      PropagatePreciseAttributeOnOperand(src, typeSys, Context, processedSet);
-  } else if (dyn_cast<FPMathOperator>(I)) {
-    // TODO: only propagate precise argument.
-    for (Value *src : I->operands())
-      PropagatePreciseAttributeOnOperand(src, typeSys, Context, processedSet);
-  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(I)) {
-    Value *Ptr = ldInst->getPointerOperand();
-    PropagatePreciseAttributeOnPointer(Ptr, typeSys, Context, processedSet);
-  } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
-    PropagatePreciseAttributeOnPointer(GEP, typeSys, Context, processedSet);
-  // TODO: support more case which need
-}
-
-void DxilPrecisePropagatePass::PropagatePreciseOnFunctionUser(
-    Function &F, DxilTypeSystem &typeSys,
-    std::unordered_set<Instruction *> &processedSet) {
-  LLVMContext &Context = F.getContext();
-  for (auto U = F.user_begin(), E = F.user_end(); U != E;) {
-    CallInst *CI = cast<CallInst>(*(U++));
-    Value *V = CI->getArgOperand(0);
-    PropagatePreciseAttributeOnOperand(V, typeSys, Context, processedSet);
-    CI->eraseFromParent();
-  }
-}
-
-ModulePass *llvm::createDxilPrecisePropagatePass() {
-  return new DxilPrecisePropagatePass();
-}
-
-INITIALIZE_PASS(DxilPrecisePropagatePass, "hlsl-dxil-precise", "DXIL precise attribute propagate", false, false)
-
-///////////////////////////////////////////////////////////////////////////////
-
-namespace {
-class HLDeadFunctionElimination : public ModulePass {
-public:
-  static char ID; // Pass identification, replacement for typeid
-  explicit HLDeadFunctionElimination () : ModulePass(ID) {}
-
-  const char *getPassName() const override { return "Remove all unused function except entry from HLModule"; }
-
-  bool runOnModule(Module &M) override {
-    if (M.HasHLModule()) {
-      HLModule &HLM = M.GetHLModule();
-
-      bool IsLib = HLM.GetShaderModel()->IsLib();
-      // Remove unused functions except entry and patch constant func.
-      // For library profile, only remove unused external functions.
-      Function *EntryFunc = HLM.GetEntryFunction();
-      Function *PatchConstantFunc = HLM.GetPatchConstantFunction();
-
-      return dxilutil::RemoveUnusedFunctions(M, EntryFunc, PatchConstantFunc,
-                                             IsLib);
-    }
-
-    return false;
-  }
-};
-}
-
-char HLDeadFunctionElimination::ID = 0;
-
-ModulePass *llvm::createHLDeadFunctionEliminationPass() {
-  return new HLDeadFunctionElimination();
-}
-
-INITIALIZE_PASS(HLDeadFunctionElimination, "hl-dfe", "Remove all unused function except entry from HLModule", false, false)
-
-
-///////////////////////////////////////////////////////////////////////////////
-// Legalize resource use.
-// Map local or static global resource to global resource.
-// Require inline for static global resource.
-
-namespace {
-
-static const StringRef kStaticResourceLibErrorMsg = "static global resource use is disallowed in library exports.";
-
-class DxilPromoteStaticResources : public ModulePass {
-public:
-  static char ID; // Pass identification, replacement for typeid
-  explicit DxilPromoteStaticResources()
-      : ModulePass(ID) {}
-
-  const char *getPassName() const override {
-    return "DXIL Legalize Static Resource Use";
-  }
-
-  bool runOnModule(Module &M) override {
-    // Promote static global variables.
-    return PromoteStaticGlobalResources(M);
-  }
-
-private:
-  bool PromoteStaticGlobalResources(Module &M);
-};
-
-char DxilPromoteStaticResources::ID = 0;
-
-class DxilPromoteLocalResources : public FunctionPass {
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-
-public:
-  static char ID; // Pass identification, replacement for typeid
-  explicit DxilPromoteLocalResources()
-      : FunctionPass(ID) {}
-
-  const char *getPassName() const override {
-    return "DXIL Legalize Resource Use";
-  }
-
-  bool runOnFunction(Function &F) override {
-    // Promote local resource first.
-    return PromoteLocalResource(F);
-  }
-
-private:
-  bool PromoteLocalResource(Function &F);
-};
-
-char DxilPromoteLocalResources::ID = 0;
-
-}
-
-void DxilPromoteLocalResources::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<AssumptionCacheTracker>();
-  AU.addRequired<DominatorTreeWrapperPass>();
-  AU.setPreservesAll();
-}
-
-bool DxilPromoteLocalResources::PromoteLocalResource(Function &F) {
-  bool bModified = false;
-  std::vector<AllocaInst *> Allocas;
-  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  AssumptionCache &AC =
-      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-
-  BasicBlock &BB = F.getEntryBlock();
-  unsigned allocaSize = 0;
-  while (1) {
-    Allocas.clear();
-
-    // Find allocas that are safe to promote, by looking at all instructions in
-    // the entry node
-    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
-      if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { // Is it an alloca?
-        if (dxilutil::IsHLSLObjectType(dxilutil::GetArrayEltTy(AI->getAllocatedType()))) {
-          if (isAllocaPromotable(AI))
-            Allocas.push_back(AI);
-        }
-      }
-    if (Allocas.empty())
-      break;
-
-    // No update.
-    // Report error and break.
-    if (allocaSize == Allocas.size()) {
-      F.getContext().emitError(dxilutil::kResourceMapErrorMsg);
-      break;
-    }
-    allocaSize = Allocas.size();
-
-    PromoteMemToReg(Allocas, *DT, nullptr, &AC);
-    bModified = true;
-  }
-
-  return bModified;
-}
-
-FunctionPass *llvm::createDxilPromoteLocalResources() {
-  return new DxilPromoteLocalResources();
-}
-
-INITIALIZE_PASS_BEGIN(DxilPromoteLocalResources,
-                      "hlsl-dxil-promote-local-resources",
-                      "DXIL promote local resource use", false, true)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(DxilPromoteLocalResources,
-                    "hlsl-dxil-promote-local-resources",
-                    "DXIL promote local resource use", false, true)
-
-bool DxilPromoteStaticResources::PromoteStaticGlobalResources(
-    Module &M) {
-  if (M.GetOrCreateHLModule().GetShaderModel()->IsLib()) {
-    // Read/write to global static resource is disallowed for libraries:
-    // Resource use needs to be resolved to a single real global resource,
-    // but it may not be possible since any external function call may re-enter
-    // at any other library export, which could modify the global static
-    // between write and read.
-    // While it could work for certain cases, describing the boundary at
-    // the HLSL level is difficult, so at this point it's better to disallow.
-    // example of what could work:
-    //  After inlining, exported functions must have writes to static globals
-    //  before reads, and must not have any external function calls between
-    //  writes and subsequent reads, such that the static global may be
-    //  optimized away for the exported function.
-    for (auto &GV : M.globals()) {
-      if (GV.getLinkage() == GlobalVariable::LinkageTypes::InternalLinkage &&
-        dxilutil::IsHLSLObjectType(dxilutil::GetArrayEltTy(GV.getType()))) {
-        if (!GV.user_empty()) {
-          if (Instruction *I = dyn_cast<Instruction>(*GV.user_begin())) {
-            dxilutil::EmitErrorOnInstruction(I, kStaticResourceLibErrorMsg);
-            break;
-          }
-        }
-      }
-    }
-    return false;
-  }
-
-  bool bModified = false;
-  std::set<GlobalVariable *> staticResources;
-  for (auto &GV : M.globals()) {
-    if (GV.getLinkage() == GlobalVariable::LinkageTypes::InternalLinkage &&
-        dxilutil::IsHLSLObjectType(dxilutil::GetArrayEltTy(GV.getType()))) {
-      staticResources.insert(&GV);
-    }
-  }
-  SSAUpdater SSA;
-  SmallVector<Instruction *, 4> Insts;
-  // Make sure every resource load has mapped to global variable.
-  while (!staticResources.empty()) {
-    bool bUpdated = false;
-    for (auto it = staticResources.begin(); it != staticResources.end();) {
-      GlobalVariable *GV = *(it++);
-      // Build list of instructions to promote.
-      for (User *U : GV->users()) {
-        Instruction *I = cast<Instruction>(U);
-        Insts.emplace_back(I);
-      }
-
-      LoadAndStorePromoter(Insts, SSA).run(Insts);
-      if (GV->user_empty()) {
-        bUpdated = true;
-        staticResources.erase(GV);
-      }
-
-      Insts.clear();
-    }
-    if (!bUpdated) {
-      M.getContext().emitError(dxilutil::kResourceMapErrorMsg);
-      break;
-    }
-    bModified = true;
-  }
-  return bModified;
-}
-
-ModulePass *llvm::createDxilPromoteStaticResources() {
-  return new DxilPromoteStaticResources();
-}
-
-INITIALIZE_PASS(DxilPromoteStaticResources,
-                "hlsl-dxil-promote-static-resources",
-                "DXIL promote static resource use", false, false)
-
-///////////////////////////////////////////////////////////////////////////////
-// Legalize EvalOperations.
-// Make sure src of EvalOperations are from function parameter.
-// This is needed in order to translate EvaluateAttribute operations that traces
-// back to LoadInput operations during translation stage. Promoting load/store
-// instructions beforehand will allow us to easily trace back to loadInput from
-// function call.
-namespace {
-
-class DxilLegalizeEvalOperations : public ModulePass {
-public:
-  static char ID; // Pass identification, replacement for typeid
-  explicit DxilLegalizeEvalOperations() : ModulePass(ID) {}
-
-  const char *getPassName() const override {
-    return "DXIL Legalize EvalOperations";
-  }
-
-  bool runOnModule(Module &M) override {
-    for (Function &F : M.getFunctionList()) {
-      hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(&F);
-      if (group != HLOpcodeGroup::NotHL) {
-        std::vector<CallInst *> EvalFunctionCalls;
-        // Find all EvaluateAttribute calls
-        for (User *U : F.users()) {
-          if (CallInst *CI = dyn_cast<CallInst>(U)) {
-            IntrinsicOp evalOp =
-                static_cast<IntrinsicOp>(hlsl::GetHLOpcode(CI));
-            if (evalOp == IntrinsicOp::IOP_EvaluateAttributeAtSample ||
-                evalOp == IntrinsicOp::IOP_EvaluateAttributeCentroid ||
-                evalOp == IntrinsicOp::IOP_EvaluateAttributeSnapped ||
-                evalOp == IntrinsicOp::IOP_GetAttributeAtVertex) {
-              EvalFunctionCalls.push_back(CI);
-            }
-          }
-        }
-        if (EvalFunctionCalls.empty()) {
-          continue;
-        }
-        // Start from the call instruction, find all allocas that this call
-        // uses.
-        std::unordered_set<AllocaInst *> allocas;
-        for (CallInst *CI : EvalFunctionCalls) {
-          FindAllocasForEvalOperations(CI, allocas);
-        }
-        SSAUpdater SSA;
-        SmallVector<Instruction *, 4> Insts;
-        for (AllocaInst *AI : allocas) {
-          for (User *user : AI->users()) {
-            if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
-              Insts.emplace_back(cast<Instruction>(user));
-            }
-          }
-          LoadAndStorePromoter(Insts, SSA).run(Insts);
-          Insts.clear();
-        }
-      }
-    }
-    return true;
-  }
-
-private:
-  void FindAllocasForEvalOperations(Value *val,
-                                    std::unordered_set<AllocaInst *> &allocas);
-};
-
-char DxilLegalizeEvalOperations::ID = 0;
-
-// Find allocas for EvaluateAttribute operations
-void DxilLegalizeEvalOperations::FindAllocasForEvalOperations(
-    Value *val, std::unordered_set<AllocaInst *> &allocas) {
-  Value *CurVal = val;
-  while (!isa<AllocaInst>(CurVal)) {
-    if (CallInst *CI = dyn_cast<CallInst>(CurVal)) {
-      CurVal = CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx);
-    } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(CurVal)) {
-      Value *arg0 =
-          IE->getOperand(0); // Could be another insertelement or undef
-      Value *arg1 = IE->getOperand(1);
-      FindAllocasForEvalOperations(arg0, allocas);
-      CurVal = arg1;
-    } else if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(CurVal)) {
-      Value *arg0 = SV->getOperand(0);
-      Value *arg1 = SV->getOperand(1);
-      FindAllocasForEvalOperations(
-          arg0, allocas); // Shuffle vector could come from different allocas
-      CurVal = arg1;
-    } else if (ExtractElementInst *EE = dyn_cast<ExtractElementInst>(CurVal)) {
-      CurVal = EE->getOperand(0);
-    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurVal)) {
-      CurVal = LI->getOperand(0);
-    } else {
-      break;
-    }
-  }
-  if (AllocaInst *AI = dyn_cast<AllocaInst>(CurVal)) {
-    allocas.insert(AI);
-  }
-}
-} // namespace
-
-ModulePass *llvm::createDxilLegalizeEvalOperationsPass() {
-  return new DxilLegalizeEvalOperations();
-}
-
-INITIALIZE_PASS(DxilLegalizeEvalOperations,
-                "hlsl-dxil-legalize-eval-operations",
-                "DXIL legalize eval operations", false, false)
-
-///////////////////////////////////////////////////////////////////////////////
-// Translate RawBufferLoad/RawBufferStore
-// This pass is to make sure that we generate correct buffer load for DXIL
-// For DXIL < 1.2, rawBufferLoad will be translated to BufferLoad instruction
-// without mask.
-// For DXIL >= 1.2, if min precision is enabled, currently generation pass is
-// producing i16/f16 return type for min precisions. For rawBuffer, we will
-// change this so that min precisions are returning its actual scalar type (i32/f32)
-// and will be truncated to their corresponding types after loading / before storing.
-namespace {
-
-// Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi }
-void Make64bitResultForLoad(Type *EltTy, ArrayRef<Value *> resultElts32,
-                            unsigned size, MutableArrayRef<Value *> resultElts,
-                            hlsl::OP *hlslOP, IRBuilder<> &Builder) {
-  Type *i64Ty = Builder.getInt64Ty();
-  Type *doubleTy = Builder.getDoubleTy();
-  if (EltTy == doubleTy) {
-    Function *makeDouble =
-        hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy);
-    Value *makeDoubleOpArg =
-        Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
-    for (unsigned i = 0; i < size; i++) {
-      Value *lo = resultElts32[2 * i];
-      Value *hi = resultElts32[2 * i + 1];
-      Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
-      resultElts[i] = V;
-    }
-  } else {
-    for (unsigned i = 0; i < size; i++) {
-      Value *lo = resultElts32[2 * i];
-      Value *hi = resultElts32[2 * i + 1];
-      lo = Builder.CreateZExt(lo, i64Ty);
-      hi = Builder.CreateZExt(hi, i64Ty);
-      hi = Builder.CreateShl(hi, 32);
-      resultElts[i] = Builder.CreateOr(lo, hi);
-    }
-  }
-}
-
-// Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi }
-void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
-                           MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
-                           IRBuilder<> &Builder) {
-  Type *i32Ty = Builder.getInt32Ty();
-  Type *doubleTy = Builder.getDoubleTy();
-  Value *undefI32 = UndefValue::get(i32Ty);
-
-  if (EltTy == doubleTy) {
-    Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy);
-    Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
-    for (unsigned i = 0; i < size; i++) {
-      if (isa<UndefValue>(vals[i])) {
-        vals32[2 * i] = undefI32;
-        vals32[2 * i + 1] = undefI32;
-      } else {
-        Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]});
-        Value *lo = Builder.CreateExtractValue(retVal, 0);
-        Value *hi = Builder.CreateExtractValue(retVal, 1);
-        vals32[2 * i] = lo;
-        vals32[2 * i + 1] = hi;
-      }
-    }
-  } else {
-    for (unsigned i = 0; i < size; i++) {
-      if (isa<UndefValue>(vals[i])) {
-        vals32[2 * i] = undefI32;
-        vals32[2 * i + 1] = undefI32;
-      } else {
-        Value *lo = Builder.CreateTrunc(vals[i], i32Ty);
-        Value *hi = Builder.CreateLShr(vals[i], 32);
-        hi = Builder.CreateTrunc(hi, i32Ty);
-        vals32[2 * i] = lo;
-        vals32[2 * i + 1] = hi;
-      }
-    }
-  }
-}
-
-class DxilTranslateRawBuffer : public ModulePass {
-public:
-  static char ID;
-  explicit DxilTranslateRawBuffer() : ModulePass(ID) {}
-  bool runOnModule(Module &M) {
-    unsigned major, minor;
-    DxilModule &DM = M.GetDxilModule();
-    DM.GetDxilVersion(major, minor);
-    OP *hlslOP = DM.GetOP();
-    // Split 64bit for shader model less than 6.3.
-    if (major == 1 && minor <= 2) {
-      for (auto F = M.functions().begin(); F != M.functions().end();) {
-        Function *func = &*(F++);
-        DXIL::OpCodeClass opClass;
-        if (hlslOP->GetOpCodeClass(func, opClass)) {
-          if (opClass == DXIL::OpCodeClass::RawBufferLoad) {
-            Type *ETy =
-                hlslOP->GetOverloadType(DXIL::OpCode::RawBufferLoad, func);
-
-            bool is64 =
-                ETy->isDoubleTy() || ETy == Type::getInt64Ty(ETy->getContext());
-            if (is64) {
-              ReplaceRawBufferLoad64Bit(func, ETy, M);
-              func->eraseFromParent();
-            }
-          } else if (opClass == DXIL::OpCodeClass::RawBufferStore) {
-            Type *ETy =
-                hlslOP->GetOverloadType(DXIL::OpCode::RawBufferStore, func);
-
-            bool is64 =
-                ETy->isDoubleTy() || ETy == Type::getInt64Ty(ETy->getContext());
-            if (is64) {
-              ReplaceRawBufferStore64Bit(func, ETy, M);
-              func->eraseFromParent();
-            }
-          }
-        }
-      }
-    }
-    if (major == 1 && minor < 2) {
-      for (auto F = M.functions().begin(), E = M.functions().end(); F != E;) {
-        Function *func = &*(F++);
-        if (func->hasName()) {
-          if (func->getName().startswith("dx.op.rawBufferLoad")) {
-            ReplaceRawBufferLoad(func, M);
-            func->eraseFromParent();
-          } else if (func->getName().startswith("dx.op.rawBufferStore")) {
-            ReplaceRawBufferStore(func, M);
-            func->eraseFromParent();
-          }
-        }
-      }
-    } else if (M.GetDxilModule().GetUseMinPrecision()) {
-      for (auto F = M.functions().begin(), E = M.functions().end(); F != E;) {
-        Function *func = &*(F++);
-        if (func->hasName()) {
-          if (func->getName().startswith("dx.op.rawBufferLoad")) {
-            ReplaceMinPrecisionRawBufferLoad(func, M);
-          } else if (func->getName().startswith("dx.op.rawBufferStore")) {
-            ReplaceMinPrecisionRawBufferStore(func, M);
-          }
-        }
-      }
-    }
-    return true;
-  }
-
-private:
-  // Replace RawBufferLoad/Store to BufferLoad/Store for DXIL < 1.2
-  void ReplaceRawBufferLoad(Function *F, Module &M);
-  void ReplaceRawBufferStore(Function *F, Module &M);
-  void ReplaceRawBufferLoad64Bit(Function *F, Type *EltTy, Module &M);
-  void ReplaceRawBufferStore64Bit(Function *F, Type *EltTy, Module &M);
-  // Replace RawBufferLoad/Store of min-precision types to have its actual storage size
-  void ReplaceMinPrecisionRawBufferLoad(Function *F, Module &M);
-  void ReplaceMinPrecisionRawBufferStore(Function *F, Module &M);
-  void ReplaceMinPrecisionRawBufferLoadByType(Function *F, Type *FromTy,
-                                              Type *ToTy, OP *Op,
-                                              const DataLayout &DL);
-};
-} // namespace
-
-void DxilTranslateRawBuffer::ReplaceRawBufferLoad(Function *F,
-                                                                Module &M) {
-  OP *op = M.GetDxilModule().GetOP();
-  Type *RTy = F->getReturnType();
-  if (StructType *STy = dyn_cast<StructType>(RTy)) {
-    Type *ETy = STy->getElementType(0);
-    Function *newFunction = op->GetOpFunc(hlsl::DXIL::OpCode::BufferLoad, ETy);
-    for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
-      User *user = *(U++);
-      if (CallInst *CI = dyn_cast<CallInst>(user)) {
-        IRBuilder<> Builder(CI);
-        SmallVector<Value *, 4> args;
-        args.emplace_back(op->GetI32Const((unsigned)DXIL::OpCode::BufferLoad));
-        for (unsigned i = 1; i < 4; ++i) {
-          args.emplace_back(CI->getArgOperand(i));
-        }
-        CallInst *newCall = Builder.CreateCall(newFunction, args);
-        CI->replaceAllUsesWith(newCall);
-        CI->eraseFromParent();
-      } else {
-        DXASSERT(false, "function can only be used with call instructions.");
-      }
-    }
-  } else {
-    DXASSERT(false, "RawBufferLoad should return struct type.");
-  }
-}
-
-void DxilTranslateRawBuffer::ReplaceRawBufferLoad64Bit(Function *F, Type *EltTy, Module &M) {
-  OP *hlslOP = M.GetDxilModule().GetOP();
-  Function *bufLd = hlslOP->GetOpFunc(DXIL::OpCode::RawBufferLoad,
-                                      Type::getInt32Ty(M.getContext()));
-  for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
-    User *user = *(U++);
-    if (CallInst *CI = dyn_cast<CallInst>(user)) {
-      IRBuilder<> Builder(CI);
-      SmallVector<Value *, 4> args(CI->arg_operands());
-
-      Value *offset = CI->getArgOperand(
-          DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx);
-
-      unsigned size = 0;
-      bool bNeedStatus = false;
-      for (User *U : CI->users()) {
-        ExtractValueInst *Elt = cast<ExtractValueInst>(U);
-        DXASSERT(Elt->getNumIndices() == 1, "else invalid use for resRet");
-        unsigned idx = Elt->getIndices()[0];
-        if (idx == 4) {
-          bNeedStatus = true;
-        } else {
-          size = std::max(size, idx+1);
-        }
-      }
-      unsigned maskHi = 0;
-      unsigned maskLo = 0;
-      switch (size) {
-      case 1:
-        maskLo = 3;
-        break;
-      case 2:
-        maskLo = 0xf;
-        break;
-      case 3:
-        maskLo = 0xf;
-        maskHi = 3;
-        break;
-      case 4:
-        maskLo = 0xf;
-        maskHi = 0xf;
-        break;
-      }
-
-      args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
-          Builder.getInt8(maskLo);
-      Value *resultElts[5] = {nullptr, nullptr, nullptr, nullptr, nullptr};
-      CallInst *newLd = Builder.CreateCall(bufLd, args);
-
-      Value *resultElts32[8];
-      unsigned eltBase = 0;
-      for (unsigned i = 0; i < size; i++) {
-        if (i == 2) {
-          // Update offset 4 by 4 bytes.
-          if (isa<UndefValue>(offset)) {
-            // [RW]ByteAddressBuffer has undef element offset -> update index
-            Value *index = CI->getArgOperand(DXIL::OperandIndex::kRawBufferLoadIndexOpIdx);
-            args[DXIL::OperandIndex::kRawBufferLoadIndexOpIdx] =
-              Builder.CreateAdd(index, Builder.getInt32(4 * 4));
-          }
-          else {
-            // [RW]StructuredBuffer -> update element offset
-            args[DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx] =
-              Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
-          }
-          args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
-              Builder.getInt8(maskHi);
-          newLd = Builder.CreateCall(bufLd, args);
-          eltBase = 4;
-        }
-        unsigned resBase = 2 * i;
-        resultElts32[resBase] =
-            Builder.CreateExtractValue(newLd, resBase - eltBase);
-        resultElts32[resBase + 1] =
-            Builder.CreateExtractValue(newLd, resBase + 1 - eltBase);
-      }
-
-      Make64bitResultForLoad(EltTy, resultElts32, size, resultElts, hlslOP, Builder);
-      if (bNeedStatus) {
-        resultElts[4] = Builder.CreateExtractValue(newLd, 4);
-      }
-      for (auto it = CI->user_begin(); it != CI->user_end(); ) {
-        ExtractValueInst *Elt = cast<ExtractValueInst>(*(it++));
-        DXASSERT(Elt->getNumIndices() == 1, "else invalid use for resRet");
-        unsigned idx = Elt->getIndices()[0];
-        if (!Elt->user_empty()) {
-          Value *newElt = resultElts[idx];
-          Elt->replaceAllUsesWith(newElt);
-        }
-        Elt->eraseFromParent();
-      }
-
-      CI->eraseFromParent();
-    } else {
-      DXASSERT(false, "function can only be used with call instructions.");
-    }
-  }
-}
-
-void DxilTranslateRawBuffer::ReplaceRawBufferStore(Function *F,
-  Module &M) {
-  OP *op = M.GetDxilModule().GetOP();
-  DXASSERT(F->getReturnType()->isVoidTy(), "rawBufferStore should return a void type.");
-  Type *ETy = F->getFunctionType()->getParamType(4); // value
-  Function *newFunction = op->GetOpFunc(hlsl::DXIL::OpCode::BufferStore, ETy);
-  for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
-    User *user = *(U++);
-    if (CallInst *CI = dyn_cast<CallInst>(user)) {
-      IRBuilder<> Builder(CI);
-      SmallVector<Value *, 4> args;
-      args.emplace_back(op->GetI32Const((unsigned)DXIL::OpCode::BufferStore));
-      for (unsigned i = 1; i < 9; ++i) {
-        args.emplace_back(CI->getArgOperand(i));
-      }
-      Builder.CreateCall(newFunction, args);
-      CI->eraseFromParent();
-    }
-    else {
-      DXASSERT(false, "function can only be used with call instructions.");
-    }
-  }
-}
-
-void DxilTranslateRawBuffer::ReplaceRawBufferStore64Bit(Function *F, Type *ETy,
-                                                        Module &M) {
-  OP *hlslOP = M.GetDxilModule().GetOP();
-  Function *newFunction = hlslOP->GetOpFunc(hlsl::DXIL::OpCode::RawBufferStore,
-                                            Type::getInt32Ty(M.getContext()));
-  for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
-    User *user = *(U++);
-    if (CallInst *CI = dyn_cast<CallInst>(user)) {
-      IRBuilder<> Builder(CI);
-      SmallVector<Value *, 4> args(CI->arg_operands());
-      Value *vals[4] = {
-          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal0OpIdx),
-          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal1OpIdx),
-          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal2OpIdx),
-          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal3OpIdx)};
-      ConstantInt *cMask = cast<ConstantInt>(
-          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreMaskOpIdx));
-      Value *undefI32 = UndefValue::get(Builder.getInt32Ty());
-      Value *vals32[8] = {undefI32, undefI32, undefI32, undefI32,
-                          undefI32, undefI32, undefI32, undefI32};
-
-      unsigned maskLo = 0;
-      unsigned maskHi = 0;
-      unsigned size = 0;
-      unsigned mask = cMask->getLimitedValue();
-      switch (mask) {
-      case 1:
-        maskLo = 3;
-        size = 1;
-        break;
-      case 3:
-        maskLo = 15;
-        size = 2;
-        break;
-      case 7:
-        maskLo = 15;
-        maskHi = 3;
-        size = 3;
-        break;
-      case 15:
-        maskLo = 15;
-        maskHi = 15;
-        size = 4;
-        break;
-      default:
-        DXASSERT(0, "invalid mask");
-      }
-
-      Split64bitValForStore(ETy, vals, size, vals32, hlslOP, Builder);
-      args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] =
-          Builder.getInt8(maskLo);
-      args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx] = vals32[0];
-      args[DXIL::OperandIndex::kRawBufferStoreVal1OpIdx] = vals32[1];
-      args[DXIL::OperandIndex::kRawBufferStoreVal2OpIdx] = vals32[2];
-      args[DXIL::OperandIndex::kRawBufferStoreVal3OpIdx] = vals32[3];
-
-      Builder.CreateCall(newFunction, args);
-
-      if (maskHi) {
-        // Update offset 4 by 4 bytes.
-        Value *offset = args[DXIL::OperandIndex::kBufferStoreCoord1OpIdx];
-        if (isa<UndefValue>(offset)) {
-          // [RW]ByteAddressBuffer has element offset == undef -> update index instead
-          Value *index = args[DXIL::OperandIndex::kBufferStoreCoord0OpIdx];
-          index = Builder.CreateAdd(index, Builder.getInt32(4 * 4));
-          args[DXIL::OperandIndex::kRawBufferStoreIndexOpIdx] = index;
-        }
-        else {
-          // [RW]StructuredBuffer -> update element offset
-          offset = Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
-          args[DXIL::OperandIndex::kRawBufferStoreElementOffsetOpIdx] = offset;
-        }
-        
-        args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] =
-            Builder.getInt8(maskHi);
-        args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx] = vals32[4];
-        args[DXIL::OperandIndex::kRawBufferStoreVal1OpIdx] = vals32[5];
-        args[DXIL::OperandIndex::kRawBufferStoreVal2OpIdx] = vals32[6];
-        args[DXIL::OperandIndex::kRawBufferStoreVal3OpIdx] = vals32[7];
-
-        Builder.CreateCall(newFunction, args);
-      }
-      CI->eraseFromParent();
-    } else {
-      DXASSERT(false, "function can only be used with call instructions.");
-    }
-  }
-}
-
-void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferLoad(Function *F,
-                                                              Module &M) {
-  OP *Op = M.GetDxilModule().GetOP();
-  Type *RetTy = F->getReturnType();
-  if (StructType *STy = dyn_cast<StructType>(RetTy)) {
-    Type *EltTy = STy->getElementType(0);
-    if (EltTy->isHalfTy()) {
-      ReplaceMinPrecisionRawBufferLoadByType(F, Type::getHalfTy(M.getContext()),
-                                             Type::getFloatTy(M.getContext()),
-                                             Op, M.getDataLayout());
-    } else if (EltTy == Type::getInt16Ty(M.getContext())) {
-      ReplaceMinPrecisionRawBufferLoadByType(
-          F, Type::getInt16Ty(M.getContext()), Type::getInt32Ty(M.getContext()),
-          Op, M.getDataLayout());
-    }
-  } else {
-    DXASSERT(false, "RawBufferLoad should return struct type.");
-  }
-}
-
-void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferStore(Function *F,
-                                                              Module &M) {
-  DXASSERT(F->getReturnType()->isVoidTy(), "rawBufferStore should return a void type.");
-  Type *ETy = F->getFunctionType()->getParamType(4); // value
-  Type *NewETy;
-  if (ETy->isHalfTy()) {
-    NewETy = Type::getFloatTy(M.getContext());
-  }
-  else if (ETy == Type::getInt16Ty(M.getContext())) {
-    NewETy = Type::getInt32Ty(M.getContext());
-  }
-  else {
-    return; // not a min precision type
-  }
-  Function *newFunction = M.GetDxilModule().GetOP()->GetOpFunc(
-      DXIL::OpCode::RawBufferStore, NewETy);
-  // for each function
-  // add argument 4-7 to its upconverted values
-  // replace function call
-  for (auto FuncUser = F->user_begin(), FuncEnd = F->user_end(); FuncUser != FuncEnd;) {
-    CallInst *CI = dyn_cast<CallInst>(*(FuncUser++));
-    DXASSERT(CI, "function user must be a call instruction.");
-    IRBuilder<> CIBuilder(CI);
-    SmallVector<Value *, 9> Args;
-    for (unsigned i = 0; i < 4; ++i) {
-      Args.emplace_back(CI->getArgOperand(i));
-    }
-    // values to store should be converted to its higher precision types
-    if (ETy->isHalfTy()) {
-      for (unsigned i = 4; i < 8; ++i) {
-        Value *NewV = CIBuilder.CreateFPExt(CI->getArgOperand(i),
-                                            Type::getFloatTy(M.getContext()));
-        Args.emplace_back(NewV);
-      }
-    }
-    else if (ETy == Type::getInt16Ty(M.getContext())) {
-      // This case only applies to typed buffer since Store operation of byte
-      // address buffer for min precision is handled by implicit conversion on
-      // intrinsic call. Since we are extending integer, we have to know if we
-      // should sign ext or zero ext. We can do this by iterating checking the
-      // size of the element at struct type and comp type at type annotation
-      CallInst *handleCI = dyn_cast<CallInst>(CI->getArgOperand(1));
-      DXASSERT(handleCI, "otherwise handle was not an argument to buffer store.");
-      ConstantInt *resClass = dyn_cast<ConstantInt>(handleCI->getArgOperand(1));
-      DXASSERT_LOCALVAR(resClass, resClass && resClass->getSExtValue() ==
-                               (unsigned)DXIL::ResourceClass::UAV,
-               "otherwise buffer store called on non uav kind.");
-      ConstantInt *rangeID = dyn_cast<ConstantInt>(handleCI->getArgOperand(2)); // range id or idx?
-      DXASSERT(rangeID, "wrong createHandle call.");
-      DxilResource dxilRes = M.GetDxilModule().GetUAV(rangeID->getSExtValue());
-      StructType *STy = dyn_cast<StructType>(dxilRes.GetRetType());
-      DxilStructAnnotation *SAnnot = M.GetDxilModule().GetTypeSystem().GetStructAnnotation(STy);
-      ConstantInt *offsetInt = dyn_cast<ConstantInt>(CI->getArgOperand(3));
-      unsigned offset = offsetInt->getSExtValue();
-      unsigned currentOffset = 0;
-      for (DxilStructTypeIterator iter = begin(STy, SAnnot), ItEnd = end(STy, SAnnot); iter != ItEnd; ++iter) {
-        std::pair<Type *, DxilFieldAnnotation*> pair = *iter;
-        currentOffset += M.getDataLayout().getTypeAllocSize(pair.first);
-        if (currentOffset > offset) {
-          if (pair.second->GetCompType().IsUIntTy()) {
-            for (unsigned i = 4; i < 8; ++i) {
-              Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), Type::getInt32Ty(M.getContext()));
-              Args.emplace_back(NewV);
-            }
-            break;
-          }
-          else if (pair.second->GetCompType().IsIntTy()) {
-            for (unsigned i = 4; i < 8; ++i) {
-              Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), Type::getInt32Ty(M.getContext()));
-              Args.emplace_back(NewV);
-            }
-            break;
-          }
-          else {
-            DXASSERT(false, "Invalid comp type");
-          }
-        }
-      }
-    }
-
-    // mask
-    Args.emplace_back(CI->getArgOperand(8));
-    // alignment
-    Args.emplace_back(M.GetDxilModule().GetOP()->GetI32Const(
-        M.getDataLayout().getTypeAllocSize(NewETy)));
-    CIBuilder.CreateCall(newFunction, Args);
-    CI->eraseFromParent();
-   }
-}
-
-
-void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferLoadByType(
-    Function *F, Type *FromTy, Type *ToTy, OP *Op, const DataLayout &DL) {
-  Function *newFunction = Op->GetOpFunc(DXIL::OpCode::RawBufferLoad, ToTy);
-  for (auto FUser = F->user_begin(), FEnd = F->user_end(); FUser != FEnd;) {
-    User *UserCI = *(FUser++);
-    if (CallInst *CI = dyn_cast<CallInst>(UserCI)) {
-      IRBuilder<> CIBuilder(CI);
-      SmallVector<Value *, 5> newFuncArgs;
-      // opcode, handle, index, elementOffset, mask
-      // Compiler is generating correct element offset even for min precision types
-      // So no need to recalculate here
-      for (unsigned i = 0; i < 5; ++i) {
-        newFuncArgs.emplace_back(CI->getArgOperand(i));
-      }
-      // new alignment for new type
-      newFuncArgs.emplace_back(Op->GetI32Const(DL.getTypeAllocSize(ToTy)));
-      CallInst *newCI = CIBuilder.CreateCall(newFunction, newFuncArgs);
-      for (auto CIUser = CI->user_begin(), CIEnd = CI->user_end();
-           CIUser != CIEnd;) {
-        User *UserEV = *(CIUser++);
-        if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(UserEV)) {
-          IRBuilder<> EVBuilder(EV);
-          ArrayRef<unsigned> Indices = EV->getIndices();
-          DXASSERT(Indices.size() == 1, "Otherwise we have wrong extract value.");
-          Value *newEV = EVBuilder.CreateExtractValue(newCI, Indices);
-          Value *newTruncV = nullptr;
-          if (4 == Indices[0]) { // Don't truncate status
-            newTruncV = newEV;
-          }
-          else if (FromTy->isHalfTy()) {
-            newTruncV = EVBuilder.CreateFPTrunc(newEV, FromTy);
-          } else if (FromTy->isIntegerTy()) {
-            newTruncV = EVBuilder.CreateTrunc(newEV, FromTy);
-          } else {
-            DXASSERT(false, "unexpected type conversion");
-          }
-          EV->replaceAllUsesWith(newTruncV);
-          EV->eraseFromParent();
-        }
-      }
-      CI->eraseFromParent();
-    }
-  }
-  F->eraseFromParent();
-}
-
-char DxilTranslateRawBuffer::ID = 0;
-ModulePass *llvm::createDxilTranslateRawBuffer() {
-  return new DxilTranslateRawBuffer();
-}
-
-INITIALIZE_PASS(DxilTranslateRawBuffer, "hlsl-translate-dxil-raw-buffer",
-                "Translate raw buffer load", false, false)

+ 131 - 0
lib/HLSL/DxilLegalizeEvalOperations.cpp

@@ -0,0 +1,131 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilLegalizeEvalOperations.cpp                                            //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/HlslIntrinsicOp.h"
+#include "dxc/DXIL/DxilModule.h"
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/HLOperations.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <unordered_set>
+#include <vector>
+
+using namespace llvm;
+using namespace hlsl;
+
+// Make sure src of EvalOperations are from function parameter.
+// This is needed in order to translate EvaluateAttribute operations that traces
+// back to LoadInput operations during translation stage. Promoting load/store
+// instructions beforehand will allow us to easily trace back to loadInput from
+// function call.
+namespace {
+
+class DxilLegalizeEvalOperations : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilLegalizeEvalOperations() : ModulePass(ID) {}
+
+  const char *getPassName() const override {
+    return "DXIL Legalize EvalOperations";
+  }
+
+  bool runOnModule(Module &M) override {
+    for (Function &F : M.getFunctionList()) {
+      hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(&F);
+      if (group != HLOpcodeGroup::NotHL) {
+        std::vector<CallInst *> EvalFunctionCalls;
+        // Find all EvaluateAttribute calls
+        for (User *U : F.users()) {
+          if (CallInst *CI = dyn_cast<CallInst>(U)) {
+            IntrinsicOp evalOp =
+                static_cast<IntrinsicOp>(hlsl::GetHLOpcode(CI));
+            if (evalOp == IntrinsicOp::IOP_EvaluateAttributeAtSample ||
+                evalOp == IntrinsicOp::IOP_EvaluateAttributeCentroid ||
+                evalOp == IntrinsicOp::IOP_EvaluateAttributeSnapped ||
+                evalOp == IntrinsicOp::IOP_GetAttributeAtVertex) {
+              EvalFunctionCalls.push_back(CI);
+            }
+          }
+        }
+        if (EvalFunctionCalls.empty()) {
+          continue;
+        }
+        // Start from the call instruction, find all allocas that this call
+        // uses.
+        std::unordered_set<AllocaInst *> allocas;
+        for (CallInst *CI : EvalFunctionCalls) {
+          FindAllocasForEvalOperations(CI, allocas);
+        }
+        SSAUpdater SSA;
+        SmallVector<Instruction *, 4> Insts;
+        for (AllocaInst *AI : allocas) {
+          for (User *user : AI->users()) {
+            if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
+              Insts.emplace_back(cast<Instruction>(user));
+            }
+          }
+          LoadAndStorePromoter(Insts, SSA).run(Insts);
+          Insts.clear();
+        }
+      }
+    }
+    return true;
+  }
+
+private:
+  void FindAllocasForEvalOperations(Value *val,
+                                    std::unordered_set<AllocaInst *> &allocas);
+};
+
+char DxilLegalizeEvalOperations::ID = 0;
+
+// Find allocas for EvaluateAttribute operations
+void DxilLegalizeEvalOperations::FindAllocasForEvalOperations(
+    Value *val, std::unordered_set<AllocaInst *> &allocas) {
+  Value *CurVal = val;
+  while (!isa<AllocaInst>(CurVal)) {
+    if (CallInst *CI = dyn_cast<CallInst>(CurVal)) {
+      CurVal = CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx);
+    } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(CurVal)) {
+      Value *arg0 =
+          IE->getOperand(0); // Could be another insertelement or undef
+      Value *arg1 = IE->getOperand(1);
+      FindAllocasForEvalOperations(arg0, allocas);
+      CurVal = arg1;
+    } else if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(CurVal)) {
+      Value *arg0 = SV->getOperand(0);
+      Value *arg1 = SV->getOperand(1);
+      FindAllocasForEvalOperations(
+          arg0, allocas); // Shuffle vector could come from different allocas
+      CurVal = arg1;
+    } else if (ExtractElementInst *EE = dyn_cast<ExtractElementInst>(CurVal)) {
+      CurVal = EE->getOperand(0);
+    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurVal)) {
+      CurVal = LI->getOperand(0);
+    } else {
+      break;
+    }
+  }
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(CurVal)) {
+    allocas.insert(AI);
+  }
+}
+} // namespace
+
+ModulePass *llvm::createDxilLegalizeEvalOperationsPass() {
+  return new DxilLegalizeEvalOperations();
+}
+
+INITIALIZE_PASS(DxilLegalizeEvalOperations,
+                "hlsl-dxil-legalize-eval-operations",
+                "DXIL legalize eval operations", false, false)

+ 168 - 0
lib/HLSL/DxilPrecisePropagatePass.cpp

@@ -0,0 +1,168 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilPrecisePropagatePass.cpp                                              //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/DXIL/DxilModule.h"
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/HLModule.h"
+#include "dxc/HLSL/HLOperations.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include <unordered_set>
+#include <vector>
+
+using namespace llvm;
+using namespace hlsl;
+
+namespace {
+class DxilPrecisePropagatePass : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilPrecisePropagatePass() : ModulePass(ID) {}
+
+  const char *getPassName() const override { return "DXIL Precise Propagate"; }
+
+  bool runOnModule(Module &M) override {
+    DxilModule &dxilModule = M.GetOrCreateDxilModule();
+    DxilTypeSystem &typeSys = dxilModule.GetTypeSystem();
+    std::unordered_set<Instruction*> processedSet;
+    std::vector<Function*> deadList;
+    for (Function &F : M.functions()) {
+      if (HLModule::HasPreciseAttribute(&F)) {
+        PropagatePreciseOnFunctionUser(F, typeSys, processedSet);
+        deadList.emplace_back(&F);
+      }
+    }
+    for (Function *F : deadList)
+      F->eraseFromParent();
+    return true;
+  }
+
+private:
+  void PropagatePreciseOnFunctionUser(
+      Function &F, DxilTypeSystem &typeSys,
+      std::unordered_set<Instruction *> &processedSet);
+};
+
+char DxilPrecisePropagatePass::ID = 0;
+
+}
+
+static void PropagatePreciseAttribute(Instruction *I, DxilTypeSystem &typeSys,
+    std::unordered_set<Instruction *> &processedSet);
+
+static void PropagatePreciseAttributeOnOperand(
+    Value *V, DxilTypeSystem &typeSys, LLVMContext &Context,
+    std::unordered_set<Instruction *> &processedSet) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  // Skip none inst.
+  if (!I)
+    return;
+
+  FPMathOperator *FPMath = dyn_cast<FPMathOperator>(I);
+  // Skip none FPMath
+  if (!FPMath)
+    return;
+
+  // Skip inst already marked.
+  if (processedSet.count(I) > 0)
+    return;
+  // TODO: skip precise on integer type, sample instruction...
+  processedSet.insert(I);
+  // Set precise fast math on those instructions that support it.
+  if (DxilModule::PreservesFastMathFlags(I))
+    DxilModule::SetPreciseFastMathFlags(I);
+
+  // Fast math not work on call, use metadata.
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    HLModule::MarkPreciseAttributeWithMetadata(CI);
+  PropagatePreciseAttribute(I, typeSys, processedSet);
+}
+
+static void PropagatePreciseAttributeOnPointer(
+    Value *Ptr, DxilTypeSystem &typeSys, LLVMContext &Context,
+    std::unordered_set<Instruction *> &processedSet) {
+  // Find all store and propagate on the val operand of store.
+  // For CallInst, if Ptr is used as out parameter, mark it.
+  for (User *U : Ptr->users()) {
+    Instruction *user = cast<Instruction>(U);
+    if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
+      Value *val = stInst->getValueOperand();
+      PropagatePreciseAttributeOnOperand(val, typeSys, Context, processedSet);
+    } else if (CallInst *CI = dyn_cast<CallInst>(user)) {
+      bool bReadOnly = true;
+
+      Function *F = CI->getCalledFunction();
+      const DxilFunctionAnnotation *funcAnnotation =
+          typeSys.GetFunctionAnnotation(F);
+      for (unsigned i = 0; i < CI->getNumArgOperands(); ++i) {
+        if (Ptr != CI->getArgOperand(i))
+          continue;
+
+        const DxilParameterAnnotation &paramAnnotation =
+            funcAnnotation->GetParameterAnnotation(i);
+        // OutputPatch and OutputStream will be checked after scalar repl.
+        // Here only check out/inout
+        if (paramAnnotation.GetParamInputQual() == DxilParamInputQual::Out ||
+            paramAnnotation.GetParamInputQual() == DxilParamInputQual::Inout) {
+          bReadOnly = false;
+          break;
+        }
+      }
+
+      if (!bReadOnly)
+        PropagatePreciseAttributeOnOperand(CI, typeSys, Context, processedSet);
+    }
+  }
+}
+
+static void
+PropagatePreciseAttribute(Instruction *I, DxilTypeSystem &typeSys,
+                          std::unordered_set<Instruction *> &processedSet) {
+  LLVMContext &Context = I->getContext();
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+    PropagatePreciseAttributeOnPointer(AI, typeSys, Context, processedSet);
+  } else if (dyn_cast<CallInst>(I)) {
+    // Propagate every argument.
+    // TODO: only propagate precise argument.
+    for (Value *src : I->operands())
+      PropagatePreciseAttributeOnOperand(src, typeSys, Context, processedSet);
+  } else if (dyn_cast<FPMathOperator>(I)) {
+    // TODO: only propagate precise argument.
+    for (Value *src : I->operands())
+      PropagatePreciseAttributeOnOperand(src, typeSys, Context, processedSet);
+  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(I)) {
+    Value *Ptr = ldInst->getPointerOperand();
+    PropagatePreciseAttributeOnPointer(Ptr, typeSys, Context, processedSet);
+  } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+    PropagatePreciseAttributeOnPointer(GEP, typeSys, Context, processedSet);
+  // TODO: support more case which need
+}
+
+void DxilPrecisePropagatePass::PropagatePreciseOnFunctionUser(
+    Function &F, DxilTypeSystem &typeSys,
+    std::unordered_set<Instruction *> &processedSet) {
+  LLVMContext &Context = F.getContext();
+  for (auto U = F.user_begin(), E = F.user_end(); U != E;) {
+    CallInst *CI = cast<CallInst>(*(U++));
+    Value *V = CI->getArgOperand(0);
+    PropagatePreciseAttributeOnOperand(V, typeSys, Context, processedSet);
+    CI->eraseFromParent();
+  }
+}
+
+ModulePass *llvm::createDxilPrecisePropagatePass() {
+  return new DxilPrecisePropagatePass();
+}
+
+INITIALIZE_PASS(DxilPrecisePropagatePass, "hlsl-dxil-precise", "DXIL precise attribute propagate", false, false)

+ 214 - 0
lib/HLSL/DxilPromoteResourcePasses.cpp

@@ -0,0 +1,214 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilPromoteResourcePasses.cpp                                             //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/DXIL/DxilUtil.h"
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/HLModule.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <unordered_set>
+#include <vector>
+
+using namespace llvm;
+using namespace hlsl;
+
+// Legalize resource use.
+// Map local or static global resource to global resource.
+// Require inline for static global resource.
+
+namespace {
+
+static const StringRef kStaticResourceLibErrorMsg = "static global resource use is disallowed in library exports.";
+
+class DxilPromoteStaticResources : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilPromoteStaticResources()
+      : ModulePass(ID) {}
+
+  const char *getPassName() const override {
+    return "DXIL Legalize Static Resource Use";
+  }
+
+  bool runOnModule(Module &M) override {
+    // Promote static global variables.
+    return PromoteStaticGlobalResources(M);
+  }
+
+private:
+  bool PromoteStaticGlobalResources(Module &M);
+};
+
+char DxilPromoteStaticResources::ID = 0;
+
+class DxilPromoteLocalResources : public FunctionPass {
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilPromoteLocalResources()
+      : FunctionPass(ID) {}
+
+  const char *getPassName() const override {
+    return "DXIL Legalize Resource Use";
+  }
+
+  bool runOnFunction(Function &F) override {
+    // Promote local resource first.
+    return PromoteLocalResource(F);
+  }
+
+private:
+  bool PromoteLocalResource(Function &F);
+};
+
+char DxilPromoteLocalResources::ID = 0;
+
+}
+
+void DxilPromoteLocalResources::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AssumptionCacheTracker>();
+  AU.addRequired<DominatorTreeWrapperPass>();
+  AU.setPreservesAll();
+}
+
+bool DxilPromoteLocalResources::PromoteLocalResource(Function &F) {
+  bool bModified = false;
+  std::vector<AllocaInst *> Allocas;
+  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  AssumptionCache &AC =
+      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+  BasicBlock &BB = F.getEntryBlock();
+  unsigned allocaSize = 0;
+  while (1) {
+    Allocas.clear();
+
+    // Find allocas that are safe to promote, by looking at all instructions in
+    // the entry node
+    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { // Is it an alloca?
+        if (dxilutil::IsHLSLObjectType(dxilutil::GetArrayEltTy(AI->getAllocatedType()))) {
+          if (isAllocaPromotable(AI))
+            Allocas.push_back(AI);
+        }
+      }
+    if (Allocas.empty())
+      break;
+
+    // No update.
+    // Report error and break.
+    if (allocaSize == Allocas.size()) {
+      F.getContext().emitError(dxilutil::kResourceMapErrorMsg);
+      break;
+    }
+    allocaSize = Allocas.size();
+
+    PromoteMemToReg(Allocas, *DT, nullptr, &AC);
+    bModified = true;
+  }
+
+  return bModified;
+}
+
+FunctionPass *llvm::createDxilPromoteLocalResources() {
+  return new DxilPromoteLocalResources();
+}
+
+INITIALIZE_PASS_BEGIN(DxilPromoteLocalResources,
+                      "hlsl-dxil-promote-local-resources",
+                      "DXIL promote local resource use", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(DxilPromoteLocalResources,
+                    "hlsl-dxil-promote-local-resources",
+                    "DXIL promote local resource use", false, true)
+
+bool DxilPromoteStaticResources::PromoteStaticGlobalResources(
+    Module &M) {
+  if (M.GetOrCreateHLModule().GetShaderModel()->IsLib()) {
+    // Read/write to global static resource is disallowed for libraries:
+    // Resource use needs to be resolved to a single real global resource,
+    // but it may not be possible since any external function call may re-enter
+    // at any other library export, which could modify the global static
+    // between write and read.
+    // While it could work for certain cases, describing the boundary at
+    // the HLSL level is difficult, so at this point it's better to disallow.
+    // example of what could work:
+    //  After inlining, exported functions must have writes to static globals
+    //  before reads, and must not have any external function calls between
+    //  writes and subsequent reads, such that the static global may be
+    //  optimized away for the exported function.
+    for (auto &GV : M.globals()) {
+      if (GV.getLinkage() == GlobalVariable::LinkageTypes::InternalLinkage &&
+        dxilutil::IsHLSLObjectType(dxilutil::GetArrayEltTy(GV.getType()))) {
+        if (!GV.user_empty()) {
+          if (Instruction *I = dyn_cast<Instruction>(*GV.user_begin())) {
+            dxilutil::EmitErrorOnInstruction(I, kStaticResourceLibErrorMsg);
+            break;
+          }
+        }
+      }
+    }
+    return false;
+  }
+
+  bool bModified = false;
+  std::set<GlobalVariable *> staticResources;
+  for (auto &GV : M.globals()) {
+    if (GV.getLinkage() == GlobalVariable::LinkageTypes::InternalLinkage &&
+        dxilutil::IsHLSLObjectType(dxilutil::GetArrayEltTy(GV.getType()))) {
+      staticResources.insert(&GV);
+    }
+  }
+  SSAUpdater SSA;
+  SmallVector<Instruction *, 4> Insts;
+  // Make sure every resource load has mapped to global variable.
+  while (!staticResources.empty()) {
+    bool bUpdated = false;
+    for (auto it = staticResources.begin(); it != staticResources.end();) {
+      GlobalVariable *GV = *(it++);
+      // Build list of instructions to promote.
+      for (User *U : GV->users()) {
+        Instruction *I = cast<Instruction>(U);
+        Insts.emplace_back(I);
+      }
+
+      LoadAndStorePromoter(Insts, SSA).run(Insts);
+      if (GV->user_empty()) {
+        bUpdated = true;
+        staticResources.erase(GV);
+      }
+
+      Insts.clear();
+    }
+    if (!bUpdated) {
+      M.getContext().emitError(dxilutil::kResourceMapErrorMsg);
+      break;
+    }
+    bModified = true;
+  }
+  return bModified;
+}
+
+ModulePass *llvm::createDxilPromoteStaticResources() {
+  return new DxilPromoteStaticResources();
+}
+
+INITIALIZE_PASS(DxilPromoteStaticResources,
+                "hlsl-dxil-promote-static-resources",
+                "DXIL promote static resource use", false, false)

+ 593 - 0
lib/HLSL/DxilTranslateRawBuffer.cpp

@@ -0,0 +1,593 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilTranslateRawBuffer.cpp                                                //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/DXIL/DxilModule.h"
+#include "dxc/DXIL/DxilOperations.h"
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/Support/Global.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include <vector>
+
+using namespace llvm;
+using namespace hlsl;
+
+// Translate RawBufferLoad/RawBufferStore
+// This pass is to make sure that we generate correct buffer load for DXIL
+// For DXIL < 1.2, rawBufferLoad will be translated to BufferLoad instruction
+// without mask.
+// For DXIL >= 1.2, if min precision is enabled, currently generation pass is
+// producing i16/f16 return type for min precisions. For rawBuffer, we will
+// change this so that min precisions are returning its actual scalar type (i32/f32)
+// and will be truncated to their corresponding types after loading / before storing.
+namespace {
+
+// Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi }
+//
+// Reassembles `size` 64-bit elements from pairs of 32-bit load results.
+// For double elements the dxil MakeDouble op is used; for 64-bit integers
+// the halves are zero-extended and combined with shift/or.
+//   resultElts32 - the 2*size 32-bit components, low half first.
+//   resultElts   - receives the `size` reconstructed 64-bit values.
+void Make64bitResultForLoad(Type *EltTy, ArrayRef<Value *> resultElts32,
+                            unsigned size, MutableArrayRef<Value *> resultElts,
+                            hlsl::OP *hlslOP, IRBuilder<> &Builder) {
+  Type *i64Ty = Builder.getInt64Ty();
+  Type *doubleTy = Builder.getDoubleTy();
+  if (EltTy == doubleTy) {
+    // double: pair (lo, hi) -> MakeDouble(lo, hi).
+    Function *makeDouble =
+        hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy);
+    Value *makeDoubleOpArg =
+        Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
+    for (unsigned i = 0; i < size; i++) {
+      Value *lo = resultElts32[2 * i];
+      Value *hi = resultElts32[2 * i + 1];
+      Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
+      resultElts[i] = V;
+    }
+  } else {
+    // i64: zext both halves, shift hi into the top 32 bits, then or.
+    for (unsigned i = 0; i < size; i++) {
+      Value *lo = resultElts32[2 * i];
+      Value *hi = resultElts32[2 * i + 1];
+      lo = Builder.CreateZExt(lo, i64Ty);
+      hi = Builder.CreateZExt(hi, i64Ty);
+      hi = Builder.CreateShl(hi, 32);
+      resultElts[i] = Builder.CreateOr(lo, hi);
+    }
+  }
+}
+
+// Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi }
+//
+// Inverse of Make64bitResultForLoad: decomposes `size` 64-bit values into
+// 2*size 32-bit components for a 32-bit raw buffer store. Undef inputs
+// produce undef component pairs so unused store lanes stay undef.
+void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
+                           MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
+                           IRBuilder<> &Builder) {
+  Type *i32Ty = Builder.getInt32Ty();
+  Type *doubleTy = Builder.getDoubleTy();
+  Value *undefI32 = UndefValue::get(i32Ty);
+
+  if (EltTy == doubleTy) {
+    // double: use dxil SplitDouble to obtain the (lo, hi) dword pair.
+    Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy);
+    Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
+    for (unsigned i = 0; i < size; i++) {
+      if (isa<UndefValue>(vals[i])) {
+        vals32[2 * i] = undefI32;
+        vals32[2 * i + 1] = undefI32;
+      } else {
+        Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]});
+        Value *lo = Builder.CreateExtractValue(retVal, 0);
+        Value *hi = Builder.CreateExtractValue(retVal, 1);
+        vals32[2 * i] = lo;
+        vals32[2 * i + 1] = hi;
+      }
+    }
+  } else {
+    // i64: trunc gives the low dword, lshr 32 + trunc gives the high dword.
+    for (unsigned i = 0; i < size; i++) {
+      if (isa<UndefValue>(vals[i])) {
+        vals32[2 * i] = undefI32;
+        vals32[2 * i + 1] = undefI32;
+      } else {
+        Value *lo = Builder.CreateTrunc(vals[i], i32Ty);
+        Value *hi = Builder.CreateLShr(vals[i], 32);
+        hi = Builder.CreateTrunc(hi, i32Ty);
+        vals32[2 * i] = lo;
+        vals32[2 * i + 1] = hi;
+      }
+    }
+  }
+}
+
+// Module pass that rewrites rawBufferLoad/rawBufferStore calls to match the
+// capabilities of the target DXIL version (see the file-level comment).
+class DxilTranslateRawBuffer : public ModulePass {
+public:
+  static char ID;
+  explicit DxilTranslateRawBuffer() : ModulePass(ID) {}
+  // Runs up to three rewrites, keyed off the module's DXIL version:
+  //  1. DXIL <= 1.2: split 64-bit (double/i64) raw buffer ops into 32-bit ops.
+  //  2. DXIL <  1.2: replace raw buffer ops with plain BufferLoad/BufferStore.
+  //  3. DXIL >= 1.2 with min-precision: widen half/i16 overloads to f32/i32.
+  bool runOnModule(Module &M) {
+    unsigned major, minor;
+    DxilModule &DM = M.GetDxilModule();
+    DM.GetDxilVersion(major, minor);
+    OP *hlslOP = DM.GetOP();
+    // Split 64bit for shader model less than 6.3.
+    if (major == 1 && minor <= 2) {
+      // Iterator is advanced before any possible erase of the function.
+      for (auto F = M.functions().begin(); F != M.functions().end();) {
+        Function *func = &*(F++);
+        DXIL::OpCodeClass opClass;
+        if (hlslOP->GetOpCodeClass(func, opClass)) {
+          if (opClass == DXIL::OpCodeClass::RawBufferLoad) {
+            Type *ETy =
+                hlslOP->GetOverloadType(DXIL::OpCode::RawBufferLoad, func);
+
+            bool is64 =
+                ETy->isDoubleTy() || ETy == Type::getInt64Ty(ETy->getContext());
+            if (is64) {
+              ReplaceRawBufferLoad64Bit(func, ETy, M);
+              func->eraseFromParent();
+            }
+          } else if (opClass == DXIL::OpCodeClass::RawBufferStore) {
+            Type *ETy =
+                hlslOP->GetOverloadType(DXIL::OpCode::RawBufferStore, func);
+
+            bool is64 =
+                ETy->isDoubleTy() || ETy == Type::getInt64Ty(ETy->getContext());
+            if (is64) {
+              ReplaceRawBufferStore64Bit(func, ETy, M);
+              func->eraseFromParent();
+            }
+          }
+        }
+      }
+    }
+    if (major == 1 && minor < 2) {
+      // DXIL < 1.2 has no raw buffer ops at all: fall back to BufferLoad/Store.
+      for (auto F = M.functions().begin(), E = M.functions().end(); F != E;) {
+        Function *func = &*(F++);
+        if (func->hasName()) {
+          if (func->getName().startswith("dx.op.rawBufferLoad")) {
+            ReplaceRawBufferLoad(func, M);
+            func->eraseFromParent();
+          } else if (func->getName().startswith("dx.op.rawBufferStore")) {
+            ReplaceRawBufferStore(func, M);
+            func->eraseFromParent();
+          }
+        }
+      }
+    } else if (M.GetDxilModule().GetUseMinPrecision()) {
+      // DXIL >= 1.2 with min precision: widen half/i16 overloads so the raw
+      // buffer ops operate on their 32-bit storage types.
+      for (auto F = M.functions().begin(), E = M.functions().end(); F != E;) {
+        Function *func = &*(F++);
+        if (func->hasName()) {
+          if (func->getName().startswith("dx.op.rawBufferLoad")) {
+            ReplaceMinPrecisionRawBufferLoad(func, M);
+          } else if (func->getName().startswith("dx.op.rawBufferStore")) {
+            ReplaceMinPrecisionRawBufferStore(func, M);
+          }
+        }
+      }
+    }
+    return true;
+  }
+
+private:
+  // Replace RawBufferLoad/Store to BufferLoad/Store for DXIL < 1.2
+  void ReplaceRawBufferLoad(Function *F, Module &M);
+  void ReplaceRawBufferStore(Function *F, Module &M);
+  // Split 64-bit (double/i64) raw buffer ops into pairs of 32-bit ops.
+  void ReplaceRawBufferLoad64Bit(Function *F, Type *EltTy, Module &M);
+  void ReplaceRawBufferStore64Bit(Function *F, Type *EltTy, Module &M);
+  // Replace RawBufferLoad/Store of min-precision types to have its actual storage size
+  void ReplaceMinPrecisionRawBufferLoad(Function *F, Module &M);
+  void ReplaceMinPrecisionRawBufferStore(Function *F, Module &M);
+  // Shared worker for the min-precision load rewrite (FromTy -> ToTy).
+  void ReplaceMinPrecisionRawBufferLoadByType(Function *F, Type *FromTy,
+                                              Type *ToTy, OP *Op,
+                                              const DataLayout &DL);
+};
+} // namespace
+
+// Rewrites every call of the rawBufferLoad overload `F` into an equivalent
+// BufferLoad call (DXIL < 1.2). Only the opcode, handle, index and element
+// offset operands (0-3) carry over; the raw-buffer-only mask/alignment
+// operands are dropped. The resRet struct return type is unchanged, so
+// extractvalue users keep working via replaceAllUsesWith.
+void DxilTranslateRawBuffer::ReplaceRawBufferLoad(Function *F,
+                                                                Module &M) {
+  OP *op = M.GetDxilModule().GetOP();
+  Type *RTy = F->getReturnType();
+  if (StructType *STy = dyn_cast<StructType>(RTy)) {
+    Type *ETy = STy->getElementType(0);
+    Function *newFunction = op->GetOpFunc(hlsl::DXIL::OpCode::BufferLoad, ETy);
+    // Advance the use iterator before erasing the call it points at.
+    for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
+      User *user = *(U++);
+      if (CallInst *CI = dyn_cast<CallInst>(user)) {
+        IRBuilder<> Builder(CI);
+        SmallVector<Value *, 4> args;
+        args.emplace_back(op->GetI32Const((unsigned)DXIL::OpCode::BufferLoad));
+        // Operands 1-3: handle, index, elementOffset.
+        for (unsigned i = 1; i < 4; ++i) {
+          args.emplace_back(CI->getArgOperand(i));
+        }
+        CallInst *newCall = Builder.CreateCall(newFunction, args);
+        CI->replaceAllUsesWith(newCall);
+        CI->eraseFromParent();
+      } else {
+        DXASSERT(false, "function can only be used with call instructions.");
+      }
+    }
+  } else {
+    DXASSERT(false, "RawBufferLoad should return struct type.");
+  }
+}
+
+// Lowers a 64-bit-overload rawBufferLoad into one or two i32-overload
+// rawBufferLoad calls and reassembles the 64-bit elements from the dword
+// pairs. A single 32-bit load yields at most four dwords (two 64-bit
+// elements); loading a third or fourth 64-bit element requires a second
+// load at +16 bytes.
+void DxilTranslateRawBuffer::ReplaceRawBufferLoad64Bit(Function *F, Type *EltTy, Module &M) {
+  OP *hlslOP = M.GetDxilModule().GetOP();
+  Function *bufLd = hlslOP->GetOpFunc(DXIL::OpCode::RawBufferLoad,
+                                      Type::getInt32Ty(M.getContext()));
+  for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
+    User *user = *(U++);
+    if (CallInst *CI = dyn_cast<CallInst>(user)) {
+      IRBuilder<> Builder(CI);
+      SmallVector<Value *, 4> args(CI->arg_operands());
+
+      Value *offset = CI->getArgOperand(
+          DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx);
+
+      // Scan extractvalue users to learn how many 64-bit elements are
+      // actually read (size, 1-4) and whether the status slot (index 4)
+      // is consumed.
+      unsigned size = 0;
+      bool bNeedStatus = false;
+      for (User *U : CI->users()) {
+        ExtractValueInst *Elt = cast<ExtractValueInst>(U);
+        DXASSERT(Elt->getNumIndices() == 1, "else invalid use for resRet");
+        unsigned idx = Elt->getIndices()[0];
+        if (idx == 4) {
+          bNeedStatus = true;
+        } else {
+          size = std::max(size, idx+1);
+        }
+      }
+      // Each 64-bit element occupies two dword lanes; build the component
+      // masks for the low load (first 4 dwords) and high load (next 4).
+      unsigned maskHi = 0;
+      unsigned maskLo = 0;
+      switch (size) {
+      case 1:
+        maskLo = 3;
+        break;
+      case 2:
+        maskLo = 0xf;
+        break;
+      case 3:
+        maskLo = 0xf;
+        maskHi = 3;
+        break;
+      case 4:
+        maskLo = 0xf;
+        maskHi = 0xf;
+        break;
+      }
+
+      args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
+          Builder.getInt8(maskLo);
+      Value *resultElts[5] = {nullptr, nullptr, nullptr, nullptr, nullptr};
+      CallInst *newLd = Builder.CreateCall(bufLd, args);
+
+      // Collect the dword pairs; emit the second (high) load lazily when the
+      // third 64-bit element is reached.
+      Value *resultElts32[8];
+      unsigned eltBase = 0;
+      for (unsigned i = 0; i < size; i++) {
+        if (i == 2) {
+          // Update offset 4 by 4 bytes.
+          if (isa<UndefValue>(offset)) {
+            // [RW]ByteAddressBuffer has undef element offset -> update index
+            Value *index = CI->getArgOperand(DXIL::OperandIndex::kRawBufferLoadIndexOpIdx);
+            args[DXIL::OperandIndex::kRawBufferLoadIndexOpIdx] =
+              Builder.CreateAdd(index, Builder.getInt32(4 * 4));
+          }
+          else {
+            // [RW]StructuredBuffer -> update element offset
+            args[DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx] =
+              Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
+          }
+          args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
+              Builder.getInt8(maskHi);
+          newLd = Builder.CreateCall(bufLd, args);
+          eltBase = 4;
+        }
+        unsigned resBase = 2 * i;
+        resultElts32[resBase] =
+            Builder.CreateExtractValue(newLd, resBase - eltBase);
+        resultElts32[resBase + 1] =
+            Builder.CreateExtractValue(newLd, resBase + 1 - eltBase);
+      }
+
+      // Recombine dword pairs into 64-bit values, forward status if used,
+      // then rewrite each extractvalue to the matching reconstructed value.
+      Make64bitResultForLoad(EltTy, resultElts32, size, resultElts, hlslOP, Builder);
+      if (bNeedStatus) {
+        resultElts[4] = Builder.CreateExtractValue(newLd, 4);
+      }
+      for (auto it = CI->user_begin(); it != CI->user_end(); ) {
+        ExtractValueInst *Elt = cast<ExtractValueInst>(*(it++));
+        DXASSERT(Elt->getNumIndices() == 1, "else invalid use for resRet");
+        unsigned idx = Elt->getIndices()[0];
+        if (!Elt->user_empty()) {
+          Value *newElt = resultElts[idx];
+          Elt->replaceAllUsesWith(newElt);
+        }
+        Elt->eraseFromParent();
+      }
+
+      CI->eraseFromParent();
+    } else {
+      DXASSERT(false, "function can only be used with call instructions.");
+    }
+  }
+}
+
+// Rewrites every call of the rawBufferStore overload `F` into an equivalent
+// BufferStore call (DXIL < 1.2). Operands 1-8 (handle, coords, four values,
+// mask) carry over directly; the raw-buffer alignment operand is dropped.
+void DxilTranslateRawBuffer::ReplaceRawBufferStore(Function *F,
+  Module &M) {
+  OP *op = M.GetDxilModule().GetOP();
+  DXASSERT(F->getReturnType()->isVoidTy(), "rawBufferStore should return a void type.");
+  Type *ETy = F->getFunctionType()->getParamType(4); // value
+  Function *newFunction = op->GetOpFunc(hlsl::DXIL::OpCode::BufferStore, ETy);
+  // Advance the use iterator before erasing the call it points at.
+  for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
+    User *user = *(U++);
+    if (CallInst *CI = dyn_cast<CallInst>(user)) {
+      IRBuilder<> Builder(CI);
+      SmallVector<Value *, 4> args;
+      args.emplace_back(op->GetI32Const((unsigned)DXIL::OpCode::BufferStore));
+      for (unsigned i = 1; i < 9; ++i) {
+        args.emplace_back(CI->getArgOperand(i));
+      }
+      Builder.CreateCall(newFunction, args);
+      CI->eraseFromParent();
+    }
+    else {
+      DXASSERT(false, "function can only be used with call instructions.");
+    }
+  }
+}
+
+// Lowers a 64-bit-overload rawBufferStore into one or two i32-overload
+// rawBufferStore calls: each 64-bit value is split into a dword pair, and
+// values beyond the first two (i.e. dwords 4-7) go into a second store at
+// a +16-byte offset. The mask operand must be a constant and contiguous
+// from component 0 (1/3/7/15).
+void DxilTranslateRawBuffer::ReplaceRawBufferStore64Bit(Function *F, Type *ETy,
+                                                        Module &M) {
+  OP *hlslOP = M.GetDxilModule().GetOP();
+  Function *newFunction = hlslOP->GetOpFunc(hlsl::DXIL::OpCode::RawBufferStore,
+                                            Type::getInt32Ty(M.getContext()));
+  for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
+    User *user = *(U++);
+    if (CallInst *CI = dyn_cast<CallInst>(user)) {
+      IRBuilder<> Builder(CI);
+      SmallVector<Value *, 4> args(CI->arg_operands());
+      Value *vals[4] = {
+          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal0OpIdx),
+          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal1OpIdx),
+          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal2OpIdx),
+          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal3OpIdx)};
+      ConstantInt *cMask = cast<ConstantInt>(
+          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreMaskOpIdx));
+      Value *undefI32 = UndefValue::get(Builder.getInt32Ty());
+      Value *vals32[8] = {undefI32, undefI32, undefI32, undefI32,
+                          undefI32, undefI32, undefI32, undefI32};
+
+      // Map the 64-bit component mask to dword masks for the low store
+      // (first 4 dwords) and high store (next 4 dwords).
+      unsigned maskLo = 0;
+      unsigned maskHi = 0;
+      unsigned size = 0;
+      unsigned mask = cMask->getLimitedValue();
+      switch (mask) {
+      case 1:
+        maskLo = 3;
+        size = 1;
+        break;
+      case 3:
+        maskLo = 15;
+        size = 2;
+        break;
+      case 7:
+        maskLo = 15;
+        maskHi = 3;
+        size = 3;
+        break;
+      case 15:
+        maskLo = 15;
+        maskHi = 15;
+        size = 4;
+        break;
+      default:
+        DXASSERT(0, "invalid mask");
+      }
+
+      // Emit the low store with dwords 0-3.
+      Split64bitValForStore(ETy, vals, size, vals32, hlslOP, Builder);
+      args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] =
+          Builder.getInt8(maskLo);
+      args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx] = vals32[0];
+      args[DXIL::OperandIndex::kRawBufferStoreVal1OpIdx] = vals32[1];
+      args[DXIL::OperandIndex::kRawBufferStoreVal2OpIdx] = vals32[2];
+      args[DXIL::OperandIndex::kRawBufferStoreVal3OpIdx] = vals32[3];
+
+      Builder.CreateCall(newFunction, args);
+
+      // Emit the high store with dwords 4-7 when more than two 64-bit
+      // values are written.
+      if (maskHi) {
+        // Update offset 4 by 4 bytes.
+        Value *offset = args[DXIL::OperandIndex::kBufferStoreCoord1OpIdx];
+        if (isa<UndefValue>(offset)) {
+          // [RW]ByteAddressBuffer has element offset == undef -> update index instead
+          Value *index = args[DXIL::OperandIndex::kBufferStoreCoord0OpIdx];
+          index = Builder.CreateAdd(index, Builder.getInt32(4 * 4));
+          args[DXIL::OperandIndex::kRawBufferStoreIndexOpIdx] = index;
+        }
+        else {
+          // [RW]StructuredBuffer -> update element offset
+          offset = Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
+          args[DXIL::OperandIndex::kRawBufferStoreElementOffsetOpIdx] = offset;
+        }
+
+        args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] =
+            Builder.getInt8(maskHi);
+        args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx] = vals32[4];
+        args[DXIL::OperandIndex::kRawBufferStoreVal1OpIdx] = vals32[5];
+        args[DXIL::OperandIndex::kRawBufferStoreVal2OpIdx] = vals32[6];
+        args[DXIL::OperandIndex::kRawBufferStoreVal3OpIdx] = vals32[7];
+
+        Builder.CreateCall(newFunction, args);
+      }
+      CI->eraseFromParent();
+    } else {
+      DXASSERT(false, "function can only be used with call instructions.");
+    }
+  }
+}
+
+// Dispatches the min-precision load rewrite based on the overload's element
+// type: half widens to float, i16 widens to i32. Other element types are
+// left untouched.
+void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferLoad(Function *F,
+                                                              Module &M) {
+  OP *Op = M.GetDxilModule().GetOP();
+  Type *RetTy = F->getReturnType();
+  if (StructType *STy = dyn_cast<StructType>(RetTy)) {
+    Type *EltTy = STy->getElementType(0);
+    if (EltTy->isHalfTy()) {
+      ReplaceMinPrecisionRawBufferLoadByType(F, Type::getHalfTy(M.getContext()),
+                                             Type::getFloatTy(M.getContext()),
+                                             Op, M.getDataLayout());
+    } else if (EltTy == Type::getInt16Ty(M.getContext())) {
+      ReplaceMinPrecisionRawBufferLoadByType(
+          F, Type::getInt16Ty(M.getContext()), Type::getInt32Ty(M.getContext()),
+          Op, M.getDataLayout());
+    }
+  } else {
+    DXASSERT(false, "RawBufferLoad should return struct type.");
+  }
+}
+
+// Rewrites min-precision (half/i16) rawBufferStore calls to the widened
+// (float/i32) overload: operands 0-3 carry over, the four stored values
+// (operands 4-7) are widened (fpext for half; sext/zext for i16, chosen by
+// the signedness recorded in the resource's type annotation), then the mask
+// is kept and the alignment is recomputed for the widened type.
+void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferStore(Function *F,
+                                                              Module &M) {
+  DXASSERT(F->getReturnType()->isVoidTy(), "rawBufferStore should return a void type.");
+  Type *ETy = F->getFunctionType()->getParamType(4); // value
+  Type *NewETy;
+  if (ETy->isHalfTy()) {
+    NewETy = Type::getFloatTy(M.getContext());
+  }
+  else if (ETy == Type::getInt16Ty(M.getContext())) {
+    NewETy = Type::getInt32Ty(M.getContext());
+  }
+  else {
+    return; // not a min precision type
+  }
+  Function *newFunction = M.GetDxilModule().GetOP()->GetOpFunc(
+      DXIL::OpCode::RawBufferStore, NewETy);
+  // for each function
+  // add argument 4-7 to its upconverted values
+  // replace function call
+  for (auto FuncUser = F->user_begin(), FuncEnd = F->user_end(); FuncUser != FuncEnd;) {
+    CallInst *CI = dyn_cast<CallInst>(*(FuncUser++));
+    DXASSERT(CI, "function user must be a call instruction.");
+    IRBuilder<> CIBuilder(CI);
+    SmallVector<Value *, 9> Args;
+    // Operands 0-3: opcode, handle, index, elementOffset.
+    for (unsigned i = 0; i < 4; ++i) {
+      Args.emplace_back(CI->getArgOperand(i));
+    }
+    // values to store should be converted to its higher precision types
+    if (ETy->isHalfTy()) {
+      for (unsigned i = 4; i < 8; ++i) {
+        Value *NewV = CIBuilder.CreateFPExt(CI->getArgOperand(i),
+                                            Type::getFloatTy(M.getContext()));
+        Args.emplace_back(NewV);
+      }
+    }
+    else if (ETy == Type::getInt16Ty(M.getContext())) {
+      // This case only applies to typed buffer since Store operation of byte
+      // address buffer for min precision is handled by implicit conversion on
+      // intrinsic call. Since we are extending integer, we have to know if we
+      // should sign ext or zero ext. We can do this by iterating checking the
+      // size of the element at struct type and comp type at type annotation
+      CallInst *handleCI = dyn_cast<CallInst>(CI->getArgOperand(1));
+      DXASSERT(handleCI, "otherwise handle was not an argument to buffer store.");
+      ConstantInt *resClass = dyn_cast<ConstantInt>(handleCI->getArgOperand(1));
+      DXASSERT_LOCALVAR(resClass, resClass && resClass->getSExtValue() ==
+                               (unsigned)DXIL::ResourceClass::UAV,
+               "otherwise buffer store called on non uav kind.");
+      ConstantInt *rangeID = dyn_cast<ConstantInt>(handleCI->getArgOperand(2)); // range id or idx?
+      DXASSERT(rangeID, "wrong createHandle call.");
+      DxilResource dxilRes = M.GetDxilModule().GetUAV(rangeID->getSExtValue());
+      StructType *STy = dyn_cast<StructType>(dxilRes.GetRetType());
+      DxilStructAnnotation *SAnnot = M.GetDxilModule().GetTypeSystem().GetStructAnnotation(STy);
+      ConstantInt *offsetInt = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+      unsigned offset = offsetInt->getSExtValue();
+      unsigned currentOffset = 0;
+      // Walk the annotated struct fields to locate the field containing
+      // `offset`, then extend per that field's component signedness.
+      // NOTE(review): if `offset` is at or past the end of the struct, this
+      // loop can complete without appending operands 4-7 - presumably
+      // unreachable for well-formed input; verify against the generator.
+      for (DxilStructTypeIterator iter = begin(STy, SAnnot), ItEnd = end(STy, SAnnot); iter != ItEnd; ++iter) {
+        std::pair<Type *, DxilFieldAnnotation*> pair = *iter;
+        currentOffset += M.getDataLayout().getTypeAllocSize(pair.first);
+        if (currentOffset > offset) {
+          if (pair.second->GetCompType().IsUIntTy()) {
+            for (unsigned i = 4; i < 8; ++i) {
+              Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), Type::getInt32Ty(M.getContext()));
+              Args.emplace_back(NewV);
+            }
+            break;
+          }
+          else if (pair.second->GetCompType().IsIntTy()) {
+            for (unsigned i = 4; i < 8; ++i) {
+              Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), Type::getInt32Ty(M.getContext()));
+              Args.emplace_back(NewV);
+            }
+            break;
+          }
+          else {
+            DXASSERT(false, "Invalid comp type");
+          }
+        }
+      }
+    }
+
+    // mask
+    Args.emplace_back(CI->getArgOperand(8));
+    // alignment
+    Args.emplace_back(M.GetDxilModule().GetOP()->GetI32Const(
+        M.getDataLayout().getTypeAllocSize(NewETy)));
+    CIBuilder.CreateCall(newFunction, Args);
+    CI->eraseFromParent();
+   }
+}
+
+
+// Worker for the min-precision load rewrite: replaces each call of the
+// FromTy-overload rawBufferLoad with a ToTy-overload call (operands 0-4
+// carried over, alignment recomputed for ToTy), then narrows each loaded
+// component back to FromTy at the extractvalue sites. Index 4 (the status
+// value) is never truncated. Erases F once all calls are rewritten.
+void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferLoadByType(
+    Function *F, Type *FromTy, Type *ToTy, OP *Op, const DataLayout &DL) {
+  Function *newFunction = Op->GetOpFunc(DXIL::OpCode::RawBufferLoad, ToTy);
+  for (auto FUser = F->user_begin(), FEnd = F->user_end(); FUser != FEnd;) {
+    User *UserCI = *(FUser++);
+    if (CallInst *CI = dyn_cast<CallInst>(UserCI)) {
+      IRBuilder<> CIBuilder(CI);
+      SmallVector<Value *, 5> newFuncArgs;
+      // opcode, handle, index, elementOffset, mask
+      // Compiler is generating correct element offset even for min precision types
+      // So no need to recalculate here
+      for (unsigned i = 0; i < 5; ++i) {
+        newFuncArgs.emplace_back(CI->getArgOperand(i));
+      }
+      // new alignment for new type
+      newFuncArgs.emplace_back(Op->GetI32Const(DL.getTypeAllocSize(ToTy)));
+      CallInst *newCI = CIBuilder.CreateCall(newFunction, newFuncArgs);
+      // Re-emit each extractvalue against the new call and narrow the result
+      // back to the min-precision type expected by downstream users.
+      for (auto CIUser = CI->user_begin(), CIEnd = CI->user_end();
+           CIUser != CIEnd;) {
+        User *UserEV = *(CIUser++);
+        if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(UserEV)) {
+          IRBuilder<> EVBuilder(EV);
+          ArrayRef<unsigned> Indices = EV->getIndices();
+          DXASSERT(Indices.size() == 1, "Otherwise we have wrong extract value.");
+          Value *newEV = EVBuilder.CreateExtractValue(newCI, Indices);
+          Value *newTruncV = nullptr;
+          if (4 == Indices[0]) { // Don't truncate status
+            newTruncV = newEV;
+          }
+          else if (FromTy->isHalfTy()) {
+            newTruncV = EVBuilder.CreateFPTrunc(newEV, FromTy);
+          } else if (FromTy->isIntegerTy()) {
+            newTruncV = EVBuilder.CreateTrunc(newEV, FromTy);
+          } else {
+            DXASSERT(false, "unexpected type conversion");
+          }
+          EV->replaceAllUsesWith(newTruncV);
+          EV->eraseFromParent();
+        }
+      }
+      CI->eraseFromParent();
+    }
+  }
+  F->eraseFromParent();
+}
+
+// Pass ID, public factory, and legacy pass-manager registration.
+char DxilTranslateRawBuffer::ID = 0;
+ModulePass *llvm::createDxilTranslateRawBuffer() {
+  return new DxilTranslateRawBuffer();
+}
+
+INITIALIZE_PASS(DxilTranslateRawBuffer, "hlsl-translate-dxil-raw-buffer",
+                "Translate raw buffer load", false, false)

+ 53 - 0
lib/HLSL/HLDeadFunctionElimination.cpp

@@ -0,0 +1,53 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// HLDeadFunctionElimination.cpp                                             //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/DXIL/DxilUtil.h"
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/HLModule.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+using namespace llvm;
+using namespace hlsl;
+
+namespace {
+// Module pass that strips unused functions from a module carrying an
+// HLModule, preserving the entry point and patch-constant function (or,
+// for library profiles, removing only unused external functions).
+class HLDeadFunctionElimination : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit HLDeadFunctionElimination () : ModulePass(ID) {}
+
+  const char *getPassName() const override { return "Remove all unused function except entry from HLModule"; }
+
+  // No-op (returns false) on modules without an HLModule.
+  bool runOnModule(Module &M) override {
+    if (M.HasHLModule()) {
+      HLModule &HLM = M.GetHLModule();
+
+      bool IsLib = HLM.GetShaderModel()->IsLib();
+      // Remove unused functions except entry and patch constant func.
+      // For library profile, only remove unused external functions.
+      Function *EntryFunc = HLM.GetEntryFunction();
+      Function *PatchConstantFunc = HLM.GetPatchConstantFunction();
+
+      // Delegate the actual removal to the shared dxilutil helper.
+      return dxilutil::RemoveUnusedFunctions(M, EntryFunc, PatchConstantFunc,
+                                             IsLib);
+    }
+
+    return false;
+  }
+};
+}
+
+char HLDeadFunctionElimination::ID = 0;
+
+// Public factory used by pass-manager clients.
+ModulePass *llvm::createHLDeadFunctionEliminationPass() {
+  return new HLDeadFunctionElimination();
+}
+
+INITIALIZE_PASS(HLDeadFunctionElimination, "hl-dfe", "Remove all unused function except entry from HLModule", false, false)

+ 73 - 0
lib/HLSL/HLMetadataPasses.cpp

@@ -0,0 +1,73 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// HLMetadataPasses.cpp                                                      //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/HLModule.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+using namespace llvm;
+using namespace hlsl;
+
+namespace {
+// Module pass that serializes the in-memory HLModule back into module
+// metadata, clearing any stale high-level metadata first.
+class HLEmitMetadata : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit HLEmitMetadata() : ModulePass(ID) {}
+
+  const char *getPassName() const override { return "HLSL High-Level Metadata Emit"; }
+
+  // No-op (returns false) on modules without an HLModule.
+  bool runOnModule(Module &M) override {
+    if (M.HasHLModule()) {
+      HLModule::ClearHLMetadata(M);
+      M.GetHLModule().EmitHLMetadata();
+      return true;
+    }
+
+    return false;
+  }
+};
+}
+
+char HLEmitMetadata::ID = 0;
+
+// Public factory used by pass-manager clients.
+ModulePass *llvm::createHLEmitMetadataPass() {
+  return new HLEmitMetadata();
+}
+
+INITIALIZE_PASS(HLEmitMetadata, "hlsl-hlemit", "HLSL High-Level Metadata Emit", false, false)
+
+///////////////////////////////////////////////////////////////////////////////
+
+namespace {
+// Module pass that guarantees a module has an HLModule attached, creating
+// one (which loads it from metadata) when absent.
+class HLEnsureMetadata : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit HLEnsureMetadata() : ModulePass(ID) {}
+
+  const char *getPassName() const override { return "HLSL High-Level Metadata Ensure"; }
+
+  // Returns true only when a new HLModule was created.
+  bool runOnModule(Module &M) override {
+    if (!M.HasHLModule()) {
+      M.GetOrCreateHLModule();
+      return true;
+    }
+
+    return false;
+  }
+};
+}
+
+char HLEnsureMetadata::ID = 0;
+
+// Public factory used by pass-manager clients.
+ModulePass *llvm::createHLEnsureMetadataPass() {
+  return new HLEnsureMetadata();
+}
+
+INITIALIZE_PASS(HLEnsureMetadata, "hlsl-hlensure", "HLSL High-Level Metadata Ensure", false, false)

+ 1 - 0
lib/Transforms/Scalar/CMakeLists.txt

@@ -29,6 +29,7 @@ add_llvm_library(LLVMScalarOpts
   LoopUnswitch.cpp
   LowerAtomic.cpp
   LowerExpectIntrinsic.cpp
+  LowerTypePasses.cpp
   MemCpyOptimizer.cpp
   MergedLoadStoreMotion.cpp
   NaryReassociate.cpp

+ 816 - 0
lib/Transforms/Scalar/LowerTypePasses.cpp

@@ -0,0 +1,816 @@
+//===- LowerTypePasses.cpp ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "dxc/HLSL/HLOperations.h"
+#include "dxc/HLSL/HLModule.h"
+#include "dxc/DXIL/DxilConstants.h"
+#include "dxc/DXIL/DxilOperations.h"
+#include "dxc/DXIL/DxilUtil.h"
+#include "dxc/HlslIntrinsicOp.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <vector>
+
+using namespace llvm;
+using namespace hlsl;
+
+// Rebuilds a nested array type with FinalEltTy as the innermost element type,
+// replicating the dimensions listed in nestArrayTys (outermost dimension
+// first). Iterates in reverse so nesting order is preserved.
+static ArrayType* CreateNestArrayTy(Type* FinalEltTy,
+  ArrayRef<ArrayType*> nestArrayTys) {
+  Type* newAT = FinalEltTy;
+  for (auto ArrayTy = nestArrayTys.rbegin(), E = nestArrayTys.rend(); ArrayTy != E;
+    ++ArrayTy)
+    newAT = ArrayType::get(newAT, (*ArrayTy)->getNumElements());
+  return cast<ArrayType>(newAT);
+}
+
+//===----------------------------------------------------------------------===//
+// Lower one type to another type.
+//===----------------------------------------------------------------------===//
+namespace {
+// Abstract base for passes that rewrite allocas and internal globals from one
+// type to another (e.g. vector -> array, resource -> handle). Subclasses
+// supply the type mapping, initializer lowering, and use rewriting; this base
+// class drives the traversal over functions and internal globals and keeps
+// debug info attached to the replacement values.
+class LowerTypePass : public ModulePass {
+public:
+  explicit LowerTypePass(char &ID)
+      : ModulePass(ID) {}
+
+  bool runOnModule(Module &M) override;
+private:
+  bool runOnFunction(Function &F, bool HasDbgInfo);
+  AllocaInst *lowerAlloca(AllocaInst *A);
+  GlobalVariable *lowerInternalGlobal(GlobalVariable *GV);
+protected:
+  // Returns true if value V (an alloca or internal global) must be lowered.
+  virtual bool needToLower(Value *V) = 0;
+  // Rewrites all uses of V in terms of its replacement NewV.
+  virtual void lowerUseWithNewValue(Value *V, Value *NewV) = 0;
+  // Maps the original type to its lowered equivalent.
+  virtual Type *lowerType(Type *Ty) = 0;
+  // Translates a constant initializer to the lowered type NewTy.
+  virtual Constant *lowerInitVal(Constant *InitVal, Type *NewTy) = 0;
+  // Suffix appended to the name of each replacement global.
+  virtual StringRef getGlobalPrefix() = 0;
+  // Hook for per-module setup before any lowering happens.
+  virtual void initialize(Module &M) {};
+};
+
+// Creates a replacement alloca of the lowered type, inserted before A.
+AllocaInst *LowerTypePass::lowerAlloca(AllocaInst *A) {
+  IRBuilder<> AllocaBuilder(A);
+  Type *NewTy = lowerType(A->getAllocatedType());
+  return AllocaBuilder.CreateAlloca(NewTy);
+}
+
+// Creates a replacement global of the lowered type, preserving constness,
+// linkage, thread-local mode and address space of the original.
+GlobalVariable *LowerTypePass::lowerInternalGlobal(GlobalVariable *GV) {
+  Type *NewTy = lowerType(GV->getType()->getPointerElementType());
+  // Default the init val to undef; refined below when GV has an initializer.
+  Constant *InitVal = UndefValue::get(NewTy);
+  if (GV->hasInitializer()) {
+    Constant *OldInitVal = GV->getInitializer();
+    if (isa<ConstantAggregateZero>(OldInitVal))
+      InitVal = ConstantAggregateZero::get(NewTy);
+    else if (!isa<UndefValue>(OldInitVal)) {
+      InitVal = lowerInitVal(OldInitVal, NewTy);
+    }
+  }
+
+  bool isConst = GV->isConstant();
+  GlobalVariable::ThreadLocalMode TLMode = GV->getThreadLocalMode();
+  unsigned AddressSpace = GV->getType()->getAddressSpace();
+  GlobalValue::LinkageTypes linkage = GV->getLinkage();
+
+  Module *M = GV->getParent();
+  GlobalVariable *NewGV = new llvm::GlobalVariable(
+      *M, NewTy, /*IsConstant*/ isConst, linkage,
+      /*InitVal*/ InitVal, GV->getName() + getGlobalPrefix(),
+      /*InsertBefore*/ nullptr, TLMode, AddressSpace);
+  return NewGV;
+}
+
+// Lowers every qualifying alloca in F's entry block, re-pointing any
+// llvm.dbg.declare at the replacement when debug info is present.
+bool LowerTypePass::runOnFunction(Function &F, bool HasDbgInfo) {
+  std::vector<AllocaInst *> workList;
+  // Scan the entry basic block, adding allocas to the worklist.
+  BasicBlock &BB = F.getEntryBlock();
+  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+    if (!isa<AllocaInst>(I))
+      continue;
+    AllocaInst *A = cast<AllocaInst>(I);
+    if (needToLower(A))
+      workList.emplace_back(A);
+  }
+  LLVMContext &Context = F.getContext();
+  for (AllocaInst *A : workList) {
+    AllocaInst *NewA = lowerAlloca(A);
+    if (HasDbgInfo) {
+      // Add debug info.
+      DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(A);
+      if (DDI) {
+        Value *DDIVar = MetadataAsValue::get(Context, DDI->getRawVariable());
+        Value *DDIExp = MetadataAsValue::get(Context, DDI->getRawExpression());
+        Value *VMD = MetadataAsValue::get(Context, ValueAsMetadata::get(NewA));
+        IRBuilder<> debugBuilder(DDI);
+        debugBuilder.CreateCall(DDI->getCalledFunction(),
+                                {VMD, DDIVar, DDIExp});
+      }
+    }
+    // Replace users.
+    lowerUseWithNewValue(A, NewA);
+    // Remove alloca.
+    A->eraseFromParent();
+  }
+  return true;
+}
+
+// Drives lowering over all function bodies, then over static/shared-memory
+// globals that need it, migrating debug info to the replacements.
+bool LowerTypePass::runOnModule(Module &M) {
+  initialize(M);
+  // Load up debug information, to cross-reference values and the instructions
+  // used to load them.
+  bool HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+  llvm::DebugInfoFinder Finder;
+  if (HasDbgInfo) {
+    Finder.processModule(M);
+  }
+
+  std::vector<AllocaInst*> multiDimAllocas;
+  for (Function &F : M.functions()) {
+    if (F.isDeclaration())
+      continue;
+    runOnFunction(F, HasDbgInfo);
+  }
+
+  // Work on internal global.
+  std::vector<GlobalVariable *> vecGVs;
+  for (GlobalVariable &GV : M.globals()) {
+    if (dxilutil::IsStaticGlobal(&GV) || dxilutil::IsSharedMemoryGlobal(&GV)) {
+      if (needToLower(&GV) && !GV.user_empty())
+        vecGVs.emplace_back(&GV);
+    }
+  }
+
+  for (GlobalVariable *GV : vecGVs) {
+    GlobalVariable *NewGV = lowerInternalGlobal(GV);
+    // Add debug info.
+    if (HasDbgInfo) {
+      HLModule::UpdateGlobalVariableDebugInfo(GV, Finder, NewGV);
+    }
+    // Replace users.
+    lowerUseWithNewValue(GV, NewGV);
+    // Remove GV.
+    GV->removeDeadConstantUsers();
+    GV->eraseFromParent();
+  }
+
+  return true;
+}
+
+}
+
+
+//===----------------------------------------------------------------------===//
+// DynamicIndexingVector to Array.
+//===----------------------------------------------------------------------===//
+
+namespace {
+// LowerTypePass that replaces vectors (and arrays of vectors) with plain
+// arrays so DXIL contains no vector-typed memory. By default only vectors
+// with dynamic indexing are converted; ReplaceAllVectors forces conversion
+// of every vector alloca/global.
+class DynamicIndexingVectorToArray : public LowerTypePass {
+  bool ReplaceAllVectors;
+public:
+  explicit DynamicIndexingVectorToArray(bool ReplaceAll = false)
+      : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll) {}
+  static char ID; // Pass identification, replacement for typeid
+  void applyOptions(PassOptions O) override;
+  void dumpConfig(raw_ostream &OS) override;
+protected:
+  bool needToLower(Value *V) override;
+  void lowerUseWithNewValue(Value *V, Value *NewV) override;
+  Type *lowerType(Type *Ty) override;
+  Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override;
+  StringRef getGlobalPrefix() override { return ".v"; }
+
+private:
+  bool HasVectorDynamicIndexing(Value *V);
+  void ReplaceVecGEP(Value *GEP, ArrayRef<Value *> idxList, Value *A,
+                     IRBuilder<> &Builder);
+  void ReplaceVecArrayGEP(Value *GEP, ArrayRef<Value *> idxList, Value *A,
+                          IRBuilder<> &Builder);
+  void ReplaceVectorWithArray(Value *Vec, Value *Array);
+  void ReplaceVectorArrayWithArray(Value *VecArray, Value *Array);
+  void ReplaceStaticIndexingOnVector(Value *V);
+  void ReplaceAddrSpaceCast(ConstantExpr *CE,
+                            Value *A, IRBuilder<> &Builder);
+};
+
+void DynamicIndexingVectorToArray::applyOptions(PassOptions O) {
+  GetPassOptionBool(O, "ReplaceAllVectors", &ReplaceAllVectors,
+                    ReplaceAllVectors);
+}
+void DynamicIndexingVectorToArray::dumpConfig(raw_ostream &OS) {
+  ModulePass::dumpConfig(OS);
+  OS << ",ReplaceAllVectors=" << ReplaceAllVectors;
+}
+
+// Rewrites constant-index element GEPs on vector pointer V as whole-vector
+// load + extract/insert, so V itself no longer needs lowering.
+void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) {
+  for (auto U = V->user_begin(), E = V->user_end(); U != E;) {
+    Value *User = *(U++);
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+      // Only work on element access for vector.
+      if (GEP->getNumOperands() == 3) {
+        auto Idx = GEP->idx_begin();
+        // Skip the pointer idx.
+        Idx++;
+        ConstantInt *constIdx = cast<ConstantInt>(Idx);
+
+        for (auto GEPU = GEP->user_begin(), GEPE = GEP->user_end();
+             GEPU != GEPE;) {
+          Instruction *GEPUser = cast<Instruction>(*(GEPU++));
+
+          IRBuilder<> Builder(GEPUser);
+
+          if (LoadInst *ldInst = dyn_cast<LoadInst>(GEPUser)) {
+            // Change
+            //    ld a->x
+            // into
+            //    b = ld a
+            //    b.x
+            Value *ldVal = Builder.CreateLoad(V);
+            Value *Elt = Builder.CreateExtractElement(ldVal, constIdx);
+            ldInst->replaceAllUsesWith(Elt);
+            ldInst->eraseFromParent();
+          } else {
+            // Change
+            //    st val, a->x
+            // into
+            //    tmp = ld a
+            //    tmp.x = val
+            //    st tmp, a
+            // Must be store inst here.
+            StoreInst *stInst = cast<StoreInst>(GEPUser);
+            Value *val = stInst->getValueOperand();
+            Value *ldVal = Builder.CreateLoad(V);
+            ldVal = Builder.CreateInsertElement(ldVal, val, constIdx);
+            Builder.CreateStore(ldVal, V);
+            stInst->eraseFromParent();
+          }
+        }
+        GEP->eraseFromParent();
+      } else if (GEP->getNumIndices() == 1) {
+        Value *Idx = *GEP->idx_begin();
+        if (ConstantInt *C = dyn_cast<ConstantInt>(Idx)) {
+          if (C->getLimitedValue() == 0) {
+            GEP->replaceAllUsesWith(V);
+            GEP->eraseFromParent();
+          }
+        }
+      }
+    }
+  }
+}
+
+bool DynamicIndexingVectorToArray::needToLower(Value *V) {
+  Type *Ty = V->getType()->getPointerElementType();
+  if (dyn_cast<VectorType>(Ty)) {
+    if (isa<GlobalVariable>(V) || ReplaceAllVectors) {
+      return true;
+    }
+    // Don't lower local vector which only static indexing.
+    if (HasVectorDynamicIndexing(V)) {
+      return true;
+    } else {
+      // Change vector indexing with ld st.
+      ReplaceStaticIndexingOnVector(V);
+      return false;
+    }
+  } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+    // Array must be replaced even without dynamic indexing to remove vector
+    // type in dxil.
+    // TODO: optimize static array index in later pass.
+    Type *EltTy = dxilutil::GetArrayEltTy(AT);
+    return isa<VectorType>(EltTy);
+  }
+  return false;
+}
+
+// Mirrors a GEP on the vector onto the replacement array A, recursing when
+// the GEP result is still a vector pointer.
+void DynamicIndexingVectorToArray::ReplaceVecGEP(Value *GEP, ArrayRef<Value *> idxList,
+                                       Value *A, IRBuilder<> &Builder) {
+  Value *newGEP = Builder.CreateGEP(A, idxList);
+  if (GEP->getType()->getPointerElementType()->isVectorTy()) {
+    ReplaceVectorWithArray(GEP, newGEP);
+  } else {
+    GEP->replaceAllUsesWith(newGEP);
+  }
+}
+
+void DynamicIndexingVectorToArray::ReplaceAddrSpaceCast(ConstantExpr *CE,
+                                              Value *A, IRBuilder<> &Builder) {
+  // create new AddrSpaceCast.
+  Value *NewAddrSpaceCast = Builder.CreateAddrSpaceCast(
+    A,
+    PointerType::get(A->getType()->getPointerElementType(),
+                      CE->getType()->getPointerAddressSpace()));
+  ReplaceVectorWithArray(CE, NewAddrSpaceCast);
+}
+
+// Rewrites every use of vector pointer Vec in terms of array pointer A:
+// GEPs are mirrored, whole-vector loads/stores become per-element ones.
+void DynamicIndexingVectorToArray::ReplaceVectorWithArray(Value *Vec, Value *A) {
+  unsigned size = Vec->getType()->getPointerElementType()->getVectorNumElements();
+  for (auto U = Vec->user_begin(); U != Vec->user_end();) {
+    User *User = (*U++);
+
+    // GlobalVariable user.
+    if (ConstantExpr * CE = dyn_cast<ConstantExpr>(User)) {
+      if (User->user_empty())
+        continue;
+      if (GEPOperator *GEP = dyn_cast<GEPOperator>(User)) {
+        IRBuilder<> Builder(Vec->getContext());
+        SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
+        ReplaceVecGEP(GEP, idxList, A, Builder);
+        continue;
+      } else if (CE->getOpcode() == Instruction::AddrSpaceCast) {
+        IRBuilder<> Builder(Vec->getContext());
+        ReplaceAddrSpaceCast(CE, A, Builder);
+        continue;
+      }
+      DXASSERT(0, "not implemented yet");
+    }
+    // Instruction user.
+    Instruction *UserInst = cast<Instruction>(User);
+    IRBuilder<> Builder(UserInst);
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+      SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
+      ReplaceVecGEP(cast<GEPOperator>(GEP), idxList, A, Builder);
+      GEP->eraseFromParent();
+    } else if (LoadInst *ldInst = dyn_cast<LoadInst>(User)) {
+      // If ld whole struct, need to split the load.
+      Value *newLd = UndefValue::get(ldInst->getType());
+      Value *zero = Builder.getInt32(0);
+      for (unsigned i = 0; i < size; i++) {
+        Value *idx = Builder.getInt32(i);
+        Value *GEP = Builder.CreateInBoundsGEP(A, {zero, idx});
+        Value *Elt = Builder.CreateLoad(GEP);
+        newLd = Builder.CreateInsertElement(newLd, Elt, i);
+      }
+      ldInst->replaceAllUsesWith(newLd);
+      ldInst->eraseFromParent();
+    } else if (StoreInst *stInst = dyn_cast<StoreInst>(User)) {
+      Value *val = stInst->getValueOperand();
+      Value *zero = Builder.getInt32(0);
+      for (unsigned i = 0; i < size; i++) {
+        Value *Elt = Builder.CreateExtractElement(val, i);
+        Value *idx = Builder.getInt32(i);
+        Value *GEP = Builder.CreateInBoundsGEP(A, {zero, idx});
+        Builder.CreateStore(Elt, GEP);
+      }
+      stInst->eraseFromParent();
+    } else {
+      // Vector parameter should be lowered.
+      // No function call should use vector.
+      DXASSERT(0, "not implement yet");
+    }
+  }
+}
+
+void DynamicIndexingVectorToArray::ReplaceVecArrayGEP(Value *GEP,
+                                            ArrayRef<Value *> idxList, Value *A,
+                                            IRBuilder<> &Builder) {
+  Value *newGEP = Builder.CreateGEP(A, idxList);
+  Type *Ty = GEP->getType()->getPointerElementType();
+  if (Ty->isVectorTy()) {
+    ReplaceVectorWithArray(GEP, newGEP);
+  } else if (Ty->isArrayTy()) {
+    ReplaceVectorArrayWithArray(GEP, newGEP);
+  } else {
+    DXASSERT(Ty->isSingleValueType(), "must be vector subscript here");
+    GEP->replaceAllUsesWith(newGEP);
+  }
+}
+
+// Rewrites uses of an array-of-vectors pointer VA in terms of the lowered
+// array pointer A. Users must be GEPs or bitcasts.
+void DynamicIndexingVectorToArray::ReplaceVectorArrayWithArray(Value *VA, Value *A) {
+  for (auto U = VA->user_begin(); U != VA->user_end();) {
+    User *User = *(U++);
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+      IRBuilder<> Builder(GEP);
+      SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
+      ReplaceVecArrayGEP(GEP, idxList, A, Builder);
+      GEP->eraseFromParent();
+    } else if (GEPOperator *GEPOp = dyn_cast<GEPOperator>(User)) {
+      IRBuilder<> Builder(GEPOp->getContext());
+      SmallVector<Value *, 4> idxList(GEPOp->idx_begin(), GEPOp->idx_end());
+      ReplaceVecArrayGEP(GEPOp, idxList, A, Builder);
+    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+      BCI->setOperand(0, A);
+    } else {
+      DXASSERT(0, "Array pointer should only used by GEP");
+    }
+  }
+}
+
+void DynamicIndexingVectorToArray::lowerUseWithNewValue(Value *V, Value *NewV) {
+  Type *Ty = V->getType()->getPointerElementType();
+  // Replace V with NewV.
+  if (Ty->isVectorTy()) {
+    ReplaceVectorWithArray(V, NewV);
+  } else {
+    ReplaceVectorArrayWithArray(V, NewV);
+  }
+}
+
+// Vector<N x T> becomes [N x T]; arrays of vectors keep their outer
+// dimensions with the innermost vector turned into an array.
+Type *DynamicIndexingVectorToArray::lowerType(Type *Ty) {
+  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
+    return ArrayType::get(VT->getElementType(), VT->getNumElements());
+  } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+    SmallVector<ArrayType *, 4> nestArrayTys;
+    nestArrayTys.emplace_back(AT);
+
+    Type *EltTy = AT->getElementType();
+    // support multi level of array
+    while (EltTy->isArrayTy()) {
+      ArrayType *ElAT = cast<ArrayType>(EltTy);
+      nestArrayTys.emplace_back(ElAT);
+      EltTy = ElAT->getElementType();
+    }
+    if (EltTy->isVectorTy()) {
+      Type *vecAT = ArrayType::get(EltTy->getVectorElementType(),
+                                   EltTy->getVectorNumElements());
+      return CreateNestArrayTy(vecAT, nestArrayTys);
+    }
+    return nullptr;
+  }
+  return nullptr;
+}
+
+// Converts a vector (or nested array-of-vector) constant initializer into
+// the equivalent ConstantArray of the lowered type, element by element.
+Constant *DynamicIndexingVectorToArray::lowerInitVal(Constant *InitVal, Type *NewTy) {
+  Type *VecTy = InitVal->getType();
+  ArrayType *ArrayTy = cast<ArrayType>(NewTy);
+  if (VecTy->isVectorTy()) {
+    SmallVector<Constant *, 4> Elts;
+    for (unsigned i = 0; i < VecTy->getVectorNumElements(); i++) {
+      Elts.emplace_back(InitVal->getAggregateElement(i));
+    }
+    return ConstantArray::get(ArrayTy, Elts);
+  } else {
+    ArrayType *AT = cast<ArrayType>(VecTy);
+    ArrayType *EltArrayTy = cast<ArrayType>(ArrayTy->getElementType());
+    SmallVector<Constant *, 4> Elts;
+    for (unsigned i = 0; i < AT->getNumElements(); i++) {
+      Constant *Elt = lowerInitVal(InitVal->getAggregateElement(i), EltArrayTy);
+      Elts.emplace_back(Elt);
+    }
+    return ConstantArray::get(ArrayTy, Elts);
+  }
+}
+
+bool DynamicIndexingVectorToArray::HasVectorDynamicIndexing(Value *V) {
+  return dxilutil::HasDynamicIndexing(V);
+}
+
+}
+
+char DynamicIndexingVectorToArray::ID = 0;
+
+INITIALIZE_PASS(DynamicIndexingVectorToArray, "dynamic-vector-to-array",
+  "Replace dynamic indexing vector with array", false,
+  false)
+
+// Public interface to the DynamicIndexingVectorToArray pass
+ModulePass *llvm::createDynamicIndexingVectorToArrayPass(bool ReplaceAllVector) {
+  return new DynamicIndexingVectorToArray(ReplaceAllVector);
+}
+
+//===----------------------------------------------------------------------===//
+// Flatten multi dim array into 1 dim.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+// LowerTypePass that flattens multi-dimensional arrays (e.g. [2 x [3 x T]])
+// into a single one-dimensional array ([6 x T]), linearizing indices in GEPs.
+class MultiDimArrayToOneDimArray : public LowerTypePass {
+public:
+  explicit MultiDimArrayToOneDimArray() : LowerTypePass(ID) {}
+  static char ID; // Pass identification, replacement for typeid
+protected:
+  bool needToLower(Value *V) override;
+  void lowerUseWithNewValue(Value *V, Value *NewV) override;
+  Type *lowerType(Type *Ty) override;
+  Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override;
+  StringRef getGlobalPrefix() override { return ".1dim"; }
+};
+
+bool MultiDimArrayToOneDimArray::needToLower(Value *V) {
+  Type *Ty = V->getType()->getPointerElementType();
+  ArrayType *AT = dyn_cast<ArrayType>(Ty);
+  if (!AT)
+    return false;
+  if (!isa<ArrayType>(AT->getElementType())) {
+    return false;
+  } else {
+    // Merge all GEP.
+    HLModule::MergeGepUse(V);
+    return true;
+  }
+}
+
+// Rewrites a GEP into the multi-dim array as a GEP into the flattened
+// one-dim array, linearizing the per-dimension indices into one index.
+void ReplaceMultiDimGEP(User *GEP, Value *OneDim, IRBuilder<> &Builder) {
+  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
+
+  Value *PtrOffset = GEPIt.getOperand();
+  ++GEPIt;
+  Value *ArrayIdx = GEPIt.getOperand();
+  ++GEPIt;
+  Value *VecIdx = nullptr;
+  for (; GEPIt != E; ++GEPIt) {
+    if (GEPIt->isArrayTy()) {
+      unsigned arraySize = GEPIt->getArrayNumElements();
+      Value *V = GEPIt.getOperand();
+      ArrayIdx = Builder.CreateMul(ArrayIdx, Builder.getInt32(arraySize));
+      ArrayIdx = Builder.CreateAdd(V, ArrayIdx);
+    } else {
+      DXASSERT_NOMSG(isa<VectorType>(*GEPIt));
+      VecIdx = GEPIt.getOperand();
+    }
+  }
+  Value *NewGEP = nullptr;
+  if (!VecIdx)
+    NewGEP = Builder.CreateGEP(OneDim, {PtrOffset, ArrayIdx});
+  else
+    NewGEP = Builder.CreateGEP(OneDim, {PtrOffset, ArrayIdx, VecIdx});
+
+  GEP->replaceAllUsesWith(NewGEP);
+}
+
+void MultiDimArrayToOneDimArray::lowerUseWithNewValue(Value *MultiDim, Value *OneDim) {
+  LLVMContext &Context = MultiDim->getContext();
+  // All users should be element type.
+  // Replace users of AI or GV.
+  for (auto it = MultiDim->user_begin(); it != MultiDim->user_end();) {
+    User *U = *(it++);
+    if (U->user_empty())
+      continue;
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+      BCI->setOperand(0, OneDim);
+      continue;
+    }
+
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+      IRBuilder<> Builder(Context);
+      if (GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+        // NewGEP must be GEPOperator too.
+        // No instruction will be built.
+        ReplaceMultiDimGEP(U, OneDim, Builder);
+      } else if (CE->getOpcode() == Instruction::AddrSpaceCast) {
+        Value *NewAddrSpaceCast = Builder.CreateAddrSpaceCast(
+          OneDim,
+          PointerType::get(OneDim->getType()->getPointerElementType(),
+                           CE->getType()->getPointerAddressSpace()));
+        lowerUseWithNewValue(CE, NewAddrSpaceCast);
+      } else {
+        DXASSERT(0, "not implemented");
+      }
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+      IRBuilder<> Builder(GEP);
+      ReplaceMultiDimGEP(U, OneDim, Builder);
+      GEP->eraseFromParent();
+    } else {
+      DXASSERT(0, "not implemented");
+    }
+  }
+}
+
+// Computes the flattened array type: element count is the product of all
+// dimensions, element type is the innermost non-array type.
+Type *MultiDimArrayToOneDimArray::lowerType(Type *Ty) {
+  ArrayType *AT = cast<ArrayType>(Ty);
+  unsigned arraySize = AT->getNumElements();
+
+  Type *EltTy = AT->getElementType();
+  // support multi level of array
+  while (EltTy->isArrayTy()) {
+    ArrayType *ElAT = cast<ArrayType>(EltTy);
+    arraySize *= ElAT->getNumElements();
+    EltTy = ElAT->getElementType();
+  }
+
+  return ArrayType::get(EltTy, arraySize);
+}
+
+// Appends the leaf elements of a (possibly nested) constant array to Elts
+// in row-major order.
+void FlattenMultiDimConstArray(Constant *V, std::vector<Constant *> &Elts) {
+  if (!V->getType()->isArrayTy()) {
+    Elts.emplace_back(V);
+  } else {
+    ArrayType *AT = cast<ArrayType>(V->getType());
+    for (unsigned i = 0; i < AT->getNumElements(); i++) {
+      FlattenMultiDimConstArray(V->getAggregateElement(i), Elts);
+    }
+  }
+}
+
+Constant *MultiDimArrayToOneDimArray::lowerInitVal(Constant *InitVal, Type *NewTy) {
+  if (InitVal) {
+    // MultiDim array init should be done by store.
+    if (isa<ConstantAggregateZero>(InitVal))
+      InitVal = ConstantAggregateZero::get(NewTy);
+    else if (isa<UndefValue>(InitVal))
+      InitVal = UndefValue::get(NewTy);
+    else {
+      std::vector<Constant *> Elts;
+      FlattenMultiDimConstArray(InitVal, Elts);
+      InitVal = ConstantArray::get(cast<ArrayType>(NewTy), Elts);
+    }
+  } else {
+    InitVal = UndefValue::get(NewTy);
+  }
+  return InitVal;
+}
+
+}
+
+char MultiDimArrayToOneDimArray::ID = 0;
+
+INITIALIZE_PASS(MultiDimArrayToOneDimArray, "multi-dim-one-dim",
+  "Flatten multi-dim array into one-dim array", false,
+  false)
+
+// Public interface to the MultiDimArrayToOneDimArray pass
+ModulePass *llvm::createMultiDimArrayToOneDimArrayPass() {
+  return new MultiDimArrayToOneDimArray();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower resource into handle.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+// LowerTypePass that replaces HLSL resource objects (and arrays of them)
+// with DXIL handle values. Skipped entirely for library profiles.
+class ResourceToHandle : public LowerTypePass {
+public:
+  explicit ResourceToHandle() : LowerTypePass(ID) {}
+  static char ID; // Pass identification, replacement for typeid
+protected:
+  bool needToLower(Value *V) override;
+  void lowerUseWithNewValue(Value *V, Value *NewV) override;
+  Type *lowerType(Type *Ty) override;
+  Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override;
+  StringRef getGlobalPrefix() override { return ".res"; }
+  void initialize(Module &M) override;
+private:
+  void ReplaceResourceWithHandle(Value *ResPtr, Value *HandlePtr);
+  void ReplaceResourceGEPWithHandleGEP(Value *GEP, ArrayRef<Value *> idxList,
+                                       Value *A, IRBuilder<> &Builder);
+  void ReplaceResourceArrayWithHandleArray(Value *VA, Value *A);
+
+  Type *m_HandleTy;       // dx.types.Handle type from the DXIL OP table.
+  HLModule *m_pHLM;       // Current module's HLModule; set in initialize().
+  bool  m_bIsLib;         // True for lib shader models; lowering is skipped.
+};
+
+// Caches the handle type and profile kind; requires an HLModule.
+void ResourceToHandle::initialize(Module &M) {
+  DXASSERT(M.HasHLModule(), "require HLModule");
+  m_pHLM = &M.GetHLModule();
+  m_HandleTy = m_pHLM->GetOP()->GetHandleType();
+  m_bIsLib = m_pHLM->GetShaderModel()->IsLib();
+}
+
+bool ResourceToHandle::needToLower(Value *V) {
+  Type *Ty = V->getType()->getPointerElementType();
+  Ty = dxilutil::GetArrayEltTy(Ty);
+  return (dxilutil::IsHLSLObjectType(Ty) &&
+          !HLModule::IsStreamOutputType(Ty)) &&
+         // Skip lib profile.
+         !m_bIsLib;
+}
+
+// A resource becomes the handle type; an array of resources keeps its
+// dimensions with the handle type as its innermost element.
+Type *ResourceToHandle::lowerType(Type *Ty) {
+  if ((dxilutil::IsHLSLObjectType(Ty) && !HLModule::IsStreamOutputType(Ty))) {
+    return m_HandleTy;
+  }
+
+  ArrayType *AT = cast<ArrayType>(Ty);
+
+  SmallVector<ArrayType *, 4> nestArrayTys;
+  nestArrayTys.emplace_back(AT);
+
+  Type *EltTy = AT->getElementType();
+  // support multi level of array
+  while (EltTy->isArrayTy()) {
+    ArrayType *ElAT = cast<ArrayType>(EltTy);
+    nestArrayTys.emplace_back(ElAT);
+    EltTy = ElAT->getElementType();
+  }
+
+  return CreateNestArrayTy(m_HandleTy, nestArrayTys);
+}
+
+Constant *ResourceToHandle::lowerInitVal(Constant *InitVal, Type *NewTy) {
+  DXASSERT(isa<UndefValue>(InitVal), "resource cannot have real init val");
+  return UndefValue::get(NewTy);
+}
+
+// Rewrites loads/stores through a resource pointer as handle loads/stores,
+// inserting HLCast/HLCreateHandle calls to convert between the two views.
+void ResourceToHandle::ReplaceResourceWithHandle(Value *ResPtr,
+                                                 Value *HandlePtr) {
+  for (auto it = ResPtr->user_begin(); it != ResPtr->user_end();) {
+    User *U = *(it++);
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      IRBuilder<> Builder(LI);
+      Value *Handle = Builder.CreateLoad(HandlePtr);
+      Type *ResTy = LI->getType();
+      // Used by createHandle or Store.
+      for (auto ldIt = LI->user_begin(); ldIt != LI->user_end();) {
+        User *ldU = *(ldIt++);
+        if (StoreInst *SI = dyn_cast<StoreInst>(ldU)) {
+          Value *TmpRes = HLModule::EmitHLOperationCall(
+              Builder, HLOpcodeGroup::HLCast,
+              (unsigned)HLCastOpcode::HandleToResCast, ResTy, {Handle},
+              *m_pHLM->GetModule());
+          SI->replaceUsesOfWith(LI, TmpRes);
+        } else {
+          CallInst *CI = cast<CallInst>(ldU);
+          DXASSERT(hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()) == HLOpcodeGroup::HLCreateHandle,
+                   "must be createHandle");
+          CI->replaceAllUsesWith(Handle);
+          CI->eraseFromParent();
+        }
+      }
+      LI->eraseFromParent();
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      Value *Res = SI->getValueOperand();
+      IRBuilder<> Builder(SI);
+      // CreateHandle from Res.
+      Value *Handle = HLModule::EmitHLOperationCall(
+          Builder, HLOpcodeGroup::HLCreateHandle,
+          /*opcode*/ 0, m_HandleTy, {Res}, *m_pHLM->GetModule());
+      // Store Handle to HandlePtr.
+      Builder.CreateStore(Handle, HandlePtr);
+      // Remove resource Store.
+      SI->eraseFromParent();
+    } else if (U->user_empty() && isa<GEPOperator>(U)) {
+      continue;
+    } else {
+      CallInst *CI = cast<CallInst>(U);
+      IRBuilder<> Builder(CI);
+      HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
+      // Allow user function to use res ptr as argument.
+      if (group == HLOpcodeGroup::NotHL) {
+          Value *TmpResPtr = Builder.CreateBitCast(HandlePtr, ResPtr->getType());
+          CI->replaceUsesOfWith(ResPtr, TmpResPtr);
+      } else {
+        DXASSERT(0, "invalid operation on resource");
+      }
+    }
+  }
+}
+
+// Mirrors a GEP on the resource (array) onto the handle (array), recursing
+// until a scalar resource pointer is reached.
+void ResourceToHandle::ReplaceResourceGEPWithHandleGEP(
+    Value *GEP, ArrayRef<Value *> idxList, Value *A, IRBuilder<> &Builder) {
+  Value *newGEP = Builder.CreateGEP(A, idxList);
+  Type *Ty = GEP->getType()->getPointerElementType();
+  if (Ty->isArrayTy()) {
+    ReplaceResourceArrayWithHandleArray(GEP, newGEP);
+  } else {
+    DXASSERT(dxilutil::IsHLSLObjectType(Ty), "must be resource type here");
+    ReplaceResourceWithHandle(GEP, newGEP);
+  }
+}
+
+void ResourceToHandle::ReplaceResourceArrayWithHandleArray(Value *VA,
+                                                           Value *A) {
+  for (auto U = VA->user_begin(); U != VA->user_end();) {
+    User *User = *(U++);
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+      IRBuilder<> Builder(GEP);
+      SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
+      ReplaceResourceGEPWithHandleGEP(GEP, idxList, A, Builder);
+      GEP->eraseFromParent();
+    } else if (GEPOperator *GEPOp = dyn_cast<GEPOperator>(User)) {
+      IRBuilder<> Builder(GEPOp->getContext());
+      SmallVector<Value *, 4> idxList(GEPOp->idx_begin(), GEPOp->idx_end());
+      ReplaceResourceGEPWithHandleGEP(GEPOp, idxList, A, Builder);
+    } else {
+      DXASSERT(0, "Array pointer should only used by GEP");
+    }
+  }
+}
+
+void ResourceToHandle::lowerUseWithNewValue(Value *V, Value *NewV) {
+  Type *Ty = V->getType()->getPointerElementType();
+  // Replace V with NewV.
+  if (Ty->isArrayTy()) {
+    ReplaceResourceArrayWithHandleArray(V, NewV);
+  } else {
+    ReplaceResourceWithHandle(V, NewV);
+  }
+}
+
+}
+
+char ResourceToHandle::ID = 0;
+
+INITIALIZE_PASS(ResourceToHandle, "resource-handle",
+  "Lower resource into handle", false,
+  false)
+
+// Public interface to the ResourceToHandle pass
+ModulePass *llvm::createResourceToHandlePass() {
+  return new ResourceToHandle();
+}

+ 0 - 1536
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -307,767 +307,6 @@ FunctionPass *llvm::createScalarReplAggregatesHLSLPass(bool UseDomTree, bool Pro
   return new SROA_SSAUp_HLSL(Promote);
 }
 
-//===----------------------------------------------------------------------===//
-// Convert To Scalar Optimization.
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
-/// optimization, which scans the uses of an alloca and determines if it can
-/// rewrite it in terms of a single new alloca that can be mem2reg'd.
-class ConvertToScalarInfo {
-  /// AllocaSize - The size of the alloca being considered in bytes.
-  unsigned AllocaSize;
-  const DataLayout &DL;
-  unsigned ScalarLoadThreshold;
-
-  /// IsNotTrivial - This is set to true if there is some access to the object
-  /// which means that mem2reg can't promote it.
-  bool IsNotTrivial;
-
-  /// ScalarKind - Tracks the kind of alloca being considered for promotion,
-  /// computed based on the uses of the alloca rather than the LLVM type system.
-  enum {
-    Unknown,
-
-    // Accesses via GEPs that are consistent with element access of a vector
-    // type. This will not be converted into a vector unless there is a later
-    // access using an actual vector type.
-    ImplicitVector,
-
-    // Accesses via vector operations and GEPs that are consistent with the
-    // layout of a vector type.
-    Vector,
-
-    // An integer bag-of-bits with bitwise operations for insertion and
-    // extraction. Any combination of types can be converted into this kind
-    // of scalar.
-    Integer
-  } ScalarKind;
-
-  /// VectorTy - This tracks the type that we should promote the vector to if
-  /// it is possible to turn it into a vector.  This starts out null, and if it
-  /// isn't possible to turn into a vector type, it gets set to VoidTy.
-  VectorType *VectorTy;
-
-  /// HadNonMemTransferAccess - True if there is at least one access to the
-  /// alloca that is not a MemTransferInst.  We don't want to turn structs into
-  /// large integers unless there is some potential for optimization.
-  bool HadNonMemTransferAccess;
-
-  /// HadDynamicAccess - True if some element of this alloca was dynamic.
-  /// We don't yet have support for turning a dynamic access into a large
-  /// integer.
-  bool HadDynamicAccess;
-
-public:
-  explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL,
-                               unsigned SLT)
-      : AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false),
-        ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false),
-        HadDynamicAccess(false) {}
-
-  AllocaInst *TryConvert(AllocaInst *AI);
-
-private:
-  bool CanConvertToScalar(Value *V, uint64_t Offset, Value *NonConstantIdx);
-  void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
-  bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
-  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
-                           Value *NonConstantIdx);
-
-  Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType, uint64_t Offset,
-                                    Value *NonConstantIdx,
-                                    IRBuilder<> &Builder);
-  Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
-                                   uint64_t Offset, Value *NonConstantIdx,
-                                   IRBuilder<> &Builder);
-};
-} // end anonymous namespace.
-
-/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
-/// rewrite it to be a new alloca which is mem2reg'able.  This returns the new
-/// alloca if possible or null if not.
-AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
-  // If we can't convert this scalar, or if mem2reg can trivially do it, bail
-  // out.
-  if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial)
-    return nullptr;
-
-  // If an alloca has only memset / memcpy uses, it may still have an Unknown
-  // ScalarKind. Treat it as an Integer below.
-  if (ScalarKind == Unknown)
-    ScalarKind = Integer;
-
-  if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
-    ScalarKind = Integer;
-
-  // If we were able to find a vector type that can handle this with
-  // insert/extract elements, and if there was at least one use that had
-  // a vector type, promote this to a vector.  We don't want to promote
-  // random stuff that doesn't use vectors (e.g. <9 x double>) because then
-  // we just get a lot of insert/extracts.  If at least one vector is
-  // involved, then we probably really do have a union of vector/array.
-  Type *NewTy;
-  if (ScalarKind == Vector) {
-    assert(VectorTy && "Missing type for vector scalar.");
-    DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = " << *VectorTy
-                 << '\n');
-    NewTy = VectorTy; // Use the vector type.
-  } else {
-    unsigned BitWidth = AllocaSize * 8;
-
-    // Do not convert to scalar integer if the alloca size exceeds the
-    // scalar load threshold.
-    if (BitWidth > ScalarLoadThreshold)
-      return nullptr;
-
-    if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
-        !HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth))
-      return nullptr;
-    // Dynamic accesses on integers aren't yet supported.  They need us to shift
-    // by a dynamic amount which could be difficult to work out as we might not
-    // know whether to use a left or right shift.
-    if (ScalarKind == Integer && HadDynamicAccess)
-      return nullptr;
-
-    DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
-    // Create and insert the integer alloca.
-    NewTy = IntegerType::get(AI->getContext(), BitWidth);
-  }
-  AllocaInst *NewAI =
-      new AllocaInst(NewTy, nullptr, "", AI->getParent()->begin());
-  ConvertUsesToScalar(AI, NewAI, 0, nullptr);
-  return NewAI;
-}
-
-/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
-/// (VectorTy) so far at the offset specified by Offset (which is specified in
-/// bytes).
-///
-/// There are two cases we handle here:
-///   1) A union of vector types of the same size and potentially its elements.
-///      Here we turn element accesses into insert/extract element operations.
-///      This promotes a <4 x float> with a store of float to the third element
-///      into a <4 x float> that uses insert element.
-///   2) A fully general blob of memory, which we turn into some (potentially
-///      large) integer type with extract and insert operations where the loads
-///      and stores would mutate the memory.  We mark this by setting VectorTy
-///      to VoidTy.
-void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In, uint64_t Offset) {
-  // If we already decided to turn this into a blob of integer memory, there is
-  // nothing to be done.
-  if (ScalarKind == Integer)
-    return;
-
-  // If this could be contributing to a vector, analyze it.
-
-  // If the In type is a vector that is the same size as the alloca, see if it
-  // matches the existing VecTy.
-  if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
-    if (MergeInVectorType(VInTy, Offset))
-      return;
-  } else if (In->isFloatTy() || In->isDoubleTy() ||
-             (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
-              isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
-    // Full width accesses can be ignored, because they can always be turned
-    // into bitcasts.
-    unsigned EltSize = In->getPrimitiveSizeInBits() / 8;
-    if (EltSize == AllocaSize)
-      return;
-
-    // If we're accessing something that could be an element of a vector, see
-    // if the implied vector agrees with what we already have and if Offset is
-    // compatible with it.
-    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
-        (!VectorTy ||
-         EltSize == VectorTy->getElementType()->getPrimitiveSizeInBits() / 8)) {
-      if (!VectorTy) {
-        ScalarKind = ImplicitVector;
-        VectorTy = VectorType::get(In, AllocaSize / EltSize);
-      }
-      return;
-    }
-  }
-
-  // Otherwise, we have a case that we can't handle with an optimized vector
-  // form.  We can still turn this into a large integer.
-  ScalarKind = Integer;
-}
-
-/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
-/// returning true if the type was successfully merged and false otherwise.
-bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
-                                            uint64_t Offset) {
-  if (VInTy->getBitWidth() / 8 == AllocaSize && Offset == 0) {
-    // If we're storing/loading a vector of the right size, allow it as a
-    // vector.  If this the first vector we see, remember the type so that
-    // we know the element size. If this is a subsequent access, ignore it
-    // even if it is a differing type but the same size. Worst case we can
-    // bitcast the resultant vectors.
-    if (!VectorTy)
-      VectorTy = VInTy;
-    ScalarKind = Vector;
-    return true;
-  }
-
-  return false;
-}
-
-/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
-/// its accesses to a single vector type, return true and set VecTy to
-/// the new type.  If we could convert the alloca into a single promotable
-/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
-/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
-/// is the current offset from the base of the alloca being analyzed.
-///
-/// If we see at least one access to the value that is as a vector type, set the
-/// SawVec flag.
-bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
-                                             Value *NonConstantIdx) {
-  for (User *U : V->users()) {
-    Instruction *UI = cast<Instruction>(U);
-
-    if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
-      // Don't break volatile loads.
-      if (!LI->isSimple())
-        return false;
-
-      HadNonMemTransferAccess = true;
-      MergeInTypeForLoadOrStore(LI->getType(), Offset);
-      continue;
-    }
-
-    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
-      // Storing the pointer, not into the value?
-      if (SI->getOperand(0) == V || !SI->isSimple())
-        return false;
-
-      HadNonMemTransferAccess = true;
-      MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
-      continue;
-    }
-
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(UI)) {
-      if (!onlyUsedByLifetimeMarkers(BCI))
-        IsNotTrivial = true; // Can't be mem2reg'd.
-      if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
-        return false;
-      continue;
-    }
-
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UI)) {
-      // If this is a GEP with a variable indices, we can't handle it.
-      PointerType *PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
-      if (!PtrTy)
-        return false;
-
-      // Compute the offset that this GEP adds to the pointer.
-      SmallVector<Value *, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
-      Value *GEPNonConstantIdx = nullptr;
-      if (!GEP->hasAllConstantIndices()) {
-        if (!isa<VectorType>(PtrTy->getElementType()))
-          return false;
-        if (NonConstantIdx)
-          return false;
-        GEPNonConstantIdx = Indices.pop_back_val();
-        if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
-          return false;
-        HadDynamicAccess = true;
-      } else
-        GEPNonConstantIdx = NonConstantIdx;
-      uint64_t GEPOffset = DL.getIndexedOffset(PtrTy, Indices);
-      // See if all uses can be converted.
-      if (!CanConvertToScalar(GEP, Offset + GEPOffset, GEPNonConstantIdx))
-        return false;
-      IsNotTrivial = true; // Can't be mem2reg'd.
-      HadNonMemTransferAccess = true;
-      continue;
-    }
-
-    // If this is a constant sized memset of a constant value (e.g. 0) we can
-    // handle it.
-    if (MemSetInst *MSI = dyn_cast<MemSetInst>(UI)) {
-      // Store to dynamic index.
-      if (NonConstantIdx)
-        return false;
-      // Store of constant value.
-      if (!isa<ConstantInt>(MSI->getValue()))
-        return false;
-
-      // Store of constant size.
-      ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
-      if (!Len)
-        return false;
-
-      // If the size differs from the alloca, we can only convert the alloca to
-      // an integer bag-of-bits.
-      // FIXME: This should handle all of the cases that are currently accepted
-      // as vector element insertions.
-      if (Len->getZExtValue() != AllocaSize || Offset != 0)
-        ScalarKind = Integer;
-
-      IsNotTrivial = true; // Can't be mem2reg'd.
-      HadNonMemTransferAccess = true;
-      continue;
-    }
-
-    // If this is a memcpy or memmove into or out of the whole allocation, we
-    // can handle it like a load or store of the scalar type.
-    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(UI)) {
-      // Store to dynamic index.
-      if (NonConstantIdx)
-        return false;
-      ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
-      if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0)
-        return false;
-
-      IsNotTrivial = true; // Can't be mem2reg'd.
-      continue;
-    }
-
-    // If this is a lifetime intrinsic, we can handle it.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI)) {
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
-          II->getIntrinsicID() == Intrinsic::lifetime_end) {
-        continue;
-      }
-    }
-
-    // Otherwise, we cannot handle this!
-    return false;
-  }
-
-  return true;
-}
-
-/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
-/// directly.  This happens when we are converting an "integer union" to a
-/// single integer scalar, or when we are converting a "vector union" to a
-/// vector with insert/extractelement instructions.
-///
-/// Offset is an offset from the original alloca, in bits that need to be
-/// shifted to the right.  By the end of this, there should be no uses of Ptr.
-void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
-                                              uint64_t Offset,
-                                              Value *NonConstantIdx) {
-  while (!Ptr->use_empty()) {
-    Instruction *User = cast<Instruction>(Ptr->user_back());
-
-    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
-      ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
-      CI->eraseFromParent();
-      continue;
-    }
-
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
-      // Compute the offset that this GEP adds to the pointer.
-      SmallVector<Value *, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
-      Value *GEPNonConstantIdx = nullptr;
-      if (!GEP->hasAllConstantIndices()) {
-        assert(!NonConstantIdx &&
-               "Dynamic GEP reading from dynamic GEP unsupported");
-        GEPNonConstantIdx = Indices.pop_back_val();
-      } else
-        GEPNonConstantIdx = NonConstantIdx;
-      uint64_t GEPOffset =
-          DL.getIndexedOffset(GEP->getPointerOperandType(), Indices);
-      ConvertUsesToScalar(GEP, NewAI, Offset + GEPOffset * 8,
-                          GEPNonConstantIdx);
-      GEP->eraseFromParent();
-      continue;
-    }
-
-    IRBuilder<> Builder(User);
-
-    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
-      // The load is a bit extract from NewAI shifted right by Offset bits.
-      Value *LoadedVal = Builder.CreateLoad(NewAI);
-      Value *NewLoadVal = ConvertScalar_ExtractValue(
-          LoadedVal, LI->getType(), Offset, NonConstantIdx, Builder);
-      LI->replaceAllUsesWith(NewLoadVal);
-      LI->eraseFromParent();
-      continue;
-    }
-
-    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
-      assert(SI->getOperand(0) != Ptr && "Consistency error!");
-      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName() + ".in");
-      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
-                                             NonConstantIdx, Builder);
-      Builder.CreateStore(New, NewAI);
-      SI->eraseFromParent();
-
-      // If the load we just inserted is now dead, then the inserted store
-      // overwrote the entire thing.
-      if (Old->use_empty())
-        Old->eraseFromParent();
-      continue;
-    }
-
-    // If this is a constant sized memset of a constant value (e.g. 0) we can
-    // transform it into a store of the expanded constant value.
-    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
-      assert(MSI->getRawDest() == Ptr && "Consistency error!");
-      assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
-      int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
-      if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
-        unsigned NumBytes = static_cast<unsigned>(SNumBytes);
-        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
-
-        // Compute the value replicated the right number of times.
-        APInt APVal(NumBytes * 8, Val);
-
-        // Splat the value if non-zero.
-        if (Val)
-          for (unsigned i = 1; i != NumBytes; ++i)
-            APVal |= APVal << 8;
-
-        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName() + ".in");
-        Value *New = ConvertScalar_InsertValue(
-            ConstantInt::get(User->getContext(), APVal), Old, Offset, nullptr,
-            Builder);
-        Builder.CreateStore(New, NewAI);
-
-        // If the load we just inserted is now dead, then the memset overwrote
-        // the entire thing.
-        if (Old->use_empty())
-          Old->eraseFromParent();
-      }
-      MSI->eraseFromParent();
-      continue;
-    }
-
-    // If this is a memcpy or memmove into or out of the whole allocation, we
-    // can handle it like a load or store of the scalar type.
-    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
-      assert(Offset == 0 && "must be store to start of alloca");
-      assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");
-
-      // If the source and destination are both to the same alloca, then this is
-      // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
-      // as appropriate.
-      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, DL, 0));
-
-      if (GetUnderlyingObject(MTI->getSource(), DL, 0) != OrigAI) {
-        // Dest must be OrigAI, change this to be a load from the original
-        // pointer (bitcasted), then a store to our new alloca.
-        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
-        Value *SrcPtr = MTI->getSource();
-        PointerType *SPTy = cast<PointerType>(SrcPtr->getType());
-        PointerType *AIPTy = cast<PointerType>(NewAI->getType());
-        if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
-          AIPTy = PointerType::get(AIPTy->getElementType(),
-                                   SPTy->getAddressSpace());
-        }
-        SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
-
-        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
-        SrcVal->setAlignment(MTI->getAlignment());
-        Builder.CreateStore(SrcVal, NewAI);
-      } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) {
-        // Src must be OrigAI, change this to be a load from NewAI then a store
-        // through the original dest pointer (bitcasted).
-        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
-        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
-
-        PointerType *DPTy = cast<PointerType>(MTI->getDest()->getType());
-        PointerType *AIPTy = cast<PointerType>(NewAI->getType());
-        if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
-          AIPTy = PointerType::get(AIPTy->getElementType(),
-                                   DPTy->getAddressSpace());
-        }
-        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
-
-        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
-        NewStore->setAlignment(MTI->getAlignment());
-      } else {
-        // Noop transfer. Src == Dst
-      }
-
-      MTI->eraseFromParent();
-      continue;
-    }
-
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
-          II->getIntrinsicID() == Intrinsic::lifetime_end) {
-        // There's no need to preserve these, as the resulting alloca will be
-        // converted to a register anyways.
-        II->eraseFromParent();
-        continue;
-      }
-    }
-
-    llvm_unreachable("Unsupported operation!");
-  }
-}
-
-/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
-/// or vector value FromVal, extracting the bits from the offset specified by
-/// Offset.  This returns the value, which is of type ToType.
-///
-/// This happens when we are converting an "integer union" to a single
-/// integer scalar, or when we are converting a "vector union" to a vector with
-/// insert/extractelement instructions.
-///
-/// Offset is an offset from the original alloca, in bits that need to be
-/// shifted to the right.
-Value *ConvertToScalarInfo::ConvertScalar_ExtractValue(Value *FromVal,
-                                                       Type *ToType,
-                                                       uint64_t Offset,
-                                                       Value *NonConstantIdx,
-                                                       IRBuilder<> &Builder) {
-  // If the load is of the whole new alloca, no conversion is needed.
-  Type *FromType = FromVal->getType();
-  if (FromType == ToType && Offset == 0)
-    return FromVal;
-
-  // If the result alloca is a vector type, this is either an element
-  // access or a bitcast to another vector type of the same size.
-  if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
-    unsigned FromTypeSize = DL.getTypeAllocSize(FromType);
-    unsigned ToTypeSize = DL.getTypeAllocSize(ToType);
-    if (FromTypeSize == ToTypeSize)
-      return Builder.CreateBitCast(FromVal, ToType);
-
-    // Otherwise it must be an element access.
-    unsigned Elt = 0;
-    if (Offset) {
-      unsigned EltSize = DL.getTypeAllocSizeInBits(VTy->getElementType());
-      Elt = Offset / EltSize;
-      assert(EltSize * Elt == Offset && "Invalid modulus in validity checking");
-    }
-    // Return the element extracted out of it.
-    Value *Idx;
-    if (NonConstantIdx) {
-      if (Elt)
-        Idx = Builder.CreateAdd(NonConstantIdx, Builder.getInt32(Elt),
-                                "dyn.offset");
-      else
-        Idx = NonConstantIdx;
-    } else
-      Idx = Builder.getInt32(Elt);
-    Value *V = Builder.CreateExtractElement(FromVal, Idx);
-    if (V->getType() != ToType)
-      V = Builder.CreateBitCast(V, ToType);
-    return V;
-  }
-
-  // If ToType is a first class aggregate, extract out each of the pieces and
-  // use insertvalue's to form the FCA.
-  if (StructType *ST = dyn_cast<StructType>(ToType)) {
-    assert(!NonConstantIdx &&
-           "Dynamic indexing into struct types not supported");
-    const StructLayout &Layout = *DL.getStructLayout(ST);
-    Value *Res = UndefValue::get(ST);
-    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
-      Value *Elt = ConvertScalar_ExtractValue(
-          FromVal, ST->getElementType(i),
-          Offset + Layout.getElementOffsetInBits(i), nullptr, Builder);
-      Res = Builder.CreateInsertValue(Res, Elt, i);
-    }
-    return Res;
-  }
-
-  if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
-    assert(!NonConstantIdx &&
-           "Dynamic indexing into array types not supported");
-    uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
-    Value *Res = UndefValue::get(AT);
-    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
-      Value *Elt =
-          ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
-                                     Offset + i * EltSize, nullptr, Builder);
-      Res = Builder.CreateInsertValue(Res, Elt, i);
-    }
-    return Res;
-  }
-
-  // Otherwise, this must be a union that was converted to an integer value.
-  IntegerType *NTy = cast<IntegerType>(FromVal->getType());
-
-  // If this is a big-endian system and the load is narrower than the
-  // full alloca type, we need to do a shift to get the right bits.
-  int ShAmt = 0;
-  if (DL.isBigEndian()) {
-    // On big-endian machines, the lowest bit is stored at the bit offset
-    // from the pointer given by getTypeStoreSizeInBits.  This matters for
-    // integers with a bitwidth that is not a multiple of 8.
-    ShAmt = DL.getTypeStoreSizeInBits(NTy) - DL.getTypeStoreSizeInBits(ToType) -
-            Offset;
-  } else {
-    ShAmt = Offset;
-  }
-
-  // Note: we support negative bitwidths (with shl) which are not defined.
-  // We do this to support (f.e.) loads off the end of a structure where
-  // only some bits are used.
-  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
-    FromVal = Builder.CreateLShr(FromVal,
-                                 ConstantInt::get(FromVal->getType(), ShAmt));
-  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
-    FromVal = Builder.CreateShl(FromVal,
-                                ConstantInt::get(FromVal->getType(), -ShAmt));
-
-  // Finally, unconditionally truncate the integer to the right width.
-  unsigned LIBitWidth = DL.getTypeSizeInBits(ToType);
-  if (LIBitWidth < NTy->getBitWidth())
-    FromVal = Builder.CreateTrunc(
-        FromVal, IntegerType::get(FromVal->getContext(), LIBitWidth));
-  else if (LIBitWidth > NTy->getBitWidth())
-    FromVal = Builder.CreateZExt(
-        FromVal, IntegerType::get(FromVal->getContext(), LIBitWidth));
-
-  // If the result is an integer, this is a trunc or bitcast.
-  if (ToType->isIntegerTy()) {
-    // Should be done.
-  } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
-    // Just do a bitcast, we know the sizes match up.
-    FromVal = Builder.CreateBitCast(FromVal, ToType);
-  } else {
-    // Otherwise must be a pointer.
-    FromVal = Builder.CreateIntToPtr(FromVal, ToType);
-  }
-  assert(FromVal->getType() == ToType && "Didn't convert right?");
-  return FromVal;
-}
-
-/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
-/// or vector value "Old" at the offset specified by Offset.
-///
-/// This happens when we are converting an "integer union" to a
-/// single integer scalar, or when we are converting a "vector union" to a
-/// vector with insert/extractelement instructions.
-///
-/// Offset is an offset from the original alloca, in bits that need to be
-/// shifted to the right.
-///
-/// NonConstantIdx is an index value if there was a GEP with a non-constant
-/// index value.  If this is 0 then all GEPs used to find this insert address
-/// are constant.
-Value *ConvertToScalarInfo::ConvertScalar_InsertValue(Value *SV, Value *Old,
-                                                      uint64_t Offset,
-                                                      Value *NonConstantIdx,
-                                                      IRBuilder<> &Builder) {
-  // Convert the stored type to the actual type, shift it left to insert
-  // then 'or' into place.
-  Type *AllocaType = Old->getType();
-  LLVMContext &Context = Old->getContext();
-
-  if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
-    uint64_t VecSize = DL.getTypeAllocSizeInBits(VTy);
-    uint64_t ValSize = DL.getTypeAllocSizeInBits(SV->getType());
-
-    // Changing the whole vector with memset or with an access of a different
-    // vector type?
-    if (ValSize == VecSize)
-      return Builder.CreateBitCast(SV, AllocaType);
-
-    // Must be an element insertion.
-    Type *EltTy = VTy->getElementType();
-    if (SV->getType() != EltTy)
-      SV = Builder.CreateBitCast(SV, EltTy);
-    uint64_t EltSize = DL.getTypeAllocSizeInBits(EltTy);
-    unsigned Elt = Offset / EltSize;
-    Value *Idx;
-    if (NonConstantIdx) {
-      if (Elt)
-        Idx = Builder.CreateAdd(NonConstantIdx, Builder.getInt32(Elt),
-                                "dyn.offset");
-      else
-        Idx = NonConstantIdx;
-    } else
-      Idx = Builder.getInt32(Elt);
-    return Builder.CreateInsertElement(Old, SV, Idx);
-  }
-
-  // If SV is a first-class aggregate value, insert each value recursively.
-  if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
-    assert(!NonConstantIdx &&
-           "Dynamic indexing into struct types not supported");
-    const StructLayout &Layout = *DL.getStructLayout(ST);
-    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
-      Value *Elt = Builder.CreateExtractValue(SV, i);
-      Old = ConvertScalar_InsertValue(Elt, Old,
-                                      Offset + Layout.getElementOffsetInBits(i),
-                                      nullptr, Builder);
-    }
-    return Old;
-  }
-
-  if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
-    assert(!NonConstantIdx &&
-           "Dynamic indexing into array types not supported");
-    uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
-    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
-      Value *Elt = Builder.CreateExtractValue(SV, i);
-      Old = ConvertScalar_InsertValue(Elt, Old, Offset + i * EltSize, nullptr,
-                                      Builder);
-    }
-    return Old;
-  }
-
-  // If SV is a float, convert it to the appropriate integer type.
-  // If it is a pointer, do the same.
-  unsigned SrcWidth = DL.getTypeSizeInBits(SV->getType());
-  unsigned DestWidth = DL.getTypeSizeInBits(AllocaType);
-  unsigned SrcStoreWidth = DL.getTypeStoreSizeInBits(SV->getType());
-  unsigned DestStoreWidth = DL.getTypeStoreSizeInBits(AllocaType);
-  if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
-    SV =
-        Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(), SrcWidth));
-  else if (SV->getType()->isPointerTy())
-    SV = Builder.CreatePtrToInt(SV, DL.getIntPtrType(SV->getType()));
-
-  // Zero extend or truncate the value if needed.
-  if (SV->getType() != AllocaType) {
-    if (SV->getType()->getPrimitiveSizeInBits() <
-        AllocaType->getPrimitiveSizeInBits())
-      SV = Builder.CreateZExt(SV, AllocaType);
-    else {
-      // Truncation may be needed if storing more than the alloca can hold
-      // (undefined behavior).
-      SV = Builder.CreateTrunc(SV, AllocaType);
-      SrcWidth = DestWidth;
-      SrcStoreWidth = DestStoreWidth;
-    }
-  }
-
-  // If this is a big-endian system and the store is narrower than the
-  // full alloca type, we need to do a shift to get the right bits.
-  int ShAmt = 0;
-  if (DL.isBigEndian()) {
-    // On big-endian machines, the lowest bit is stored at the bit offset
-    // from the pointer given by getTypeStoreSizeInBits.  This matters for
-    // integers with a bitwidth that is not a multiple of 8.
-    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
-  } else {
-    ShAmt = Offset;
-  }
-
-  // Note: we support negative bitwidths (with shr) which are not defined.
-  // We do this to support (f.e.) stores off the end of a structure where
-  // only some bits in the structure are set.
-  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
-  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
-    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
-    Mask <<= ShAmt;
-  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
-    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
-    Mask = Mask.lshr(-ShAmt);
-  }
-
-  // Mask out the bits we are about to insert from the old value, and or
-  // in the new bits.
-  if (SrcWidth != DestWidth) {
-    assert(DestWidth > SrcWidth);
-    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
-    SV = Builder.CreateOr(Old, SV, "ins");
-  }
-  return SV;
-}
-
 //===----------------------------------------------------------------------===//
 // SRoA Driver
 //===----------------------------------------------------------------------===//
@@ -6571,778 +5810,3 @@ INITIALIZE_PASS(LowerStaticGlobalIntoAlloca, "static-global-to-alloca",
 ModulePass *llvm::createLowerStaticGlobalIntoAlloca() {
   return new LowerStaticGlobalIntoAlloca();
 }
-
-//===----------------------------------------------------------------------===//
-// Lower one type to another type.
-//===----------------------------------------------------------------------===//
-namespace {
-class LowerTypePass : public ModulePass {
-public:
-  explicit LowerTypePass(char &ID)
-      : ModulePass(ID) {}
-
-  bool runOnModule(Module &M) override;
-private:
-  bool runOnFunction(Function &F, bool HasDbgInfo);
-  AllocaInst *lowerAlloca(AllocaInst *A);
-  GlobalVariable *lowerInternalGlobal(GlobalVariable *GV);
-protected:
-  virtual bool needToLower(Value *V) = 0;
-  virtual void lowerUseWithNewValue(Value *V, Value *NewV) = 0;
-  virtual Type *lowerType(Type *Ty) = 0;
-  virtual Constant *lowerInitVal(Constant *InitVal, Type *NewTy) = 0;
-  virtual StringRef getGlobalPrefix() = 0;
-  virtual void initialize(Module &M) {};
-};
-
-AllocaInst *LowerTypePass::lowerAlloca(AllocaInst *A) {
-  IRBuilder<> AllocaBuilder(A);
-  Type *NewTy = lowerType(A->getAllocatedType());
-  return AllocaBuilder.CreateAlloca(NewTy);
-}
-
-GlobalVariable *LowerTypePass::lowerInternalGlobal(GlobalVariable *GV) {
-  Type *NewTy = lowerType(GV->getType()->getPointerElementType());
-  // So set init val to undef.
-  Constant *InitVal = UndefValue::get(NewTy);
-  if (GV->hasInitializer()) {
-    Constant *OldInitVal = GV->getInitializer();
-    if (isa<ConstantAggregateZero>(OldInitVal))
-      InitVal = ConstantAggregateZero::get(NewTy);
-    else if (!isa<UndefValue>(OldInitVal)) {
-      InitVal = lowerInitVal(OldInitVal, NewTy);
-    }
-  }
-
-  bool isConst = GV->isConstant();
-  GlobalVariable::ThreadLocalMode TLMode = GV->getThreadLocalMode();
-  unsigned AddressSpace = GV->getType()->getAddressSpace();
-  GlobalValue::LinkageTypes linkage = GV->getLinkage();
-
-  Module *M = GV->getParent();
-  GlobalVariable *NewGV = new llvm::GlobalVariable(
-      *M, NewTy, /*IsConstant*/ isConst, linkage,
-      /*InitVal*/ InitVal, GV->getName() + getGlobalPrefix(),
-      /*InsertBefore*/ nullptr, TLMode, AddressSpace);
-  return NewGV;
-}
-
-bool LowerTypePass::runOnFunction(Function &F, bool HasDbgInfo) {
-  std::vector<AllocaInst *> workList;
-  // Scan the entry basic block, adding allocas to the worklist.
-  BasicBlock &BB = F.getEntryBlock();
-  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
-    if (!isa<AllocaInst>(I))
-      continue;
-    AllocaInst *A = cast<AllocaInst>(I);
-    if (needToLower(A))
-      workList.emplace_back(A);
-  }
-  LLVMContext &Context = F.getContext();
-  for (AllocaInst *A : workList) {
-    AllocaInst *NewA = lowerAlloca(A);
-    if (HasDbgInfo) {
-      // Add debug info.
-      DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(A);
-      if (DDI) {
-        Value *DDIVar = MetadataAsValue::get(Context, DDI->getRawVariable());
-        Value *DDIExp = MetadataAsValue::get(Context, DDI->getRawExpression());
-        Value *VMD = MetadataAsValue::get(Context, ValueAsMetadata::get(NewA));
-        IRBuilder<> debugBuilder(DDI);
-        debugBuilder.CreateCall(DDI->getCalledFunction(),
-                                {VMD, DDIVar, DDIExp});
-      }
-    }
-    // Replace users.
-    lowerUseWithNewValue(A, NewA);
-    // Remove alloca.
-    A->eraseFromParent();
-  }
-  return true;
-}
-
-bool LowerTypePass::runOnModule(Module &M) {
-  initialize(M);
-  // Load up debug information, to cross-reference values and the instructions
-  // used to load them.
-  bool HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
-  llvm::DebugInfoFinder Finder;
-  if (HasDbgInfo) {
-    Finder.processModule(M);
-  }
-
-  std::vector<AllocaInst*> multiDimAllocas;
-  for (Function &F : M.functions()) {
-    if (F.isDeclaration())
-      continue;
-    runOnFunction(F, HasDbgInfo);
-  }
-
-  // Work on internal global.
-  std::vector<GlobalVariable *> vecGVs;
-  for (GlobalVariable &GV : M.globals()) {
-    if (dxilutil::IsStaticGlobal(&GV) || dxilutil::IsSharedMemoryGlobal(&GV)) {
-      if (needToLower(&GV) && !GV.user_empty())
-        vecGVs.emplace_back(&GV);
-    }
-  }
-
-  for (GlobalVariable *GV : vecGVs) {
-    GlobalVariable *NewGV = lowerInternalGlobal(GV);
-    // Add debug info.
-    if (HasDbgInfo) {
-      HLModule::UpdateGlobalVariableDebugInfo(GV, Finder, NewGV);
-    }
-    // Replace users.
-    lowerUseWithNewValue(GV, NewGV);
-    // Remove GV.
-    GV->removeDeadConstantUsers();
-    GV->eraseFromParent();
-  }
-
-  return true;
-}
-
-}
-
-
-//===----------------------------------------------------------------------===//
-// DynamicIndexingVector to Array.
-//===----------------------------------------------------------------------===//
-
-namespace {
-class DynamicIndexingVectorToArray : public LowerTypePass {
-  bool ReplaceAllVectors;
-public:
-  explicit DynamicIndexingVectorToArray(bool ReplaceAll = false)
-      : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll) {}
-  static char ID; // Pass identification, replacement for typeid
-  void applyOptions(PassOptions O) override;
-  void dumpConfig(raw_ostream &OS) override;
-protected:
-  bool needToLower(Value *V) override;
-  void lowerUseWithNewValue(Value *V, Value *NewV) override;
-  Type *lowerType(Type *Ty) override;
-  Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override;
-  StringRef getGlobalPrefix() override { return ".v"; }
-
-private:
-  bool HasVectorDynamicIndexing(Value *V);
-  void ReplaceVecGEP(Value *GEP, ArrayRef<Value *> idxList, Value *A,
-                     IRBuilder<> &Builder);
-  void ReplaceVecArrayGEP(Value *GEP, ArrayRef<Value *> idxList, Value *A,
-                          IRBuilder<> &Builder);
-  void ReplaceVectorWithArray(Value *Vec, Value *Array);
-  void ReplaceVectorArrayWithArray(Value *VecArray, Value *Array);
-  void ReplaceStaticIndexingOnVector(Value *V);
-  void ReplaceAddrSpaceCast(ConstantExpr *CE,
-                            Value *A, IRBuilder<> &Builder);
-};
-
-void DynamicIndexingVectorToArray::applyOptions(PassOptions O) {
-  GetPassOptionBool(O, "ReplaceAllVectors", &ReplaceAllVectors,
-                    ReplaceAllVectors);
-}
-void DynamicIndexingVectorToArray::dumpConfig(raw_ostream &OS) {
-  ModulePass::dumpConfig(OS);
-  OS << ",ReplaceAllVectors=" << ReplaceAllVectors;
-}
-
-void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) {
-  for (auto U = V->user_begin(), E = V->user_end(); U != E;) {
-    Value *User = *(U++);
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
-      // Only work on element access for vector.
-      if (GEP->getNumOperands() == 3) {
-        auto Idx = GEP->idx_begin();
-        // Skip the pointer idx.
-        Idx++;
-        ConstantInt *constIdx = cast<ConstantInt>(Idx);
-
-        for (auto GEPU = GEP->user_begin(), GEPE = GEP->user_end();
-             GEPU != GEPE;) {
-          Instruction *GEPUser = cast<Instruction>(*(GEPU++));
-
-          IRBuilder<> Builder(GEPUser);
-
-          if (LoadInst *ldInst = dyn_cast<LoadInst>(GEPUser)) {
-            // Change
-            //    ld a->x
-            // into
-            //    b = ld a
-            //    b.x
-            Value *ldVal = Builder.CreateLoad(V);
-            Value *Elt = Builder.CreateExtractElement(ldVal, constIdx);
-            ldInst->replaceAllUsesWith(Elt);
-            ldInst->eraseFromParent();
-          } else {
-            // Change
-            //    st val, a->x
-            // into
-            //    tmp = ld a
-            //    tmp.x = val
-            //    st tmp, a
-            // Must be store inst here.
-            StoreInst *stInst = cast<StoreInst>(GEPUser);
-            Value *val = stInst->getValueOperand();
-            Value *ldVal = Builder.CreateLoad(V);
-            ldVal = Builder.CreateInsertElement(ldVal, val, constIdx);
-            Builder.CreateStore(ldVal, V);
-            stInst->eraseFromParent();
-          }
-        }
-        GEP->eraseFromParent();
-      } else if (GEP->getNumIndices() == 1) {
-        Value *Idx = *GEP->idx_begin();
-        if (ConstantInt *C = dyn_cast<ConstantInt>(Idx)) {
-          if (C->getLimitedValue() == 0) {
-            GEP->replaceAllUsesWith(V);
-            GEP->eraseFromParent();
-          }
-        }
-      }
-    }
-  }
-}
-
-bool DynamicIndexingVectorToArray::needToLower(Value *V) {
-  Type *Ty = V->getType()->getPointerElementType();
-  if (dyn_cast<VectorType>(Ty)) {
-    if (isa<GlobalVariable>(V) || ReplaceAllVectors) {
-      return true;
-    }
-    // Don't lower local vector which only static indexing.
-    if (HasVectorDynamicIndexing(V)) {
-      return true;
-    } else {
-      // Change vector indexing with ld st.
-      ReplaceStaticIndexingOnVector(V);
-      return false;
-    }
-  } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
-    // Array must be replaced even without dynamic indexing to remove vector
-    // type in dxil.
-    // TODO: optimize static array index in later pass.
-    Type *EltTy = dxilutil::GetArrayEltTy(AT);
-    return isa<VectorType>(EltTy);
-  }
-  return false;
-}
-
-void DynamicIndexingVectorToArray::ReplaceVecGEP(Value *GEP, ArrayRef<Value *> idxList,
-                                       Value *A, IRBuilder<> &Builder) {
-  Value *newGEP = Builder.CreateGEP(A, idxList);
-  if (GEP->getType()->getPointerElementType()->isVectorTy()) {
-    ReplaceVectorWithArray(GEP, newGEP);
-  } else {
-    GEP->replaceAllUsesWith(newGEP);
-  }
-}
-
-void DynamicIndexingVectorToArray::ReplaceAddrSpaceCast(ConstantExpr *CE,
-                                              Value *A, IRBuilder<> &Builder) {
-  // create new AddrSpaceCast.
-  Value *NewAddrSpaceCast = Builder.CreateAddrSpaceCast(
-    A,
-    PointerType::get(A->getType()->getPointerElementType(),
-                      CE->getType()->getPointerAddressSpace()));
-  ReplaceVectorWithArray(CE, NewAddrSpaceCast);
-}
-
-void DynamicIndexingVectorToArray::ReplaceVectorWithArray(Value *Vec, Value *A) {
-  unsigned size = Vec->getType()->getPointerElementType()->getVectorNumElements();
-  for (auto U = Vec->user_begin(); U != Vec->user_end();) {
-    User *User = (*U++);
-
-    // GlobalVariable user.
-    if (ConstantExpr * CE = dyn_cast<ConstantExpr>(User)) {
-      if (User->user_empty())
-        continue;
-      if (GEPOperator *GEP = dyn_cast<GEPOperator>(User)) {
-        IRBuilder<> Builder(Vec->getContext());
-        SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
-        ReplaceVecGEP(GEP, idxList, A, Builder);
-        continue;
-      } else if (CE->getOpcode() == Instruction::AddrSpaceCast) {
-        IRBuilder<> Builder(Vec->getContext());
-        ReplaceAddrSpaceCast(CE, A, Builder);
-        continue;
-      }
-      DXASSERT(0, "not implemented yet");
-    }
-    // Instrution user.
-    Instruction *UserInst = cast<Instruction>(User);
-    IRBuilder<> Builder(UserInst);
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
-      SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
-      ReplaceVecGEP(cast<GEPOperator>(GEP), idxList, A, Builder);
-      GEP->eraseFromParent();
-    } else if (LoadInst *ldInst = dyn_cast<LoadInst>(User)) {
-      // If ld whole struct, need to split the load.
-      Value *newLd = UndefValue::get(ldInst->getType());
-      Value *zero = Builder.getInt32(0);
-      for (unsigned i = 0; i < size; i++) {
-        Value *idx = Builder.getInt32(i);
-        Value *GEP = Builder.CreateInBoundsGEP(A, {zero, idx});
-        Value *Elt = Builder.CreateLoad(GEP);
-        newLd = Builder.CreateInsertElement(newLd, Elt, i);
-      }
-      ldInst->replaceAllUsesWith(newLd);
-      ldInst->eraseFromParent();
-    } else if (StoreInst *stInst = dyn_cast<StoreInst>(User)) {
-      Value *val = stInst->getValueOperand();
-      Value *zero = Builder.getInt32(0);
-      for (unsigned i = 0; i < size; i++) {
-        Value *Elt = Builder.CreateExtractElement(val, i);
-        Value *idx = Builder.getInt32(i);
-        Value *GEP = Builder.CreateInBoundsGEP(A, {zero, idx});
-        Builder.CreateStore(Elt, GEP);
-      }
-      stInst->eraseFromParent();
-    } else {
-      // Vector parameter should be lowered.
-      // No function call should use vector.
-      DXASSERT(0, "not implement yet");
-    }
-  }
-}
-
-void DynamicIndexingVectorToArray::ReplaceVecArrayGEP(Value *GEP,
-                                            ArrayRef<Value *> idxList, Value *A,
-                                            IRBuilder<> &Builder) {
-  Value *newGEP = Builder.CreateGEP(A, idxList);
-  Type *Ty = GEP->getType()->getPointerElementType();
-  if (Ty->isVectorTy()) {
-    ReplaceVectorWithArray(GEP, newGEP);
-  } else if (Ty->isArrayTy()) {
-    ReplaceVectorArrayWithArray(GEP, newGEP);
-  } else {
-    DXASSERT(Ty->isSingleValueType(), "must be vector subscript here");
-    GEP->replaceAllUsesWith(newGEP);
-  }
-}
-
-void DynamicIndexingVectorToArray::ReplaceVectorArrayWithArray(Value *VA, Value *A) {
-  for (auto U = VA->user_begin(); U != VA->user_end();) {
-    User *User = *(U++);
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
-      IRBuilder<> Builder(GEP);
-      SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
-      ReplaceVecArrayGEP(GEP, idxList, A, Builder);
-      GEP->eraseFromParent();
-    } else if (GEPOperator *GEPOp = dyn_cast<GEPOperator>(User)) {
-      IRBuilder<> Builder(GEPOp->getContext());
-      SmallVector<Value *, 4> idxList(GEPOp->idx_begin(), GEPOp->idx_end());
-      ReplaceVecArrayGEP(GEPOp, idxList, A, Builder);
-    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
-      BCI->setOperand(0, A);
-    } else {
-      DXASSERT(0, "Array pointer should only used by GEP");
-    }
-  }
-}
-
-void DynamicIndexingVectorToArray::lowerUseWithNewValue(Value *V, Value *NewV) {
-  Type *Ty = V->getType()->getPointerElementType();
-  // Replace V with NewV.
-  if (Ty->isVectorTy()) {
-    ReplaceVectorWithArray(V, NewV);
-  } else {
-    ReplaceVectorArrayWithArray(V, NewV);
-  }
-}
-
-Type *DynamicIndexingVectorToArray::lowerType(Type *Ty) {
-  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
-    return ArrayType::get(VT->getElementType(), VT->getNumElements());
-  } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
-    SmallVector<ArrayType *, 4> nestArrayTys;
-    nestArrayTys.emplace_back(AT);
-
-    Type *EltTy = AT->getElementType();
-    // support multi level of array
-    while (EltTy->isArrayTy()) {
-      ArrayType *ElAT = cast<ArrayType>(EltTy);
-      nestArrayTys.emplace_back(ElAT);
-      EltTy = ElAT->getElementType();
-    }
-    if (EltTy->isVectorTy()) {
-      Type *vecAT = ArrayType::get(EltTy->getVectorElementType(),
-                                   EltTy->getVectorNumElements());
-      return CreateNestArrayTy(vecAT, nestArrayTys);
-    }
-    return nullptr;
-  }
-  return nullptr;
-}
-
-Constant *DynamicIndexingVectorToArray::lowerInitVal(Constant *InitVal, Type *NewTy) {
-  Type *VecTy = InitVal->getType();
-  ArrayType *ArrayTy = cast<ArrayType>(NewTy);
-  if (VecTy->isVectorTy()) {
-    SmallVector<Constant *, 4> Elts;
-    for (unsigned i = 0; i < VecTy->getVectorNumElements(); i++) {
-      Elts.emplace_back(InitVal->getAggregateElement(i));
-    }
-    return ConstantArray::get(ArrayTy, Elts);
-  } else {
-    ArrayType *AT = cast<ArrayType>(VecTy);
-    ArrayType *EltArrayTy = cast<ArrayType>(ArrayTy->getElementType());
-    SmallVector<Constant *, 4> Elts;
-    for (unsigned i = 0; i < AT->getNumElements(); i++) {
-      Constant *Elt = lowerInitVal(InitVal->getAggregateElement(i), EltArrayTy);
-      Elts.emplace_back(Elt);
-    }
-    return ConstantArray::get(ArrayTy, Elts);
-  }
-}
-
-bool DynamicIndexingVectorToArray::HasVectorDynamicIndexing(Value *V) {
-  return dxilutil::HasDynamicIndexing(V);
-}
-
-}
-
-char DynamicIndexingVectorToArray::ID = 0;
-
-INITIALIZE_PASS(DynamicIndexingVectorToArray, "dynamic-vector-to-array",
-  "Replace dynamic indexing vector with array", false,
-  false)
-
-// Public interface to the DynamicIndexingVectorToArray pass
-ModulePass *llvm::createDynamicIndexingVectorToArrayPass(bool ReplaceAllVector) {
-  return new DynamicIndexingVectorToArray(ReplaceAllVector);
-}
-
-//===----------------------------------------------------------------------===//
-// Flatten multi dim array into 1 dim.
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-class MultiDimArrayToOneDimArray : public LowerTypePass {
-public:
-  explicit MultiDimArrayToOneDimArray() : LowerTypePass(ID) {}
-  static char ID; // Pass identification, replacement for typeid
-protected:
-  bool needToLower(Value *V) override;
-  void lowerUseWithNewValue(Value *V, Value *NewV) override;
-  Type *lowerType(Type *Ty) override;
-  Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override;
-  StringRef getGlobalPrefix() override { return ".1dim"; }
-};
-
-bool MultiDimArrayToOneDimArray::needToLower(Value *V) {
-  Type *Ty = V->getType()->getPointerElementType();
-  ArrayType *AT = dyn_cast<ArrayType>(Ty);
-  if (!AT)
-    return false;
-  if (!isa<ArrayType>(AT->getElementType())) {
-    return false;
-  } else {
-    // Merge all GEP.
-    HLModule::MergeGepUse(V);
-    return true;
-  }
-}
-
-void ReplaceMultiDimGEP(User *GEP, Value *OneDim, IRBuilder<> &Builder) {
-  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
-
-  Value *PtrOffset = GEPIt.getOperand();
-  ++GEPIt;
-  Value *ArrayIdx = GEPIt.getOperand();
-  ++GEPIt;
-  Value *VecIdx = nullptr;
-  for (; GEPIt != E; ++GEPIt) {
-    if (GEPIt->isArrayTy()) {
-      unsigned arraySize = GEPIt->getArrayNumElements();
-      Value *V = GEPIt.getOperand();
-      ArrayIdx = Builder.CreateMul(ArrayIdx, Builder.getInt32(arraySize));
-      ArrayIdx = Builder.CreateAdd(V, ArrayIdx);
-    } else {
-      DXASSERT_NOMSG(isa<VectorType>(*GEPIt));
-      VecIdx = GEPIt.getOperand();
-    }
-  }
-  Value *NewGEP = nullptr;
-  if (!VecIdx)
-    NewGEP = Builder.CreateGEP(OneDim, {PtrOffset, ArrayIdx});
-  else
-    NewGEP = Builder.CreateGEP(OneDim, {PtrOffset, ArrayIdx, VecIdx});
-
-  GEP->replaceAllUsesWith(NewGEP);
-}
-
-void MultiDimArrayToOneDimArray::lowerUseWithNewValue(Value *MultiDim, Value *OneDim) {
-  LLVMContext &Context = MultiDim->getContext();
-  // All users should be element type.
-  // Replace users of AI or GV.
-  for (auto it = MultiDim->user_begin(); it != MultiDim->user_end();) {
-    User *U = *(it++);
-    if (U->user_empty())
-      continue;
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
-      BCI->setOperand(0, OneDim);
-      continue;
-    }
-
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
-      IRBuilder<> Builder(Context);
-      if (GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
-        // NewGEP must be GEPOperator too.
-        // No instruction will be build.
-        ReplaceMultiDimGEP(U, OneDim, Builder);
-      } else if (CE->getOpcode() == Instruction::AddrSpaceCast) {
-        Value *NewAddrSpaceCast = Builder.CreateAddrSpaceCast(
-          OneDim,
-          PointerType::get(OneDim->getType()->getPointerElementType(),
-                           CE->getType()->getPointerAddressSpace()));
-        lowerUseWithNewValue(CE, NewAddrSpaceCast);
-      } else {
-        DXASSERT(0, "not implemented");
-      }
-    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
-      IRBuilder<> Builder(GEP);
-      ReplaceMultiDimGEP(U, OneDim, Builder);
-      GEP->eraseFromParent();
-    } else {
-      DXASSERT(0, "not implemented");
-    }
-  }
-}
-
-Type *MultiDimArrayToOneDimArray::lowerType(Type *Ty) {
-  ArrayType *AT = cast<ArrayType>(Ty);
-  unsigned arraySize = AT->getNumElements();
-
-  Type *EltTy = AT->getElementType();
-  // support multi level of array
-  while (EltTy->isArrayTy()) {
-    ArrayType *ElAT = cast<ArrayType>(EltTy);
-    arraySize *= ElAT->getNumElements();
-    EltTy = ElAT->getElementType();
-  }
-
-  return ArrayType::get(EltTy, arraySize);
-}
-
-void FlattenMultiDimConstArray(Constant *V, std::vector<Constant *> &Elts) {
-  if (!V->getType()->isArrayTy()) {
-    Elts.emplace_back(V);
-  } else {
-    ArrayType *AT = cast<ArrayType>(V->getType());
-    for (unsigned i = 0; i < AT->getNumElements(); i++) {
-      FlattenMultiDimConstArray(V->getAggregateElement(i), Elts);
-    }
-  }
-}
-
-Constant *MultiDimArrayToOneDimArray::lowerInitVal(Constant *InitVal, Type *NewTy) {
-  if (InitVal) {
-    // MultiDim array init should be done by store.
-    if (isa<ConstantAggregateZero>(InitVal))
-      InitVal = ConstantAggregateZero::get(NewTy);
-    else if (isa<UndefValue>(InitVal))
-      InitVal = UndefValue::get(NewTy);
-    else {
-      std::vector<Constant *> Elts;
-      FlattenMultiDimConstArray(InitVal, Elts);
-      InitVal = ConstantArray::get(cast<ArrayType>(NewTy), Elts);
-    }
-  } else {
-    InitVal = UndefValue::get(NewTy);
-  }
-  return InitVal;
-}
-
-}
-
-char MultiDimArrayToOneDimArray::ID = 0;
-
-INITIALIZE_PASS(MultiDimArrayToOneDimArray, "multi-dim-one-dim",
-  "Flatten multi-dim array into one-dim array", false,
-  false)
-
-// Public interface to the SROA_Parameter_HLSL pass
-ModulePass *llvm::createMultiDimArrayToOneDimArrayPass() {
-  return new MultiDimArrayToOneDimArray();
-}
-
-//===----------------------------------------------------------------------===//
-// Lower resource into handle.
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-class ResourceToHandle : public LowerTypePass {
-public:
-  explicit ResourceToHandle() : LowerTypePass(ID) {}
-  static char ID; // Pass identification, replacement for typeid
-protected:
-  bool needToLower(Value *V) override;
-  void lowerUseWithNewValue(Value *V, Value *NewV) override;
-  Type *lowerType(Type *Ty) override;
-  Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override;
-  StringRef getGlobalPrefix() override { return ".res"; }
-  void initialize(Module &M) override;
-private:
-  void ReplaceResourceWithHandle(Value *ResPtr, Value *HandlePtr);
-  void ReplaceResourceGEPWithHandleGEP(Value *GEP, ArrayRef<Value *> idxList,
-                                       Value *A, IRBuilder<> &Builder);
-  void ReplaceResourceArrayWithHandleArray(Value *VA, Value *A);
-
-  Type *m_HandleTy;
-  HLModule *m_pHLM;
-  bool  m_bIsLib;
-};
-
-void ResourceToHandle::initialize(Module &M) {
-  DXASSERT(M.HasHLModule(), "require HLModule");
-  m_pHLM = &M.GetHLModule();
-  m_HandleTy = m_pHLM->GetOP()->GetHandleType();
-  m_bIsLib = m_pHLM->GetShaderModel()->IsLib();
-}
-
-bool ResourceToHandle::needToLower(Value *V) {
-  Type *Ty = V->getType()->getPointerElementType();
-  Ty = dxilutil::GetArrayEltTy(Ty);
-  return (dxilutil::IsHLSLObjectType(Ty) &&
-          !HLModule::IsStreamOutputType(Ty)) &&
-         // Skip lib profile.
-         !m_bIsLib;
-}
-
-Type *ResourceToHandle::lowerType(Type *Ty) {
-  if ((dxilutil::IsHLSLObjectType(Ty) && !HLModule::IsStreamOutputType(Ty))) {
-    return m_HandleTy;
-  }
-
-  ArrayType *AT = cast<ArrayType>(Ty);
-
-  SmallVector<ArrayType *, 4> nestArrayTys;
-  nestArrayTys.emplace_back(AT);
-
-  Type *EltTy = AT->getElementType();
-  // support multi level of array
-  while (EltTy->isArrayTy()) {
-    ArrayType *ElAT = cast<ArrayType>(EltTy);
-    nestArrayTys.emplace_back(ElAT);
-    EltTy = ElAT->getElementType();
-  }
-
-  return CreateNestArrayTy(m_HandleTy, nestArrayTys);
-}
-
-Constant *ResourceToHandle::lowerInitVal(Constant *InitVal, Type *NewTy) {
-  DXASSERT(isa<UndefValue>(InitVal), "resource cannot have real init val");
-  return UndefValue::get(NewTy);
-}
-
-void ResourceToHandle::ReplaceResourceWithHandle(Value *ResPtr,
-                                                 Value *HandlePtr) {
-  for (auto it = ResPtr->user_begin(); it != ResPtr->user_end();) {
-    User *U = *(it++);
-    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
-      IRBuilder<> Builder(LI);
-      Value *Handle = Builder.CreateLoad(HandlePtr);
-      Type *ResTy = LI->getType();
-      // Used by createHandle or Store.
-      for (auto ldIt = LI->user_begin(); ldIt != LI->user_end();) {
-        User *ldU = *(ldIt++);
-        if (StoreInst *SI = dyn_cast<StoreInst>(ldU)) {
-          Value *TmpRes = HLModule::EmitHLOperationCall(
-              Builder, HLOpcodeGroup::HLCast,
-              (unsigned)HLCastOpcode::HandleToResCast, ResTy, {Handle},
-              *m_pHLM->GetModule());
-          SI->replaceUsesOfWith(LI, TmpRes);
-        } else {
-          CallInst *CI = cast<CallInst>(ldU);
-          DXASSERT(hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()) == HLOpcodeGroup::HLCreateHandle,
-                   "must be createHandle");
-          CI->replaceAllUsesWith(Handle);
-          CI->eraseFromParent();
-        }
-      }
-      LI->eraseFromParent();
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
-      Value *Res = SI->getValueOperand();
-      IRBuilder<> Builder(SI);
-      // CreateHandle from Res.
-      Value *Handle = HLModule::EmitHLOperationCall(
-          Builder, HLOpcodeGroup::HLCreateHandle,
-          /*opcode*/ 0, m_HandleTy, {Res}, *m_pHLM->GetModule());
-      // Store Handle to HandlePtr.
-      Builder.CreateStore(Handle, HandlePtr);
-      // Remove resource Store.
-      SI->eraseFromParent();
-    } else if (U->user_empty() && isa<GEPOperator>(U)) {
-      continue;
-    } else {
-      CallInst *CI = cast<CallInst>(U);
-      IRBuilder<> Builder(CI);
-      HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
-      // Allow user function to use res ptr as argument.
-      if (group == HLOpcodeGroup::NotHL) {
-          Value *TmpResPtr = Builder.CreateBitCast(HandlePtr, ResPtr->getType());
-          CI->replaceUsesOfWith(ResPtr, TmpResPtr);
-      } else {
-        DXASSERT(0, "invalid operation on resource");
-      }
-    }
-  }
-}
-
-void ResourceToHandle::ReplaceResourceGEPWithHandleGEP(
-    Value *GEP, ArrayRef<Value *> idxList, Value *A, IRBuilder<> &Builder) {
-  Value *newGEP = Builder.CreateGEP(A, idxList);
-  Type *Ty = GEP->getType()->getPointerElementType();
-  if (Ty->isArrayTy()) {
-    ReplaceResourceArrayWithHandleArray(GEP, newGEP);
-  } else {
-    DXASSERT(dxilutil::IsHLSLObjectType(Ty), "must be resource type here");
-    ReplaceResourceWithHandle(GEP, newGEP);
-  }
-}
-
-void ResourceToHandle::ReplaceResourceArrayWithHandleArray(Value *VA,
-                                                           Value *A) {
-  for (auto U = VA->user_begin(); U != VA->user_end();) {
-    User *User = *(U++);
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
-      IRBuilder<> Builder(GEP);
-      SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
-      ReplaceResourceGEPWithHandleGEP(GEP, idxList, A, Builder);
-      GEP->eraseFromParent();
-    } else if (GEPOperator *GEPOp = dyn_cast<GEPOperator>(User)) {
-      IRBuilder<> Builder(GEPOp->getContext());
-      SmallVector<Value *, 4> idxList(GEPOp->idx_begin(), GEPOp->idx_end());
-      ReplaceResourceGEPWithHandleGEP(GEPOp, idxList, A, Builder);
-    } else {
-      DXASSERT(0, "Array pointer should only used by GEP");
-    }
-  }
-}
-
-void ResourceToHandle::lowerUseWithNewValue(Value *V, Value *NewV) {
-  Type *Ty = V->getType()->getPointerElementType();
-  // Replace V with NewV.
-  if (Ty->isArrayTy()) {
-    ReplaceResourceArrayWithHandleArray(V, NewV);
-  } else {
-    ReplaceResourceWithHandle(V, NewV);
-  }
-}
-
-}
-
-char ResourceToHandle::ID = 0;
-
-INITIALIZE_PASS(ResourceToHandle, "resource-handle",
-  "Lower resource into handle", false,
-  false)
-
-// Public interface to the ResourceToHandle pass
-ModulePass *llvm::createResourceToHandlePass() {
-  return new ResourceToHandle();
-}