Browse Source

Code cleanup. (#2708)

* Code cleanup.
Xiang Li 5 years ago
parent
commit
4098229583

+ 4 - 0
include/dxc/DXIL/DxilResourceProperties.h

@@ -50,6 +50,7 @@ struct DxilResourceProperties {
 
   bool operator==(const DxilResourceProperties &);
   bool operator!=(const DxilResourceProperties &);
+  unsigned getSampleCount();
 };
 
 static_assert(sizeof(DxilResourceProperties) == 4 * sizeof(uint32_t),
@@ -63,6 +64,9 @@ struct DxilInst_AnnotateHandle;
 namespace resource_helper {
 llvm::Constant *getAsConstant(const DxilResourceProperties &, llvm::Type *Ty,
                               const ShaderModel &);
+DxilResourceProperties loadFromConstant(const llvm::Constant &C,
+                                        DXIL::ResourceClass RC,
+                                        DXIL::ResourceKind RK);
 DxilResourceProperties
 loadFromAnnotateHandle(DxilInst_AnnotateHandle &annotateHandle, llvm::Type *Ty,
                        const ShaderModel &);

+ 6 - 18
include/dxc/HLSL/HLModule.h

@@ -21,6 +21,7 @@
 #include "dxc/DXIL/DxilSignature.h"
 #include "dxc/DXIL/DxilFunctionProps.h"
 #include "dxc/DXIL/DxilSubobject.h"
+#include "dxc/DXIL/DxilResourceProperties.h"
 #include <memory>
 #include <string>
 #include <vector>
@@ -153,11 +154,6 @@ public:
   DxilFunctionAnnotation *GetFunctionAnnotation(llvm::Function *F);
   DxilFunctionAnnotation *AddFunctionAnnotation(llvm::Function *F);
 
-  void AddResourceTypeAnnotation(llvm::Type *Ty, DXIL::ResourceClass resClass,
-                                 DXIL::ResourceKind kind);
-  DXIL::ResourceClass GetResourceClass(llvm::Type *Ty);
-  DXIL::ResourceKind  GetResourceKind(llvm::Type *Ty);
-
   // Float Denorm mode.
   void SetFloat32DenormMode(const DXIL::Float32DenormMode mode);
   DXIL::Float32DenormMode GetFloat32DenormMode() const;
@@ -181,10 +177,12 @@ public:
   void LoadDxilResourceBaseFromMDNode(llvm::MDNode *MD, DxilResourceBase &R);
   void LoadDxilResourceFromMDNode(llvm::MDNode *MD, DxilResource &R);
   void LoadDxilSamplerFromMDNode(llvm::MDNode *MD, DxilSampler &S);
-  DxilResourceBase *AddResourceWithGlobalVariableAndMDNode(llvm::Constant *GV,
-                                                           llvm::MDNode *MD);
+  DxilResourceBase *
+  AddResourceWithGlobalVariableAndProps(llvm::Constant *GV,
+                                        DxilResourceProperties &RP);
   unsigned GetBindingForResourceInCB(llvm::GetElementPtrInst *CbPtr,
-                                     llvm::GlobalVariable *CbGV);
+                                     llvm::GlobalVariable *CbGV,
+                                     DXIL::ResourceClass RC);
 
   // Type related methods.
   static bool IsStreamOutputPtrType(llvm::Type *Ty);
@@ -219,12 +217,6 @@ public:
   static void MarkPreciseAttributeOnPtrWithFunctionCall(llvm::Value *Ptr,
                                                         llvm::Module &M);
   static bool HasPreciseAttribute(llvm::Function *F);
-  // Resource attribute.
-  static void  MarkDxilResourceAttrib(llvm::Function *F, llvm::MDNode *MD);
-  static llvm::MDNode *GetDxilResourceAttrib(llvm::Function *F);
-  void MarkDxilResourceAttrib(llvm::Argument *Arg, llvm::MDNode *MD);
-  llvm::MDNode *GetDxilResourceAttrib(llvm::Argument *Arg);
-  static llvm::MDNode *GetDxilResourceAttrib(llvm::Type *Ty, llvm::Module &M);
 
   // DXIL type system.
   DxilTypeSystem &GetTypeSystem();
@@ -279,8 +271,6 @@ private:
   std::unordered_map<const llvm::Function *, std::unique_ptr<DxilFunctionProps>>  m_DxilFunctionPropsMap;
   std::unordered_set<llvm::Function *>  m_PatchConstantFunctions;
 
-  // Resource type annotation.
-  std::unordered_map<llvm::Type *, std::pair<DXIL::ResourceClass, DXIL::ResourceKind>> m_ResTypeAnnotation;
   // Resource bindings for res in cb.
   // Key = CbID << 32 | ConstantIdx. Val is reg binding.
   std::unordered_map<uint64_t, unsigned> m_SrvBindingInCB;
@@ -313,8 +303,6 @@ private:
   llvm::MDTuple *EmitHLShaderProperties();
   void LoadHLShaderProperties(const llvm::MDOperand &MDO);
   llvm::MDTuple *EmitDxilShaderProperties();
-  llvm::MDTuple *EmitResTyAnnotations();
-  void LoadResTyAnnotations(const llvm::MDOperand &MDO);
   // LLVM used.
   std::vector<llvm::GlobalVariable*> m_LLVMUsed;
 

+ 28 - 8
lib/DXIL/DxilResourceProperties.cpp

@@ -33,6 +33,21 @@ bool DxilResourceProperties::operator!=(const DxilResourceProperties &RP) {
   return !(*this == RP) ;
 }
 
+unsigned DxilResourceProperties::getSampleCount() { // Decodes Typed.SampleCountPow2 into a sample count.
+  assert(DXIL::IsTyped(Kind)); // Typed.* is read below, so the kind must be a typed one.
+  const unsigned SampleCountTable[] = {
+    1,  // SampleCountPow2 == 0
+    2,  // SampleCountPow2 == 1
+    4,  // SampleCountPow2 == 2
+    8,  // SampleCountPow2 == 3
+    16, // SampleCountPow2 == 4
+    32, // SampleCountPow2 == 5
+    0,  // SampleCountPow2 == 6: no count listed for this value, reported as 0.
+    0,  // kSampleCountUndefined.
+  };
+  return SampleCountTable[Typed.SampleCountPow2]; // Entries 0..5 hold 1 << index; the last two hold 0.
+}
+
 namespace resource_helper {
 // Resource class and resource kind are passed as separate parameters; the
 // other fields are saved in the constant.
@@ -57,20 +72,25 @@ Constant *getAsConstant(const DxilResourceProperties &RP, Type *Ty,
 
 DxilResourceProperties loadFromConstant(const Constant &C,
                                         DXIL::ResourceClass RC,
-                                        DXIL::ResourceKind RK, Type *Ty,
-                                        const ShaderModel &) {
+                                        DXIL::ResourceKind RK) {
   DxilResourceProperties RP;
   RP.Class = RC;
   RP.Kind = RK;
   // Ty Should match C.getType().
+  Type *Ty = C.getType();
   StructType *ST = cast<StructType>(Ty);
   switch (ST->getNumElements()) {
   case 2: {
-    const ConstantStruct *CS = cast<ConstantStruct>(&C);
-    const Constant *RawDword0 = CS->getOperand(0);
-    const Constant *RawDword1 = CS->getOperand(1);
-    RP.RawDword0 = cast<ConstantInt>(RawDword0)->getLimitedValue();
-    RP.RawDword1 = cast<ConstantInt>(RawDword1)->getLimitedValue();
+    if (isa<ConstantAggregateZero>(&C)) {
+      RP.RawDword0 = 0;
+      RP.RawDword1 = 0;
+    } else {
+      const ConstantStruct *CS = cast<ConstantStruct>(&C);
+      const Constant *RawDword0 = CS->getOperand(0);
+      const Constant *RawDword1 = CS->getOperand(1);
+      RP.RawDword0 = cast<ConstantInt>(RawDword0)->getLimitedValue();
+      RP.RawDword1 = cast<ConstantInt>(RawDword1)->getLimitedValue();
+    }
   } break;
   default:
     RP.Class = DXIL::ResourceClass::Invalid;
@@ -87,7 +107,7 @@ loadFromAnnotateHandle(DxilInst_AnnotateHandle &annotateHandle, llvm::Type *Ty,
       cast<ConstantStruct>(annotateHandle.get_props());
   return loadFromConstant(
       *ResProp, (DXIL::ResourceClass)annotateHandle.get_resourceClass_val(),
-      (DXIL::ResourceKind)annotateHandle.get_resourceKind_val(), Ty, SM);
+      (DXIL::ResourceKind)annotateHandle.get_resourceKind_val());
 }
 
 DxilResourceProperties loadFromResourceBase(DxilResourceBase *Res) {

+ 14 - 14
lib/HLSL/DxilGenerationPass.cpp

@@ -333,8 +333,7 @@ private:
 
 namespace {
 void TranslateHLCreateHandle(Function *F, hlsl::OP &hlslOP) {
-  Value *opArg = hlslOP.GetU32Const(
-      (unsigned)DXIL::OpCode::CreateHandleForLib);
+  Value *opArg = hlslOP.GetU32Const((unsigned)DXIL::OpCode::CreateHandleForLib);
 
   for (auto U = F->user_begin(); U != F->user_end();) {
     Value *user = *(U++);
@@ -347,8 +346,8 @@ void TranslateHLCreateHandle(Function *F, hlsl::OP &hlslOP) {
     IRBuilder<> Builder(CI);
     // Res could be ld/phi/select. Will be removed in
     // DxilLowerCreateHandleForLib.
-    Function *createHandle = hlslOP.GetOpFunc(
-        DXIL::OpCode::CreateHandleForLib, res->getType());
+    Function *createHandle =
+        hlslOP.GetOpFunc(DXIL::OpCode::CreateHandleForLib, res->getType());
     newHandle = Builder.CreateCall(createHandle, {opArg, res});
 
     CI->replaceAllUsesWith(newHandle);
@@ -445,21 +444,22 @@ void DxilGenerationPass::LowerHLCreateHandle(
   for (iplist<Function>::iterator F : M->getFunctionList()) {
     if (F->user_empty())
       continue;
-    if (!F->isDeclaration()) {
-      hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
-      if (group == HLOpcodeGroup::HLCreateHandle) {
-        // Will lower in later pass.
-        TranslateHLCreateHandle(F, hlslOP);
-      }
-    } else {
-      hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
-      if (group != HLOpcodeGroup::HLAnnotateHandle)
-        continue;
+    hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
+    switch (group) {
+    default:
+      break;
+    case HLOpcodeGroup::HLCreateHandle:
+
+      TranslateHLCreateHandle(F, hlslOP);
+      break;
+    case HLOpcodeGroup::HLAnnotateHandle:
       TranslateHLAnnotateHandle(F, hlslOP, HandleToResTypeMap);
+      break;
     }
   }
 }
 
+
 static void
 MarkUavUpdateCounter(Value* LoadOrGEP,
                      DxilResource &res,

+ 58 - 136
lib/HLSL/HLModule.cpp

@@ -416,32 +416,6 @@ DxilFunctionAnnotation *HLModule::AddFunctionAnnotation(llvm::Function *F) {
   return m_pTypeSystem->AddFunctionAnnotation(F);
 }
 
-void HLModule::AddResourceTypeAnnotation(llvm::Type *Ty,
-                                         DXIL::ResourceClass resClass,
-                                         DXIL::ResourceKind kind) {
-  if (m_ResTypeAnnotation.count(Ty) == 0) {
-    m_ResTypeAnnotation.emplace(Ty, std::make_pair(resClass, kind));
-  } else {
-    DXASSERT(resClass == m_ResTypeAnnotation[Ty].first, "resClass mismatch");
-    DXASSERT(kind == m_ResTypeAnnotation[Ty].second, "kind mismatch");
-  }
-}
-
-DXIL::ResourceClass HLModule::GetResourceClass(llvm::Type *Ty) {
-  if (m_ResTypeAnnotation.count(Ty) > 0) {
-    return m_ResTypeAnnotation[Ty].first;
-  } else {
-    return DXIL::ResourceClass::Invalid;
-  }
-}
-DXIL::ResourceKind HLModule::GetResourceKind(llvm::Type *Ty) {
-  if (m_ResTypeAnnotation.count(Ty) > 0) {
-    return m_ResTypeAnnotation[Ty].second;
-  } else {
-    return DXIL::ResourceKind::Invalid;
-  }
-}
-
 DXIL::Float32DenormMode HLModule::GetFloat32DenormMode() const {
   return m_Float32DenormMode;
 }
@@ -460,7 +434,6 @@ void HLModule::SetDefaultLinkage(const DXIL::DefaultLinkage linkage) {
 
 static const StringRef kHLDxilFunctionPropertiesMDName           = "dx.fnprops";
 static const StringRef kHLDxilOptionsMDName                      = "dx.options";
-static const StringRef kHLDxilResourceTypeAnnotationMDName       = "dx.resource.type.annotation";
 
 // DXIL metadata serialization/deserialization.
 void HLModule::EmitHLMetadata() {
@@ -491,9 +464,6 @@ void HLModule::EmitHLMetadata() {
     uint32_t hlOptions = m_Options.GetHLOptionsRaw();
     options->addOperand(MDNode::get(m_Ctx, m_pMDHelper->Uint32ToConstMD(hlOptions)));
     options->addOperand(MDNode::get(m_Ctx, m_pMDHelper->Uint32ToConstMD(GetAutoBindingSpace())));
-
-    NamedMDNode * resTyAnnotations = m_pModule->getOrInsertNamedMetadata(kHLDxilResourceTypeAnnotationMDName);
-    resTyAnnotations->addOperand(EmitResTyAnnotations());
   }
 
   if (!m_SerializedRootSignature.empty()) {
@@ -551,10 +521,6 @@ void HLModule::LoadHLMetadata() {
     m_Options.SetHLOptionsRaw(DxilMDHelper::ConstMDToUint32(MDOptions->getOperand(0)));
     if (options->getNumOperands() > 1)
       SetAutoBindingSpace(DxilMDHelper::ConstMDToUint32(options->getOperand(1)->getOperand(0)));
-    NamedMDNode * resTyAnnotations = m_pModule->getOrInsertNamedMetadata(kHLDxilResourceTypeAnnotationMDName);
-    const MDNode *MDResTyAnnotations = resTyAnnotations->getOperand(0);
-    if (MDResTyAnnotations->getNumOperands())
-      LoadResTyAnnotations(MDResTyAnnotations->getOperand(0));
   }
 
   m_pMDHelper->LoadRootSignature(m_SerializedRootSignature);
@@ -582,7 +548,6 @@ void HLModule::ClearHLMetadata(llvm::Module &M) {
         name == DxilMDHelper::kDxilTypeSystemMDName ||
         name == DxilMDHelper::kDxilValidatorVersionMDName ||
         name == kHLDxilFunctionPropertiesMDName || // TODO: adjust to proper name
-        name == kHLDxilResourceTypeAnnotationMDName ||
         name == kHLDxilOptionsMDName ||
         name.startswith(DxilMDHelper::kDxilTypeSystemHelperVariablePrefix)) {
       nodes.push_back(b);
@@ -682,40 +647,6 @@ void HLModule::LoadHLResources(const llvm::MDOperand &MDO) {
   }
 }
 
-llvm::MDTuple *HLModule::EmitResTyAnnotations() {
-  vector<Metadata *> MDVals;
-  for (auto &resAnnotation : m_ResTypeAnnotation) {
-    Metadata *TyMeta =
-        ValueAsMetadata::get(UndefValue::get(resAnnotation.first));
-    MDVals.emplace_back(TyMeta);
-    MDVals.emplace_back(m_pMDHelper->Uint32ToConstMD(
-        static_cast<unsigned>(resAnnotation.second.first)));
-    MDVals.emplace_back(m_pMDHelper->Uint32ToConstMD(
-        static_cast<unsigned>(resAnnotation.second.second)));
-  }
-  return MDNode::get(m_Ctx, MDVals);
-}
-void HLModule::LoadResTyAnnotations(const llvm::MDOperand &MDO) {
-  if (MDO.get() == nullptr)
-    return;
-
-  const MDTuple *pTupleMD = dyn_cast<MDTuple>(MDO.get());
-  IFTBOOL(pTupleMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
-  IFTBOOL((pTupleMD->getNumOperands() & 0x3) == 0,
-          DXC_E_INCORRECT_DXIL_METADATA);
-  for (unsigned iNode = 0; iNode < pTupleMD->getNumOperands(); iNode += 3) {
-    const MDOperand &MDTy = pTupleMD->getOperand(iNode);
-    const MDOperand &MDClass = pTupleMD->getOperand(iNode + 1);
-    const MDOperand &MDKind = pTupleMD->getOperand(iNode + 2);
-    Type *Ty = m_pMDHelper->ValueMDToValue(MDTy)->getType();
-    DXIL::ResourceClass resClass = static_cast<DXIL::ResourceClass>(
-        DxilMDHelper::ConstMDToUint32(MDClass));
-    DXIL::ResourceKind kind =
-        static_cast<DXIL::ResourceKind>(DxilMDHelper::ConstMDToUint32(MDKind));
-    AddResourceTypeAnnotation(Ty, resClass, kind);
-  }
-}
-
 MDTuple *HLModule::EmitHLShaderProperties() {
   return nullptr;
 }
@@ -763,16 +694,11 @@ void HLModule::LoadDxilSamplerFromMDNode(llvm::MDNode *MD, DxilSampler &S) {
   return m_pMDHelper->LoadDxilSamplerFromMDNode(MD, S);
 }
 
-DxilResourceBase *HLModule::AddResourceWithGlobalVariableAndMDNode(llvm::Constant *GV,
-                                                      llvm::MDNode *MD) {
-  IFTBOOL(MD->getNumOperands() >= DxilMDHelper::kHLDxilResourceAttributeNumFields,
-          DXC_E_INCORRECT_DXIL_METADATA);
+DxilResourceBase *
+HLModule::AddResourceWithGlobalVariableAndProps(llvm::Constant *GV,
+                                                 DxilResourceProperties &RP) {
+  DxilResource::Class RC = RP.Class;
 
-  DxilResource::Class RC =
-      static_cast<DxilResource::Class>(m_pMDHelper->ConstMDToUint32(
-          MD->getOperand(DxilMDHelper::kHLDxilResourceAttributeClass)));
-  const MDOperand &Meta =
-      MD->getOperand(DxilMDHelper::kHLDxilResourceAttributeMeta);
   unsigned rangeSize = 1;
   Type *Ty = GV->getType()->getPointerElementType();
   if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
@@ -781,7 +707,11 @@ DxilResourceBase *HLModule::AddResourceWithGlobalVariableAndMDNode(llvm::Constan
   switch (RC) {
   case DxilResource::Class::Sampler: {
     std::unique_ptr<DxilSampler> S = llvm::make_unique<DxilSampler>();
-    m_pMDHelper->LoadDxilSampler(Meta, *S);
+    if (RP.Kind == DXIL::ResourceKind::SamplerComparison)
+      S->SetSamplerKind(DxilSampler::SamplerKind::Comparison);
+    else
+      S->SetSamplerKind(DxilSampler::SamplerKind::Default);
+    S->SetKind(RP.Kind);
     S->SetGlobalSymbol(GV);
     S->SetGlobalName(GV->getName());
     S->SetRangeSize(rangeSize);
@@ -790,7 +720,16 @@ DxilResourceBase *HLModule::AddResourceWithGlobalVariableAndMDNode(llvm::Constan
   } break;
   case DxilResource::Class::SRV: {
     std::unique_ptr<HLResource> Res = llvm::make_unique<HLResource>();
-    m_pMDHelper->LoadDxilSRV(Meta, *Res);
+    if (DXIL::IsTyped(RP.Kind)) {
+      Res->SetCompType(RP.Typed.CompType);
+      if (RP.Kind == DXIL::ResourceKind::Texture2DMS ||
+          RP.Kind == DXIL::ResourceKind::Texture2DMSArray)
+        Res->SetSampleCount(RP.getSampleCount());
+    } else if (DXIL::IsStructuredBuffer(RP.Kind)) {
+      Res->SetElementStride(RP.ElementStride);
+    }
+    Res->SetRW(false);
+    Res->SetKind(RP.Kind);
     Res->SetGlobalSymbol(GV);
     Res->SetGlobalName(GV->getName());
     Res->SetRangeSize(rangeSize);
@@ -799,7 +738,19 @@ DxilResourceBase *HLModule::AddResourceWithGlobalVariableAndMDNode(llvm::Constan
   } break;
   case DxilResource::Class::UAV: {
     std::unique_ptr<HLResource> Res = llvm::make_unique<HLResource>();
-    m_pMDHelper->LoadDxilUAV(Meta, *Res);
+    if (DXIL::IsTyped(RP.Kind)) {
+      Res->SetCompType(RP.Typed.CompType);
+      Res->SetSampleCount(RP.getSampleCount());
+    } else if (DXIL::IsStructuredBuffer(RP.Kind)) {
+      Res->SetElementStride(RP.ElementStride);
+    }
+
+    Res->SetRW(true);
+    Res->SetROV(RP.UAV.bROV);
+    Res->SetGloballyCoherent(RP.UAV.bGloballyCoherent);
+    if (RP.Kind == DXIL::ResourceKind::StructuredBufferWithCounter)
+      Res->SetHasCounter(true);
+    Res->SetKind(RP.Kind);
     Res->SetGlobalSymbol(GV);
     Res->SetGlobalName(GV->getName());
     Res->SetRangeSize(rangeSize);
@@ -824,17 +775,30 @@ void HLModule::AddRegBinding(unsigned CbID, unsigned ConstantIdx, unsigned Srv,
   m_SamplerBindingInCB[Key] = Sampler;
 }
 
-static DXIL::ResourceClass GetRCFromType(Type *ResTy, Module &M) {
-  MDNode *MD = HLModule::GetDxilResourceAttrib(ResTy, M);
-  if (!MD)
-    return DXIL::ResourceClass::Invalid;
-  DxilResource::Class RC =
-      static_cast<DxilResource::Class>(DxilMDHelper::ConstMDToUint32(
-          MD->getOperand(DxilMDHelper::kHLDxilResourceAttributeClass)));
-  return RC;
+// Helper functions for resource in cbuffer.
+namespace {
+
+DXIL::ResourceClass GetRCFromType(StructType *ST, Module &M) { // Looks up a resource class for ST via the HLAnnotateHandle functions in M.
+  for (Function &F : M.functions()) {
+    if (F.user_empty()) // No user to read call operands from.
+      continue;
+    hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(&F);
+    if (group != HLOpcodeGroup::HLAnnotateHandle) // Only annotate-handle functions are inspected.
+      continue;
+    Type *Ty = F.getFunctionType()->getParamType(
+        HLOperandIndex::kAnnotateHandleResourceTypeOpIdx);
+    if (Ty != ST) // The resource-type parameter must be exactly ST.
+      continue;
+    CallInst *CI = cast<CallInst>(F.user_back()); // Assumes the user returned by user_back() is a call - cast<> asserts otherwise.
+    return (DXIL::ResourceClass)cast<ConstantInt>(
+               CI->getArgOperand(
+                   HLOperandIndex::kAnnotateHandleResourceClassOpIdx))
+        ->getLimitedValue(); // The class operand of that single call decides the result for ST.
+  }
+  return DXIL::ResourceClass::Invalid; // No annotate-handle function with a matching type parameter was found.
+}
 
-static unsigned CountResNum(Module &M, Type *Ty, DXIL::ResourceClass RC) {
+unsigned CountResNum(Module &M, Type *Ty, DXIL::ResourceClass RC) {
   // Count num of RCs.
   unsigned ArraySize = 1;
   while (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
@@ -871,7 +835,7 @@ static unsigned CountResNum(Module &M, Type *Ty, DXIL::ResourceClass RC) {
 // x[1].s s4
 // x[1].y t6
 // So x[0].x and x[1].x not in an array.
-static unsigned CalcRegBinding(gep_type_iterator GEPIt, gep_type_iterator E,
+unsigned CalcRegBinding(gep_type_iterator GEPIt, gep_type_iterator E,
                                Module &M, DXIL::ResourceClass RC) {
   unsigned NumRC = 0;
   // Count GEP offset when only count RC size.
@@ -892,9 +856,11 @@ static unsigned CalcRegBinding(gep_type_iterator GEPIt, gep_type_iterator E,
   }
   return NumRC;
 }
+} // namespace
 
 unsigned HLModule::GetBindingForResourceInCB(GetElementPtrInst *CbPtr,
-                                             GlobalVariable *CbGV) {
+                                             GlobalVariable *CbGV,
+                                             DXIL::ResourceClass RC) {
   if (!CbPtr->hasAllConstantIndices()) {
     // Dynamic indexing of a resource array inside a cbuffer is not supported.
     string ErrorMsg("Index for resource array inside cbuffer must be a literal expression");
@@ -904,8 +870,6 @@ unsigned HLModule::GetBindingForResourceInCB(GetElementPtrInst *CbPtr,
     return UINT_MAX;
   }
   Module &M = *m_pModule;
-  Type *ResTy = CbPtr->getResultElementType();
-  DxilResource::Class RC = GetRCFromType(ResTy, M);
 
   unsigned RegBinding = UINT_MAX;
   for (auto &CB : m_CBuffers) {
@@ -1270,48 +1234,6 @@ bool HLModule::HasPreciseAttribute(Function *F) {
   return preciseNode != nullptr;
 }
 
-void HLModule::MarkDxilResourceAttrib(llvm::Function *F, MDNode *MD) {
-  F->setMetadata(DxilMDHelper::kHLDxilResourceAttributeMDName, MD);
-}
-
-MDNode *HLModule::GetDxilResourceAttrib(llvm::Function *F) {
-  return F->getMetadata(DxilMDHelper::kHLDxilResourceAttributeMDName);
-}
-
-void HLModule::MarkDxilResourceAttrib(llvm::Argument *Arg, llvm::MDNode *MD) {
-  unsigned i = Arg->getArgNo();
-  Function *F = Arg->getParent();
-  DxilFunctionAnnotation *FuncAnnot = m_pTypeSystem->GetFunctionAnnotation(F);
-  if (!FuncAnnot) {
-    DXASSERT(0, "Invalid function");
-    return;
-  }
-  DxilParameterAnnotation &ParamAnnot = FuncAnnot->GetParameterAnnotation(i);
-  ParamAnnot.SetResourceAttribute(MD);
-}
-
-MDNode *HLModule::GetDxilResourceAttrib(llvm::Argument *Arg) {
-  unsigned i = Arg->getArgNo();
-  Function *F = Arg->getParent();
-  DxilFunctionAnnotation *FuncAnnot = m_pTypeSystem->GetFunctionAnnotation(F);
-  if (!FuncAnnot)
-    return nullptr;
-  DxilParameterAnnotation &ParamAnnot = FuncAnnot->GetParameterAnnotation(i);
-  return ParamAnnot.GetResourceAttribute();
-}
-
-MDNode *HLModule::GetDxilResourceAttrib(Type *Ty, Module &M) {
-  for (Function &F : M.functions()) {
-    if (hlsl::GetHLOpcodeGroupByName(&F) == HLOpcodeGroup::HLCreateHandle) {
-      Type *ResTy = F.getFunctionType()->getParamType(
-          HLOperandIndex::kCreateHandleResourceOpIdx);
-      if (ResTy == Ty)
-        return GetDxilResourceAttrib(&F);
-    }
-  }
-  return nullptr;
-}
-
 DIGlobalVariable *
 HLModule::FindGlobalVariableDebugInfo(GlobalVariable *GV,
                                       DebugInfoFinder &DbgInfoFinder) {

+ 49 - 121
lib/HLSL/HLOperationLower.cpp

@@ -106,21 +106,37 @@ public:
     return Res.ResourceType;
   }
 
-  void MarkHasCounter(Type *Ty, Value *handle) {
+  void MarkHasCounter(Value *handle, Type *i8Ty) {
+    CallInst *CIHandle = cast<CallInst>(handle);
+    hlsl::HLOpcodeGroup group =
+        hlsl::GetHLOpcodeGroup(CIHandle->getCalledFunction());
+    DXASSERT(group == HLOpcodeGroup::HLAnnotateHandle, "else invalid handle");
+    // The counter is marked on the handle operand that this annotate-handle
+    Value *counterHandle =
+        CIHandle->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx);
+    // call wraps. Rewrite the call's resource kind to StructuredBufferWithCounter.
+
+    CIHandle->setArgOperand(
+        HLOperandIndex::kAnnotateHandleResourceKindOpIdx,
+        ConstantInt::get(
+            i8Ty,
+            (unsigned)DXIL::ResourceKind::StructuredBufferWithCounter));
+
     DXIL::ResourceClass RC = GetRC(handle);
     DXASSERT_LOCALVAR(RC, RC == DXIL::ResourceClass::UAV,
                       "must UAV for counter");
     std::unordered_set<Value *> resSet;
-    MarkHasCounterOnCreateHandle(handle, resSet);
+    MarkHasCounterOnCreateHandle(counterHandle, resSet);
   }
 
   Value *GetOrCreateResourceForCbPtr(GetElementPtrInst *CbPtr,
-                                     GlobalVariable *CbGV, MDNode *MD) {
+                                     GlobalVariable *CbGV,
+                                     DxilResourceProperties &RP) {
     // Normalize array indices to 0 so that every pointer into the same array
     // maps to the same cache key.
     Value *Key = UniformCbPtr(CbPtr, CbGV);
     if (CBPtrToResourceMap.count(Key))
       return CBPtrToResourceMap[Key]; // Cache hit: reuse the resource stored for this key.
-    Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, MD);
+    Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, RP);
     CBPtrToResourceMap[Key] = Resource; // Remember it so later lookups with the same key return it.
     return Resource;
   }
@@ -151,6 +167,24 @@ public:
     return Builder.CreateGEP(ResPtr, {Builder.getInt32(0), arrayIdx});
   }
 
+  DxilResourceProperties GetResPropsFromAnnotateHandle(CallInst *Anno) { // Reads resource properties from Anno's operands; Anno is presumably an HLAnnotateHandle call (not checked here).
+    DXIL::ResourceClass RC =
+        (DXIL::ResourceClass)cast<ConstantInt>(
+            Anno->getArgOperand(
+                HLOperandIndex::kAnnotateHandleResourceClassOpIdx))
+            ->getLimitedValue(); // cast<> requires the class operand to be a ConstantInt.
+    DXIL::ResourceKind RK =
+        (DXIL::ResourceKind)cast<ConstantInt>(
+            Anno->getArgOperand(
+                HLOperandIndex::kAnnotateHandleResourceKindOpIdx))
+            ->getLimitedValue(); // cast<> requires the kind operand to be a ConstantInt.
+    Constant *Props = cast<Constant>(Anno->getArgOperand(
+        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
+    DxilResourceProperties RP = resource_helper::loadFromConstant(
+        *Props, RC, RK); // Class and kind come from the operands above; the remaining fields are decoded from Props.
+    return RP;
+  }
+
 private:
   ResAttribute &FindCreateHandleResourceBase(Value *Handle) {
     if (HandleMetaMap.count(Handle))
@@ -180,98 +214,6 @@ private:
         return HandleMetaMap[Handle];
       }
     }
-    if (Argument *Arg = dyn_cast<Argument>(Handle)) {
-      MDNode *MD = HLM.GetDxilResourceAttrib(Arg);
-      if (!MD) {
-        Handle->getContext().emitError("cannot map resource to handle");
-        return HandleMetaMap[Handle];
-      }
-      DxilResourceBase Res(DxilResource::Class::Invalid);
-      HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
-
-      ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
-                             Res.GetGlobalSymbol()->getType()};
-
-      HandleMetaMap[Handle] = Attrib;
-      return HandleMetaMap[Handle];
-    }
-    if (LoadInst *LI = dyn_cast<LoadInst>(Handle)) {
-      Value *Ptr = LI->getPointerOperand();
-
-      for (User *U : Ptr->users()) {
-        if (CallInst *CI = dyn_cast<CallInst>(U)) {
-          DxilFunctionAnnotation *FnAnnot = HLM.GetFunctionAnnotation(CI->getCalledFunction());
-          if (FnAnnot) {
-            for (auto &arg : CI->arg_operands()) {
-              if (arg == Ptr) {
-                unsigned argNo = arg.getOperandNo();
-                DxilParameterAnnotation &ParamAnnot = FnAnnot->GetParameterAnnotation(argNo);
-                MDNode *MD = ParamAnnot.GetResourceAttribute();
-                if (!MD) {
-                  Handle->getContext().emitError(
-                      "cannot map resource to handle");
-                  return HandleMetaMap[Handle];
-                }
-                DxilResourceBase Res(DxilResource::Class::Invalid);
-                HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
-
-                ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
-                                       Res.GetGlobalSymbol()->getType()};
-
-                HandleMetaMap[Handle] = Attrib;
-                return HandleMetaMap[Handle];
-              }
-            }
-          }
-        }
-        if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
-          Value *V = SI->getValueOperand();
-          ResAttribute Attrib = FindCreateHandleResourceBase(V);
-          HandleMetaMap[Handle] = Attrib;
-          return HandleMetaMap[Handle];
-        }
-      }
-      // Cannot find.
-      Handle->getContext().emitError("cannot map resource to handle");
-      return HandleMetaMap[Handle];
-    }
-    if (CallInst *CI = dyn_cast<CallInst>(Handle)) {
-      MDNode *MD = HLM.GetDxilResourceAttrib(CI->getCalledFunction());
-      if (!MD) {
-        Handle->getContext().emitError("cannot map resource to handle");
-        return HandleMetaMap[Handle];
-      }
-      DxilResourceBase Res(DxilResource::Class::Invalid);
-      HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
-
-      ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
-                             Res.GetGlobalSymbol()->getType()};
-
-      HandleMetaMap[Handle] = Attrib;
-      return HandleMetaMap[Handle];
-    }
-    if (SelectInst *Sel = dyn_cast<SelectInst>(Handle)) {
-      ResAttribute &ResT = FindCreateHandleResourceBase(Sel->getTrueValue());
-      // Use MDT here, ResourceClass, ResourceID match is done at
-      // DxilGenerationPass::AddCreateHandleForPhiNodeAndSelect.
-      HandleMetaMap[Handle] = ResT;
-      FindCreateHandleResourceBase(Sel->getFalseValue());
-      return ResT;
-    }
-    if (PHINode *Phi = dyn_cast<PHINode>(Handle)) {
-      if (Phi->getNumOperands() == 0) {
-        Handle->getContext().emitError("cannot map resource to handle");
-        return HandleMetaMap[Handle];
-      }
-      ResAttribute &Res0 = FindCreateHandleResourceBase(Phi->getOperand(0));
-      // Use Res0 here, ResourceClass, ResourceID match is done at
-      // DxilGenerationPass::AddCreateHandleForPhiNodeAndSelect.
-      HandleMetaMap[Handle] = Res0;
-      for (unsigned i = 1; i < Phi->getNumOperands(); i++) {
-        FindCreateHandleResourceBase(Phi->getOperand(i));
-      }
-      return Res0;
-    }
     Handle->getContext().emitError("cannot map resource to handle");
 
     return HandleMetaMap[Handle];
@@ -350,7 +292,7 @@ private:
   }
 
   Value *CreateResourceForCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV,
-                                MDNode *MD) {
+                                DxilResourceProperties &RP) {
     Type *CbTy = CbPtr->getPointerOperandType();
     DXASSERT_LOCALVAR(CbTy, CbTy == CbGV->getType(), "else arg not point to var");
 
@@ -385,15 +327,16 @@ private:
 
     Type *Ty = CbPtr->getResultElementType();
     // Resource arrays in a cbuffer are not supported.
-    unsigned ResBinding = HLM.GetBindingForResourceInCB(CbPtr, CbGV);
-    return CreateResourceGV(Ty, Name, MD, ResBinding);
+    unsigned ResBinding = HLM.GetBindingForResourceInCB(CbPtr, CbGV, RP.Class);
+    return CreateResourceGV(Ty, Name, RP, ResBinding);
   }
 
-  Value *CreateResourceGV(Type *Ty, StringRef Name, MDNode *MD, unsigned ResBinding) {
+  Value *CreateResourceGV(Type *Ty, StringRef Name, DxilResourceProperties &RP,
+                          unsigned ResBinding) {
     Module &M = *HLM.GetModule();
     Constant *GV = M.getOrInsertGlobal(Name, Ty); // Gets the global named Name, inserting one of type Ty if absent.
     // Register a resource with the HL module, using GV as its global symbol.
-    DxilResourceBase *Res = HLM.AddResourceWithGlobalVariableAndMDNode(GV, MD);
+    DxilResourceBase *Res = HLM.AddResourceWithGlobalVariableAndProps(GV, RP);
     DXASSERT(Res, "fail to create resource for global variable in cbuffer");
     Res->SetLowerBound(ResBinding); // The caller-supplied binding becomes the resource's lower bound.
     return GV;
   }
                              HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
   hlsl::OP *hlslOP = &helper.hlslOP;
   Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
-  Value *counterHandle = handle;
-  if (CallInst *CIHandle = dyn_cast<CallInst>(handle)) {
-    hlsl::HLOpcodeGroup group =
-        hlsl::GetHLOpcodeGroup(CIHandle->getCalledFunction());
-    if (group == HLOpcodeGroup::HLAnnotateHandle) {
-      // Mark has counter for the input handle.
-      counterHandle =
-          CIHandle->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx);
-      // Change kind into StructurBufferWithCounter.
-      CIHandle->setArgOperand(
-          HLOperandIndex::kAnnotateHandleResourceKindOpIdx,
-          ConstantInt::get(
-              helper.i8Ty,
-              (unsigned)DXIL::ResourceKind::StructuredBufferWithCounter));
-    }
-  }
-  pObjHelper->MarkHasCounter(counterHandle->getType(), counterHandle);
+
+  pObjHelper->MarkHasCounter(handle, helper.i8Ty);
 
   bool bInc = IOP == IntrinsicOp::MOP_IncrementCounter;
   IRBuilder<> Builder(CI);
@@ -5632,9 +5560,9 @@ void TranslateResourceInCB(LoadInst *LI,
 
   GetElementPtrInst *Ptr = cast<GetElementPtrInst>(LI->getPointerOperand());
   CallInst *CI = cast<CallInst>(LI->user_back());
-  MDNode *MD = HLModule::GetDxilResourceAttrib(CI->getCalledFunction());
-
-  Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, MD);
+  CallInst *Anno = cast<CallInst>(CI->user_back());
+  DxilResourceProperties RP = pObjHelper->GetResPropsFromAnnotateHandle(Anno);
+  Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, RP);
 
   // Lower Ptr to GV base Ptr.
   Value *GvPtr = pObjHelper->LowerCbResourcePtr(Ptr, ResPtr);

+ 0 - 2
lib/HLSL/HLOperations.cpp

@@ -436,8 +436,6 @@ static void SetHLFunctionAttribute(Function *F, HLOpcodeGroup group,
   case HLOpcodeGroup::HLCreateHandle: {
     F->addFnAttr(Attribute::ReadNone);
     F->addFnAttr(Attribute::NoUnwind);
-    F->addFnAttr(Attribute::NoInline);
-    F->setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage);
   } break;
   case HLOpcodeGroup::HLAnnotateHandle: {
     F->addFnAttr(Attribute::ReadNone);

+ 0 - 16
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -4879,8 +4879,6 @@ void SROA_Parameter_HLSL::replaceCastParameter(
     Value *NewParam, Value *OldParam, Function &F, Argument *Arg,
     const DxilParamInputQual inputQual, IRBuilder<> &Builder) {
   Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
-  Type *HandlePtrTy = PointerType::get(HandleTy, 0);
-  Module &M = *m_pHLModule->GetModule();
 
   Type *NewTy = NewParam->getType();
   Type *OldTy = OldParam->getType();
@@ -4914,10 +4912,6 @@ void SROA_Parameter_HLSL::replaceCastParameter(
 
   if (NewTy == HandleTy) {
     CopyHandleToResourcePtr(NewParam, OldParam, *m_pHLModule, Builder);
-    // Save resource attribute.
-    Type *ResTy = OldTy->getPointerElementType();
-    MDNode *MD = HLModule::GetDxilResourceAttrib(ResTy, M);
-    m_pHLModule->MarkDxilResourceAttrib(Arg, MD);
   } else if (vectorEltsMap.count(NewParam)) {
     // Vector is flattened to scalars.
     Type *VecTy = OldTy;
@@ -4984,16 +4978,6 @@ void SROA_Parameter_HLSL::replaceCastParameter(
         }
       }
     }
-
-    Type *NewEltTy = dxilutil::GetArrayEltTy(NewTy);
-    Type *OldEltTy = dxilutil::GetArrayEltTy(OldTy);
-
-    if (NewEltTy == HandlePtrTy) {
-      // Save resource attribute.
-      Type *ResTy = OldEltTy;
-      MDNode *MD = HLModule::GetDxilResourceAttrib(ResTy, M);
-      m_pHLModule->MarkDxilResourceAttrib(Arg, MD);
-    }
   }
 }
 

+ 90 - 2531
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -49,43 +49,19 @@
 #include "dxc/HLSL/DxilExportMap.h"
 #include "dxc/DXIL/DxilResourceProperties.h"
 
+#include "CGHLSLMSHelper.h"
+
 using namespace clang;
 using namespace CodeGen;
 using namespace hlsl;
 using namespace llvm;
 using std::unique_ptr;
+using namespace CGHLSLMSHelper;
 
 static const bool KeepUndefinedTrue = true; // Keep interpolation mode undefined if not set explicitly.
 
 namespace {
 
-/// Use this class to represent HLSL cbuffer in high-level DXIL.
-class HLCBuffer : public DxilCBuffer {
-public:
-  HLCBuffer() = default;
-  virtual ~HLCBuffer() = default;
-
-  void AddConst(std::unique_ptr<DxilResourceBase> &pItem);
-
-  std::vector<std::unique_ptr<DxilResourceBase>> &GetConstants();
-
-private:
-  std::vector<std::unique_ptr<DxilResourceBase>> constants; // constants inside const buffer
-};
-
-//------------------------------------------------------------------------------
-//
-// HLCBuffer methods.
-//
-void HLCBuffer::AddConst(std::unique_ptr<DxilResourceBase> &pItem) {
-  pItem->SetID(constants.size());
-  constants.push_back(std::move(pItem));
-}
-
-std::vector<std::unique_ptr<DxilResourceBase>> &HLCBuffer::GetConstants() {
-  return constants;
-}
-
 class CGMSHLSLRuntime : public CGHLSLRuntime {
 
 private:
@@ -136,25 +112,11 @@ private:
   bool GetAsConstantUInt32(clang::Expr *expr, uint32_t *value);
   std::vector<StringRef> ParseSubobjectExportsAssociations(StringRef exports);
 
-  // Save the entryFunc so don't need to find it with original name.
-  struct EntryFunctionInfo {
-    clang::SourceLocation SL = clang::SourceLocation();
-    llvm::Function *Func = nullptr;
-  };
-
   EntryFunctionInfo Entry;
 
-  // Map to save patch constant functions
-  struct PatchConstantInfo {
-    clang::SourceLocation SL = clang::SourceLocation();
-    llvm::Function *Func = nullptr;
-    std::uint32_t NumOverloads = 0;
-  };
-
   StringMap<PatchConstantInfo> patchConstantFunctionMap;
   std::unordered_map<Function *, std::unique_ptr<DxilFunctionProps>>
       patchConstantFunctionPropsMap;
-  bool IsPatchConstantFunction(const Function *F);
 
   std::unordered_map<Function *, const clang::HLSLPatchConstantFuncAttr *>
       HSEntryPatchConstantFuncAttr;
@@ -223,8 +185,6 @@ private:
 
   void RemapObsoleteSemantic(DxilParameterAnnotation &paramInfo,
                              bool isPatchConstantFunction);
-
-  void SetEntryFunction();
   SourceLocation SetSemantic(const NamedDecl *decl,
                              DxilParameterAnnotation &paramInfo);
 
@@ -234,12 +194,6 @@ private:
   // save intrinsic opcode
   std::vector<std::pair<Function *, unsigned>> m_IntrinsicMap;
   void AddHLSLIntrinsicOpcodeToFunction(Function *, unsigned opcode);
-  void AddOpcodeParamForIntrinsics(
-      HLModule &HLM, std::vector<std::pair<Function *, unsigned>> &intrinsicMap,
-      std::unordered_map<llvm::Type *, MDNode *> &resMetaMap);
-  void AddOpcodeParamForIntrinsic(
-      HLModule &HLM, Function *F, unsigned opcode, llvm::Type *HandleTy,
-      std::unordered_map<llvm::Type *, MDNode *> &resMetaMap);
 
   // Type annotation related.
   unsigned ConstructStructAnnotation(DxilStructAnnotation *annotation,
@@ -260,10 +214,6 @@ public:
 
   /// Add resource to the program
   void addResource(Decl *D) override;
-  void SetPatchConstantFunction(const EntryFunctionInfo &EntryFunc);
-  void SetPatchConstantFunctionWithAttr(
-      const EntryFunctionInfo &EntryFunc,
-      const clang::HLSLPatchConstantFuncAttr *PatchConstantFuncAttr);
 
   void addSubobject(Decl *D) override;
 
@@ -336,31 +286,6 @@ public:
 };
 }
 
-void clang::CompileRootSignature(
-    StringRef rootSigStr, DiagnosticsEngine &Diags, SourceLocation SLoc,
-    hlsl::DxilRootSignatureVersion rootSigVer,
-    hlsl::DxilRootSignatureCompilationFlags flags,
-    hlsl::RootSignatureHandle *pRootSigHandle) {
-  std::string OSStr;
-  llvm::raw_string_ostream OS(OSStr);
-  hlsl::DxilVersionedRootSignatureDesc *D = nullptr;
-
-  if (ParseHLSLRootSignature(rootSigStr.data(), rootSigStr.size(), rootSigVer,
-                             flags, &D, SLoc, Diags)) {
-    CComPtr<IDxcBlob> pSignature;
-    CComPtr<IDxcBlobEncoding> pErrors;
-    hlsl::SerializeRootSignature(D, &pSignature, &pErrors, false);
-    if (pSignature == nullptr) {
-      assert(pErrors != nullptr && "else serialize failed with no msg");
-      ReportHLSLRootSigError(Diags, SLoc, (char *)pErrors->GetBufferPointer(),
-                             pErrors->GetBufferSize());
-      hlsl::DeleteRootSignature(D);
-    } else {
-      pRootSigHandle->Assign(D, pSignature);
-    }
-  }
-}
-
 //------------------------------------------------------------------------------
 //
 // CGMSHLSLRuntime methods.
@@ -588,78 +513,6 @@ StringToMeshOutputTopology(StringRef topology) {
   return DXIL::MeshOutputTopology::Undefined;
 }
 
-static unsigned RoundToAlign(unsigned num, unsigned mod) {
-  // round num to next highest mod
-  if (mod != 0)
-    return mod * ((num + mod - 1) / mod);
-  return num;
-}
-
-// Align cbuffer offset in legacy mode (16 bytes per row).
-static unsigned AlignBufferOffsetInLegacy(unsigned offset, unsigned size,
-                                          unsigned scalarSizeInBytes,
-                                          bool bNeedNewRow) {
-  if (unsigned remainder = (offset & 0xf)) {
-    // Start from new row
-    if (remainder + size > 16 || bNeedNewRow) {
-      return offset + 16 - remainder;
-    }
-    // If not, naturally align data
-    return RoundToAlign(offset, scalarSizeInBytes);
-  }
-  return offset;
-}
-
-static unsigned AlignBaseOffset(unsigned baseOffset, unsigned size,
-                                 QualType Ty, bool bDefaultRowMajor) {
-  bool needNewAlign = Ty->isArrayType();
-
-  if (IsHLSLMatType(Ty)) {
-    bool bRowMajor = false;
-    if (!hlsl::HasHLSLMatOrientation(Ty, &bRowMajor))
-      bRowMajor = bDefaultRowMajor;
-
-    unsigned row, col;
-    hlsl::GetHLSLMatRowColCount(Ty, row, col);
-
-    needNewAlign |= !bRowMajor && col > 1;
-    needNewAlign |= bRowMajor && row > 1;
-  } else if (Ty->isStructureOrClassType() && ! hlsl::IsHLSLVecType(Ty)) {
-    needNewAlign = true;
-  }
-
-  unsigned scalarSizeInBytes = 4;
-  const clang::BuiltinType *BT = Ty->getAs<clang::BuiltinType>();
-  if (hlsl::IsHLSLVecMatType(Ty)) {
-    BT = hlsl::GetElementTypeOrType(Ty)->getAs<clang::BuiltinType>();
-  }
-  if (BT) {
-    if (BT->getKind() == clang::BuiltinType::Kind::Double ||
-      BT->getKind() == clang::BuiltinType::Kind::LongLong)
-      scalarSizeInBytes = 8;
-    else if (BT->getKind() == clang::BuiltinType::Kind::Half ||
-      BT->getKind() == clang::BuiltinType::Kind::Short ||
-      BT->getKind() == clang::BuiltinType::Kind::UShort)
-      scalarSizeInBytes = 2;
-  }
-
-  return AlignBufferOffsetInLegacy(baseOffset, size, scalarSizeInBytes, needNewAlign);
-}
-
-static unsigned AlignBaseOffset(QualType Ty, unsigned baseOffset,
-                                bool bDefaultRowMajor,
-                                CodeGen::CodeGenModule &CGM,
-                                llvm::DataLayout &layout) {
-  QualType paramTy = Ty.getCanonicalType();
-  if (const ReferenceType *RefType = dyn_cast<ReferenceType>(paramTy))
-    paramTy = RefType->getPointeeType();
-
-  // Get size.
-  llvm::Type *Type = CGM.getTypes().ConvertType(paramTy);
-  unsigned size = layout.getTypeAllocSize(Type);
-  return AlignBaseOffset(baseOffset, size, paramTy, bDefaultRowMajor);
-}
-
 static unsigned GetMatrixSizeInCB(QualType Ty, bool defaultRowMajor,
                                   bool b64Bit) {
   bool bRowMajor;
@@ -905,6 +758,7 @@ void CGMSHLSLRuntime::ConstructFieldAttributedAnnotation(
 
   if (IsHLSLResourceType(Ty)) {
     // Always create for llvm::Type could be same for different QualType.
+    // TODO: change to DxilProperties.
     MDNode *MD = GetOrAddResTypeMD(Ty, /*bCreate*/ true);
     fieldAnnotation.SetResourceAttribute(MD);
   }
@@ -942,6 +796,57 @@ static void ConstructFieldInterpolation(DxilFieldAnnotation &fieldAnnotation,
     fieldAnnotation.SetInterpolationMode(InterpMode);
 }
 
+static unsigned AlignBaseOffset(unsigned baseOffset, unsigned size, QualType Ty,
+                                bool bDefaultRowMajor) { // Returns baseOffset aligned for a value of type Ty occupying `size` bytes, via AlignBufferOffsetInLegacy.
+  bool needNewAlign = Ty->isArrayType(); // Array types always request the new-alignment flag.
+
+  if (IsHLSLMatType(Ty)) {
+    bool bRowMajor = false;
+    if (!hlsl::HasHLSLMatOrientation(Ty, &bRowMajor)) // No orientation reported for Ty: fall back to the caller's default.
+      bRowMajor = bDefaultRowMajor;
+
+    unsigned row, col;
+    hlsl::GetHLSLMatRowColCount(Ty, row, col);
+
+    needNewAlign |= !bRowMajor && col > 1; // Column-major matrix with more than one column.
+    needNewAlign |= bRowMajor && row > 1; // Row-major matrix with more than one row.
+  } else if (Ty->isStructureOrClassType() && !hlsl::IsHLSLVecType(Ty)) { // Struct/class types other than HLSL vectors.
+    needNewAlign = true;
+  }
+
+  unsigned scalarSizeInBytes = 4; // Default scalar size when no builtin kind below matches.
+  const clang::BuiltinType *BT = Ty->getAs<clang::BuiltinType>();
+  if (hlsl::IsHLSLVecMatType(Ty)) { // For vectors/matrices, classify by the element type instead of Ty itself.
+    BT = hlsl::GetElementTypeOrType(Ty)->getAs<clang::BuiltinType>();
+  }
+  if (BT) {
+    if (BT->getKind() == clang::BuiltinType::Kind::Double ||
+        BT->getKind() == clang::BuiltinType::Kind::LongLong) // NOTE(review): ULongLong is not listed here - confirm whether that is intentional.
+      scalarSizeInBytes = 8;
+    else if (BT->getKind() == clang::BuiltinType::Kind::Half ||
+             BT->getKind() == clang::BuiltinType::Kind::Short ||
+             BT->getKind() == clang::BuiltinType::Kind::UShort)
+      scalarSizeInBytes = 2;
+  }
+
+  return AlignBufferOffsetInLegacy(baseOffset, size, scalarSizeInBytes, // Helper is not defined in the visible added lines; presumably declared in CGHLSLMSHelper.h - TODO confirm.
+                                   needNewAlign);
+}
+
+static unsigned AlignBaseOffset(QualType Ty, unsigned baseOffset, // Convenience overload: derives the size from Ty, then defers to the (baseOffset, size, Ty, bDefaultRowMajor) overload.
+                                bool bDefaultRowMajor,
+                                CodeGen::CodeGenModule &CGM,
+                                llvm::DataLayout &layout) {
+  QualType paramTy = Ty.getCanonicalType();
+  if (const ReferenceType *RefType = dyn_cast<ReferenceType>(paramTy)) // Reference types are aligned as their pointee type.
+    paramTy = RefType->getPointeeType();
+
+  // Get size.
+  llvm::Type *Type = CGM.getTypes().ConvertType(paramTy); // LLVM type that CGM maps paramTy to.
+  unsigned size = layout.getTypeAllocSize(Type); // Allocation size of that LLVM type according to `layout`.
+  return AlignBaseOffset(baseOffset, size, paramTy, bDefaultRowMajor);
+}
+
 unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annotation,
                                       const RecordDecl *RD,
                                       DxilTypeSystem &dxilTypeSys) {
@@ -1230,48 +1135,11 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
   // Add hlsl intrinsic attr
   unsigned intrinsicOpcode;
   StringRef intrinsicGroup;
-  llvm::FunctionType *FT = F->getFunctionType();
-
-  auto AddResourceMetadata = [&](QualType qTy, llvm::Type *Ty) {
-    hlsl::DxilResourceBase::Class resClass = TypeToClass(qTy);
-    if (resClass != hlsl::DxilResourceBase::Class::Invalid) {
-      if (!resMetadataMap.count(Ty)) {
-        MDNode *Meta = GetOrAddResTypeMD(qTy, /**/false);
-        DXASSERT(Meta, "else invalid resource type");
-        resMetadataMap[Ty] = Meta;
-      }
-    }
-  };
+
 
   if (hlsl::GetIntrinsicOp(FD, intrinsicOpcode, intrinsicGroup)) {
     AddHLSLIntrinsicOpcodeToFunction(F, intrinsicOpcode);
     F->addFnAttr(hlsl::HLPrefix, intrinsicGroup);
-    unsigned iParamOffset = 0; // skip this on llvm function
-
-    // Save resource type annotation.
-    if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD)) {
-      iParamOffset = 1;
-      const CXXRecordDecl *RD = MD->getParent();
-      // For nested case like sample_slice_type.
-      if (const CXXRecordDecl *PRD =
-              dyn_cast<CXXRecordDecl>(RD->getDeclContext())) {
-        RD = PRD;
-      }
-
-      QualType recordTy = MD->getASTContext().getRecordType(RD);
-      llvm::Type *Ty = CGM.getTypes().ConvertType(recordTy);
-      AddResourceMetadata(recordTy, Ty);
-    }
-
-    // Add metadata for any resources found in parameters
-    for (unsigned iParam = 0; iParam < FD->getNumParams(); iParam++) {
-      llvm::Type *Ty = FT->getParamType(iParam + iParamOffset);
-      if (!Ty->isPointerTy())
-        continue; // not a resource
-      Ty = Ty->getPointerElementType();
-      QualType paramTy = FD->getParamDecl(iParam)->getType();
-      AddResourceMetadata(paramTy, Ty);
-    }
 
     StringRef lower;
     if (hlsl::GetIntrinsicLowering(FD, lower))
@@ -3387,2364 +3255,55 @@ HLCBuffer &CGMSHLSLRuntime::GetOrCreateCBuffer(HLSLBufferDecl *D) {
   return *static_cast<HLCBuffer*>(&(m_pHLModule->GetCBuffer(cbID)));
 }
 
-bool CGMSHLSLRuntime::IsPatchConstantFunction(const Function *F) {
-  DXASSERT_NOMSG(F != nullptr);
-  for (auto && p : patchConstantFunctionMap) {
-    if (p.second.Func == F) return true;
-  }
-  return false;
-}
-
-void CGMSHLSLRuntime::SetEntryFunction() {
-  if (Entry.Func == nullptr) {
-    DiagnosticsEngine &Diags = CGM.getDiags();
-    unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
-                                            "cannot find entry function %0");
-    Diags.Report(DiagID) << CGM.getCodeGenOpts().HLSLEntryFunction;
-    return;
-  }
-
-  m_pHLModule->SetEntryFunction(Entry.Func);
-}
-
-// Here the size is CB size.
-// Offset still needs to be aligned based on type since this
-// is the legacy cbuffer global path.
-static unsigned AlignCBufferOffset(unsigned offset, unsigned size, llvm::Type *Ty, bool bRowMajor) {
-  DXASSERT(!(offset & 1), "otherwise we have an invalid offset.");
-  bool bNeedNewRow = Ty->isArrayTy();
-  if (!bNeedNewRow && Ty->isStructTy()) {
-    if (HLMatrixType mat = HLMatrixType::dyn_cast(Ty)) {
-      bNeedNewRow |= !bRowMajor && mat.getNumColumns() > 1;
-      bNeedNewRow |= bRowMajor && mat.getNumRows() > 1;
-    } else {
-      bNeedNewRow = true;
-    }
-  }
-  unsigned scalarSizeInBytes = Ty->getScalarSizeInBits() / 8;
-
-  return AlignBufferOffsetInLegacy(offset, size, scalarSizeInBytes, bNeedNewRow);
-}
-
-static unsigned AllocateDxilConstantBuffer(HLCBuffer &CB,
-  std::unordered_map<Constant*, DxilFieldAnnotation> &constVarAnnotationMap) {
-  unsigned offset = 0;
-
-  // Scan user allocated constants first.
-  // Update offset.
-  for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
-    if (C->GetLowerBound() == UINT_MAX)
-      continue;
-    unsigned size = C->GetRangeSize();
-    unsigned nextOffset = size + C->GetLowerBound();
-    if (offset < nextOffset)
-      offset = nextOffset;
-  }
-
-  // Alloc after user allocated constants.
-  for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
-    if (C->GetLowerBound() != UINT_MAX)
-      continue;
-
-    unsigned size = C->GetRangeSize();
-    llvm::Type *Ty = C->GetGlobalSymbol()->getType()->getPointerElementType();
-    auto fieldAnnotation = constVarAnnotationMap.at(C->GetGlobalSymbol());
-    bool bRowMajor = HLMatrixType::isa(Ty)
-      ? fieldAnnotation.GetMatrixAnnotation().Orientation == MatrixOrientation::RowMajor
-      : false;
-    // Align offset.
-    offset = AlignCBufferOffset(offset, size, Ty, bRowMajor);
-    if (C->GetLowerBound() == UINT_MAX) {
-      C->SetLowerBound(offset);
-    }
-    offset += size;
-  }
-  return offset;
-}
-
-static void AddRegBindingsForResourceInConstantBuffer(
-    HLModule *pHLModule,
-    llvm::DenseMap<llvm::Constant *,
-                   llvm::SmallVector<std::pair<DXIL::ResourceClass, unsigned>,
-                                     1>> &constantRegBindingMap) {
-  for (unsigned i = 0; i < pHLModule->GetCBuffers().size(); i++) {
-    HLCBuffer &CB = *static_cast<HLCBuffer *>(&(pHLModule->GetCBuffer(i)));
-    auto &Constants = CB.GetConstants();
-    for (unsigned j = 0; j < Constants.size(); j++) {
-      const std::unique_ptr<DxilResourceBase> &C = Constants[j];
-      Constant *CGV = C->GetGlobalSymbol();
-      auto &regBindings = constantRegBindingMap[CGV];
-      if (regBindings.empty())
-        continue;
-      unsigned Srv = UINT_MAX;
-      unsigned Uav = UINT_MAX;
-      unsigned Sampler = UINT_MAX;
-      for (auto it : regBindings) {
-        unsigned RegNum = it.second;
-        switch (it.first) {
-        case DXIL::ResourceClass::SRV:
-          Srv = RegNum;
-          break;
-        case DXIL::ResourceClass::UAV:
-          Uav = RegNum;
-          break;
-        case DXIL::ResourceClass::Sampler:
-          Sampler = RegNum;
-          break;
-        default:
-          DXASSERT(0, "invalid resource class");
-          break;
-        }
-      }
-      pHLModule->AddRegBinding(CB.GetID(), j, Srv, Uav, Sampler);
-    }
-  }
-}
-
-static void AllocateDxilConstantBuffers(HLModule *pHLModule,
-  std::unordered_map<Constant*, DxilFieldAnnotation> &constVarAnnotationMap) {
-  for (unsigned i = 0; i < pHLModule->GetCBuffers().size(); i++) {
-    HLCBuffer &CB = *static_cast<HLCBuffer*>(&(pHLModule->GetCBuffer(i)));
-    unsigned size = AllocateDxilConstantBuffer(CB, constVarAnnotationMap);
-    CB.SetSize(size);
-  }
-}
-
-static void ReplaceUseInFunction(Value *V, Value *NewV, Function *F,
-                                 IRBuilder<> &Builder) {
-  for (auto U = V->user_begin(); U != V->user_end(); ) {
-    User *user = *(U++);
-    if (Instruction *I = dyn_cast<Instruction>(user)) {
-      if (I->getParent()->getParent() == F) {
-        // replace use with GEP if in F
-        for (unsigned i = 0; i < I->getNumOperands(); i++) {
-          if (I->getOperand(i) == V)
-            I->setOperand(i, NewV);
-        }
-      }
-    } else {
-      // For constant operator, create local clone which use GEP.
-      // Only support GEP and bitcast.
-      if (GEPOperator *GEPOp = dyn_cast<GEPOperator>(user)) {
-        std::vector<Value *> idxList(GEPOp->idx_begin(), GEPOp->idx_end());
-        Value *NewGEP = Builder.CreateInBoundsGEP(NewV, idxList);
-        ReplaceUseInFunction(GEPOp, NewGEP, F, Builder);
-      } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(user)) {
-        // Change the init val into NewV with Store.
-        GV->setInitializer(nullptr);
-        Builder.CreateStore(NewV, GV);
-      } else {
-        // Must be bitcast here.
-        BitCastOperator *BC = cast<BitCastOperator>(user);
-        Value *NewBC = Builder.CreateBitCast(NewV, BC->getType());
-        ReplaceUseInFunction(BC, NewBC, F, Builder);
-      }
-    }
-  }
-}
-
-void MarkUsedFunctionForConst(Value *V, std::unordered_set<Function*> &usedFunc) {
-  for (auto U = V->user_begin(); U != V->user_end();) {
-    User *user = *(U++);
-    if (Instruction *I = dyn_cast<Instruction>(user)) {
-      Function *F = I->getParent()->getParent();
-      usedFunc.insert(F);
-    } else {
-      // For constant operator, create local clone which use GEP.
-      // Only support GEP and bitcast.
-      if (GEPOperator *GEPOp = dyn_cast<GEPOperator>(user)) {
-        MarkUsedFunctionForConst(GEPOp, usedFunc);
-      } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(user)) {
-        MarkUsedFunctionForConst(GV, usedFunc);
-      } else {
-        // Must be bitcast here.
-        BitCastOperator *BC = cast<BitCastOperator>(user);
-        MarkUsedFunctionForConst(BC, usedFunc);
-      }
-    }
-  }
-}
-
-static Function * GetOrCreateHLCreateHandle(HLModule &HLM, llvm::Type *HandleTy,
-    ArrayRef<Value*> paramList, MDNode *MD) {
-  SmallVector<llvm::Type *, 4> paramTyList;
-  for (Value *param : paramList) {
-    paramTyList.emplace_back(param->getType());
-  }
-
-  llvm::FunctionType *funcTy =
-      llvm::FunctionType::get(HandleTy, paramTyList, false);
-  llvm::Module &M = *HLM.GetModule();
-  Function *CreateHandle = GetOrCreateHLFunctionWithBody(M, funcTy, HLOpcodeGroup::HLCreateHandle,
-      /*opcode*/ 0, "");
-  if (CreateHandle->empty()) {
-    // Add body.
-    BasicBlock *BB =
-        BasicBlock::Create(CreateHandle->getContext(), "Entry", CreateHandle);
-    IRBuilder<> Builder(BB);
-    // Just return undef to make a body.
-    Builder.CreateRet(UndefValue::get(HandleTy));
-    // Mark resource attribute.
-    HLM.MarkDxilResourceAttrib(CreateHandle, MD);
-  }
-  return CreateHandle;
-}
-
-static bool CreateCBufferVariable(HLCBuffer &CB,
-    HLModule &HLM, llvm::Type *HandleTy) {
-  bool bUsed = false;
-  // Build Struct for CBuffer.
-  SmallVector<llvm::Type*, 4> Elements;
-  for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
-    Value *GV = C->GetGlobalSymbol();
-    if (GV->hasNUsesOrMore(1))
-      bUsed = true;
-    // Global variable must be pointer type.
-    llvm::Type *Ty = GV->getType()->getPointerElementType();
-    Elements.emplace_back(Ty);
-  }
-  // Don't create CBuffer variable for unused cbuffer.
-  if (!bUsed)
-    return false;
-
-  llvm::Module &M = *HLM.GetModule();
-
-  bool isCBArray = CB.GetRangeSize() != 1;
-  llvm::GlobalVariable *cbGV = nullptr;
-  llvm::Type *cbTy = nullptr;
-
-  unsigned cbIndexDepth = 0;
-  if (!isCBArray) {
-    llvm::StructType *CBStructTy =
-        llvm::StructType::create(Elements, CB.GetGlobalName());
-    cbGV = new llvm::GlobalVariable(M, CBStructTy, /*IsConstant*/ true,
-                                    llvm::GlobalValue::ExternalLinkage,
-                                    /*InitVal*/ nullptr, CB.GetGlobalName());
-    cbTy = cbGV->getType();
-  } else {
-    // For array of ConstantBuffer, create array of struct instead of struct of
-    // array.
-    DXASSERT(CB.GetConstants().size() == 1,
-             "ConstantBuffer should have 1 constant");
-    Value *GV = CB.GetConstants()[0]->GetGlobalSymbol();
-    llvm::Type *CBEltTy =
-        GV->getType()->getPointerElementType()->getArrayElementType();
-    cbIndexDepth = 1;
-    while (CBEltTy->isArrayTy()) {
-      CBEltTy = CBEltTy->getArrayElementType();
-      cbIndexDepth++;
-    }
-
-    // Add one level struct type to match normal case.
-    llvm::StructType *CBStructTy =
-        llvm::StructType::create({CBEltTy}, CB.GetGlobalName());
-
-    llvm::ArrayType *CBArrayTy =
-        llvm::ArrayType::get(CBStructTy, CB.GetRangeSize());
-    cbGV = new llvm::GlobalVariable(M, CBArrayTy, /*IsConstant*/ true,
-                                    llvm::GlobalValue::ExternalLinkage,
-                                    /*InitVal*/ nullptr, CB.GetGlobalName());
-
-    cbTy = llvm::PointerType::get(CBStructTy,
-                                  cbGV->getType()->getPointerAddressSpace());
-  }
-
-  CB.SetGlobalSymbol(cbGV);
-
-  llvm::Type *opcodeTy = llvm::Type::getInt32Ty(M.getContext());
-  llvm::Type *idxTy = opcodeTy;
-  Constant *zeroIdx = ConstantInt::get(opcodeTy, 0);
-
-  MDNode *MD = HLM.DxilCBufferToMDNode(CB);
-
-  Value *HandleArgs[] = { zeroIdx, cbGV, zeroIdx };
-  Function *CreateHandleFunc = GetOrCreateHLCreateHandle(HLM, HandleTy, HandleArgs, MD);
-
-  llvm::FunctionType *SubscriptFuncTy =
-      llvm::FunctionType::get(cbTy, { opcodeTy, HandleTy, idxTy}, false);
-
-  Function *subscriptFunc =
-      GetOrCreateHLFunction(M, SubscriptFuncTy, HLOpcodeGroup::HLSubscript,
-                            (unsigned)HLSubscriptOpcode::CBufferSubscript);
-  Constant *opArg = ConstantInt::get(opcodeTy, (unsigned)HLSubscriptOpcode::CBufferSubscript);
-  Value *args[] = { opArg, nullptr, zeroIdx };
+void CGMSHLSLRuntime::FinishCodeGen() {
+  HLModule &HLM = *m_pHLModule;
+  llvm::Module &M = TheModule;
 
-  llvm::LLVMContext &Context = M.getContext();
-  llvm::Type *i32Ty = llvm::Type::getInt32Ty(Context);
-  Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
+  // Do this before CloneShaderEntry and TranslateRayQueryConstructor to avoid
+  // updating valToResPropertiesMap for cloned instructions.
+  FinishIntrinsics(HLM, m_IntrinsicMap, valToResPropertiesMap);
 
-  std::vector<Value *> indexArray(CB.GetConstants().size());
-  std::vector<std::unordered_set<Function*>> constUsedFuncList(CB.GetConstants().size());
+  FinishEntries(HLM, Entry, CGM, entryFunctionMap, HSEntryPatchConstantFuncAttr,
+                patchConstantFunctionMap, patchConstantFunctionPropsMap);
 
-  for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
-    Value *idx = ConstantInt::get(i32Ty, C->GetID());
-    indexArray[C->GetID()] = idx;
+  ReplaceConstStaticGlobals(staticConstGlobalInitListMap,
+                            staticConstGlobalCtorMap);
 
-    Value *GV = C->GetGlobalSymbol();
-    MarkUsedFunctionForConst(GV, constUsedFuncList[C->GetID()]);
+  // Create copy for clip plane.
+  if (!clipPlaneFuncList.empty()) {
+    FinishClipPlane(HLM, clipPlaneFuncList, debugInfoMap, CGM);
   }
 
-  for (Function &F : M.functions()) {
-    if (F.isDeclaration())
-      continue;
-
-    if (GetHLOpcodeGroupByName(&F) != HLOpcodeGroup::NotHL)
-      continue;
-
-    IRBuilder<> Builder(F.getEntryBlock().getFirstInsertionPt());
-
-    // create HL subscript to make all the use of cbuffer start from it.
-    HandleArgs[HLOperandIndex::kCreateHandleResourceOpIdx] = cbGV;
-    CallInst *Handle = Builder.CreateCall(CreateHandleFunc, HandleArgs);
-    args[HLOperandIndex::kSubscriptObjectOpIdx] = Handle;
-    Instruction *cbSubscript =
-        cast<Instruction>(Builder.CreateCall(subscriptFunc, {args}));
-
-    // Replace constant var with GEP pGV
-    for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
-      Value *GV = C->GetGlobalSymbol();
-      if (constUsedFuncList[C->GetID()].count(&F) == 0)
-        continue;
-
-      Value *idx = indexArray[C->GetID()];
-      if (!isCBArray) {
-        Instruction *GEP = cast<Instruction>(
-            Builder.CreateInBoundsGEP(cbSubscript, {zero, idx}));
-        // TODO: make sure the debug info is synced to GEP.
-        // GEP->setDebugLoc(GV);
-        ReplaceUseInFunction(GV, GEP, &F, Builder);
-        // Delete if no use in F.
-        if (GEP->user_empty())
-          GEP->eraseFromParent();
-      } else {
-        for (auto U = GV->user_begin(); U != GV->user_end();) {
-          User *user = *(U++);
-          if (user->user_empty())
-            continue;
-          Instruction *I = dyn_cast<Instruction>(user);
-          if (I && I->getParent()->getParent() != &F)
-            continue;
-
-          IRBuilder<> *instBuilder = &Builder;
-          unique_ptr<IRBuilder<>> B;
-          if (I) {
-            B = llvm::make_unique<IRBuilder<>>(I);
-            instBuilder = B.get();
-          }
-
-          GEPOperator *GEPOp = cast<GEPOperator>(user);
-          std::vector<Value *> idxList;
-
-          DXASSERT(GEPOp->getNumIndices() >= 1 + cbIndexDepth,
-                   "must indexing ConstantBuffer array");
-          idxList.reserve(GEPOp->getNumIndices() - (cbIndexDepth - 1));
-
-          gep_type_iterator GI = gep_type_begin(*GEPOp),
-                            E = gep_type_end(*GEPOp);
-          idxList.push_back(GI.getOperand());
-          // change array index with 0 for struct index.
-          idxList.push_back(zero);
-          GI++;
-          Value *arrayIdx = GI.getOperand();
-          GI++;
-          for (unsigned curIndex = 1; GI != E && curIndex < cbIndexDepth;
-               ++GI, ++curIndex) {
-            arrayIdx = instBuilder->CreateMul(
-                arrayIdx, Builder.getInt32(GI->getArrayNumElements()));
-            arrayIdx = instBuilder->CreateAdd(arrayIdx, GI.getOperand());
-          }
-
-          for (; GI != E; ++GI) {
-            idxList.push_back(GI.getOperand());
-          }
-
-          HandleArgs[HLOperandIndex::kCreateHandleIndexOpIdx] = arrayIdx;
-          CallInst *Handle =
-              instBuilder->CreateCall(CreateHandleFunc, HandleArgs);
-          args[HLOperandIndex::kSubscriptObjectOpIdx] = Handle;
-          args[HLOperandIndex::kSubscriptIndexOpIdx] = arrayIdx;
+  // Add Reg bindings for resource in cb.
+  AddRegBindingsForResourceInConstantBuffer(HLM, constantRegBindingMap);
 
-          Instruction *cbSubscript =
-              cast<Instruction>(instBuilder->CreateCall(subscriptFunc, {args}));
+  // Allocate constant buffers.
+  // Create Global variable and type annotation for each CBuffer.
+  FinishCBuffer(HLM, CBufferType, m_ConstVarAnnotationMap);
 
-          Instruction *NewGEP = cast<Instruction>(
-              instBuilder->CreateInBoundsGEP(cbSubscript, idxList));
+  // Translate calls to RayQuery constructor into hl Allocate calls
+  TranslateRayQueryConstructor(HLM);
 
-          ReplaceUseInFunction(GEPOp, NewGEP, &F, *instBuilder);
-        }
-      }
-    }
-    // Delete if no use in F.
-    if (cbSubscript->user_empty()) {
-      cbSubscript->eraseFromParent();
-      Handle->eraseFromParent();
-    } else {
-      // merge GEP use for cbSubscript.
-      HLModule::MergeGepUse(cbSubscript);
-    }
+  bool bIsLib = HLM.GetShaderModel()->IsLib();
+  if (!bIsLib) {
+    // need this for "llvm.global_dtors"?
+    ProcessCtorFunctions(M, "llvm.global_ctors",
+                         Entry.Func->getEntryBlock().getFirstInsertionPt());
   }
-  return true;
-}
-
-static void ConstructCBufferAnnotation(
-    HLCBuffer &CB, DxilTypeSystem &dxilTypeSys,
-    std::unordered_map<Constant *, DxilFieldAnnotation> &AnnotationMap) {
-  Value *GV = CB.GetGlobalSymbol();
 
-  llvm::StructType *CBStructTy =
-          dyn_cast<llvm::StructType>(GV->getType()->getPointerElementType());
+  UpdateLinkage(HLM, CGM, m_ExportMap, entryFunctionMap,
+                patchConstantFunctionMap);
 
-  if (!CBStructTy) {
-    // For Array of ConstantBuffer.
-    llvm::ArrayType *CBArrayTy =
-        cast<llvm::ArrayType>(GV->getType()->getPointerElementType());
-    CBStructTy = cast<llvm::StructType>(CBArrayTy->getArrayElementType());
-  }
-
-  DxilStructAnnotation *CBAnnotation =
-      dxilTypeSys.AddStructAnnotation(CBStructTy);
-  CBAnnotation->SetCBufferSize(CB.GetSize());
+  // Do simple transform to make later lower pass easier.
+  SimpleTransformForHLDXIR(&M);
 
-  // Set fieldAnnotation for each constant var.
-  for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
-    Constant *GV = C->GetGlobalSymbol();
-    DxilFieldAnnotation &fieldAnnotation =
-        CBAnnotation->GetFieldAnnotation(C->GetID());
-    fieldAnnotation = AnnotationMap[GV];
-    // This is after CBuffer allocation.
-    fieldAnnotation.SetCBufferOffset(C->GetLowerBound());
-    fieldAnnotation.SetFieldName(C->GetGlobalName());
+  // Handle lang extensions if provided.
+  if (CGM.getCodeGenOpts().HLSLExtensionsCodegen) {
+    ExtensionCodeGen(HLM, CGM);
   }
-}
-
-static void ConstructCBuffer(
-    HLModule *pHLModule,
-    llvm::Type *CBufferType,
-    std::unordered_map<Constant *, DxilFieldAnnotation> &AnnotationMap) {
-  DxilTypeSystem &dxilTypeSys = pHLModule->GetTypeSystem();
-  llvm::Type *HandleTy = pHLModule->GetOP()->GetHandleType();
-  for (unsigned i = 0; i < pHLModule->GetCBuffers().size(); i++) {
-    HLCBuffer &CB = *static_cast<HLCBuffer*>(&(pHLModule->GetCBuffer(i)));
-    if (CB.GetConstants().size() == 0) {
-      // Create Fake variable for cbuffer which is empty.
-      llvm::GlobalVariable *pGV = new llvm::GlobalVariable(
-          *pHLModule->GetModule(), CBufferType, true,
-          llvm::GlobalValue::ExternalLinkage, nullptr, CB.GetGlobalName());
-      CB.SetGlobalSymbol(pGV);
-    } else {
-      bool bCreated =
-          CreateCBufferVariable(CB, *pHLModule, HandleTy);
-      if (bCreated)
-        ConstructCBufferAnnotation(CB, dxilTypeSys, AnnotationMap);
-      else {
-        // Create Fake variable for cbuffer which is unused.
-        llvm::GlobalVariable *pGV = new llvm::GlobalVariable(
-            *pHLModule->GetModule(), CBufferType, true,
-            llvm::GlobalValue::ExternalLinkage, nullptr, CB.GetGlobalName());
-        CB.SetGlobalSymbol(pGV);
-      }
-    }
-    // Clear the constants which useless now.
-    CB.GetConstants().clear();
-  }
-}
-
-static void ReplaceBoolVectorSubscript(CallInst *CI) {
-  Value *Ptr = CI->getArgOperand(0);
-  Value *Idx = CI->getArgOperand(1);
-  Value *IdxList[] = {ConstantInt::get(Idx->getType(), 0), Idx};
-
-  for (auto It = CI->user_begin(), E = CI->user_end(); It != E;) {
-    Instruction *user = cast<Instruction>(*(It++));
-
-    IRBuilder<> Builder(user);
-    Value *GEP = Builder.CreateInBoundsGEP(Ptr, IdxList);
-
-    if (LoadInst *LI = dyn_cast<LoadInst>(user)) {
-      Value *NewLd = Builder.CreateLoad(GEP);
-      Value *cast = Builder.CreateZExt(NewLd, LI->getType());
-      LI->replaceAllUsesWith(cast);
-      LI->eraseFromParent();
-    } else {
-      // Must be a store inst here.
-      StoreInst *SI = cast<StoreInst>(user);
-      Value *V = SI->getValueOperand();
-      Value *cast =
-          Builder.CreateICmpNE(V, llvm::ConstantInt::get(V->getType(), 0));
-      Builder.CreateStore(cast, GEP);
-      SI->eraseFromParent();
-    }
-  }
-  CI->eraseFromParent();
-}
-
-static void ReplaceBoolVectorSubscript(Function *F) {
-  for (auto It = F->user_begin(), E = F->user_end(); It != E; ) {
-    User *user = *(It++);
-    CallInst *CI = cast<CallInst>(user);
-    ReplaceBoolVectorSubscript(CI);
-  }
-}
-
-// Add function body for intrinsic if possible.
-static Function *CreateOpFunction(llvm::Module &M, Function *F,
-                                  llvm::FunctionType *funcTy,
-                                  HLOpcodeGroup group, unsigned opcode) {
-  Function *opFunc = nullptr;
-
-  llvm::Type *opcodeTy = llvm::Type::getInt32Ty(M.getContext());
-  if (group == HLOpcodeGroup::HLIntrinsic) {
-    IntrinsicOp intriOp = static_cast<IntrinsicOp>(opcode);
-    switch (intriOp) {
-    case IntrinsicOp::MOP_Append: 
-    case IntrinsicOp::MOP_Consume: {
-      bool bAppend = intriOp == IntrinsicOp::MOP_Append;
-      llvm::Type *handleTy = funcTy->getParamType(HLOperandIndex::kHandleOpIdx);
-      // Don't generate body for OutputStream::Append.
-      if (bAppend && HLModule::IsStreamOutputPtrType(handleTy)) {
-        opFunc = GetOrCreateHLFunction(M, funcTy, group, opcode);
-        break;
-      }
-
-      opFunc = GetOrCreateHLFunctionWithBody(M, funcTy, group, opcode,
-                                             bAppend ? "append" : "consume");
-      llvm::Type *counterTy = llvm::Type::getInt32Ty(M.getContext());
-      llvm::FunctionType *IncCounterFuncTy =
-          llvm::FunctionType::get(counterTy, {opcodeTy, handleTy}, false);
-      unsigned counterOpcode = bAppend ? (unsigned)IntrinsicOp::MOP_IncrementCounter:
-          (unsigned)IntrinsicOp::MOP_DecrementCounter;
-      Function *incCounterFunc =
-          GetOrCreateHLFunction(M, IncCounterFuncTy, group,
-                                counterOpcode);
-
-      llvm::Type *idxTy = counterTy;
-      llvm::Type *valTy = bAppend ?
-          funcTy->getParamType(HLOperandIndex::kAppendValOpIndex):funcTy->getReturnType();
-
-      // Return type for subscript should be pointer type, hence in memory representation
-      llvm::Type *subscriptTy = valTy;
-      bool isBoolScalarOrVector = false;
-      if (!subscriptTy->isPointerTy()) {
-        if (subscriptTy->getScalarType()->isIntegerTy(1)) {
-          isBoolScalarOrVector = true;
-          llvm::Type *memReprType = llvm::IntegerType::get(subscriptTy->getContext(), 32);
-          subscriptTy = subscriptTy->isVectorTy()
-            ? llvm::VectorType::get(memReprType, subscriptTy->getVectorNumElements())
-            : memReprType;
-        }
-        subscriptTy = llvm::PointerType::get(subscriptTy, 0);
-      }
-
-      llvm::FunctionType *SubscriptFuncTy =
-          llvm::FunctionType::get(subscriptTy, {opcodeTy, handleTy, idxTy}, false);
-
-      Function *subscriptFunc =
-          GetOrCreateHLFunction(M, SubscriptFuncTy, HLOpcodeGroup::HLSubscript,
-                                (unsigned)HLSubscriptOpcode::DefaultSubscript);
-
-      BasicBlock *BB = BasicBlock::Create(opFunc->getContext(), "Entry", opFunc);
-      IRBuilder<> Builder(BB);
-      auto argIter = opFunc->args().begin();
-      // Skip the opcode arg.
-      argIter++;
-      Argument *thisArg = argIter++;
-      // int counter = IncrementCounter/DecrementCounter(Buf);
-      Value *incCounterOpArg =
-          ConstantInt::get(idxTy, counterOpcode);
-      Value *counter =
-          Builder.CreateCall(incCounterFunc, {incCounterOpArg, thisArg});
-      // Buf[counter];
-      Value *subscriptOpArg = ConstantInt::get(
-          idxTy, (unsigned)HLSubscriptOpcode::DefaultSubscript);
-      Value *subscript =
-          Builder.CreateCall(subscriptFunc, {subscriptOpArg, thisArg, counter});
-
-      if (bAppend) {
-        Argument *valArg = argIter;
-        // Buf[counter] = val;
-        if (valTy->isPointerTy()) {
-          unsigned size = M.getDataLayout().getTypeAllocSize(subscript->getType()->getPointerElementType());
-          Builder.CreateMemCpy(subscript, valArg, size, 1);
-        }
-        else {
-          Value *storedVal = valArg;
-          // Convert to memory representation
-          if (isBoolScalarOrVector)
-            storedVal = Builder.CreateZExt(storedVal, subscriptTy->getPointerElementType(), "frombool");
-          Builder.CreateStore(storedVal, subscript);
-        }
-        Builder.CreateRetVoid();
-      } else {
-        // return Buf[counter];
-        if (valTy->isPointerTy())
-          Builder.CreateRet(subscript);
-        else {
-          Value *retVal = Builder.CreateLoad(subscript);
-          // Convert to register representation
-          if (isBoolScalarOrVector)
-            retVal = Builder.CreateICmpNE(retVal, Constant::getNullValue(retVal->getType()), "tobool");
-          Builder.CreateRet(retVal);
-        }
-      }
-    } break;
-    case IntrinsicOp::IOP_sincos: {
-      opFunc = GetOrCreateHLFunctionWithBody(M, funcTy, group, opcode, "sincos");
-      llvm::Type *valTy = funcTy->getParamType(HLOperandIndex::kTrinaryOpSrc0Idx);
-
-      llvm::FunctionType *sinFuncTy =
-          llvm::FunctionType::get(valTy, {opcodeTy, valTy}, false);
-      unsigned sinOp = static_cast<unsigned>(IntrinsicOp::IOP_sin);
-      unsigned cosOp = static_cast<unsigned>(IntrinsicOp::IOP_cos);
-      Function *sinFunc = GetOrCreateHLFunction(M, sinFuncTy, group, sinOp);
-      Function *cosFunc = GetOrCreateHLFunction(M, sinFuncTy, group, cosOp);
-
-      BasicBlock *BB = BasicBlock::Create(opFunc->getContext(), "Entry", opFunc);
-      IRBuilder<> Builder(BB);
-      auto argIter = opFunc->args().begin();
-      // Skip the opcode arg.
-      argIter++;
-      Argument *valArg = argIter++;
-      Argument *sinPtrArg = argIter++;
-      Argument *cosPtrArg = argIter++;
-
-      Value *sinOpArg =
-          ConstantInt::get(opcodeTy, sinOp);
-      Value *sinVal = Builder.CreateCall(sinFunc, {sinOpArg, valArg});
-      Builder.CreateStore(sinVal, sinPtrArg);
-
-      Value *cosOpArg =
-          ConstantInt::get(opcodeTy, cosOp);
-      Value *cosVal = Builder.CreateCall(cosFunc, {cosOpArg, valArg});
-      Builder.CreateStore(cosVal, cosPtrArg);
-      // Ret.
-      Builder.CreateRetVoid();
-    } break;
-    default:
-      opFunc = GetOrCreateHLFunction(M, funcTy, group, opcode);
-      break;
-    }
-  }
-  else if (group == HLOpcodeGroup::HLExtIntrinsic) {
-    llvm::StringRef fnName = F->getName();
-    llvm::StringRef groupName = GetHLOpcodeGroupNameByAttr(F);
-    opFunc = GetOrCreateHLFunction(M, funcTy, group, &groupName, &fnName, opcode);
-  }
-  else {
-    opFunc = GetOrCreateHLFunction(M, funcTy, group, opcode);
-  }
-
-  // Add attribute
-  if (F->hasFnAttribute(Attribute::ReadNone))
-    opFunc->addFnAttr(Attribute::ReadNone);
-  if (F->hasFnAttribute(Attribute::ReadOnly))
-    opFunc->addFnAttr(Attribute::ReadOnly);
-  return opFunc;
-}
-
-static Value *CreateHandleFromResPtr(
-    Value *ResPtr, HLModule &HLM, llvm::Type *HandleTy,
-    std::unordered_map<llvm::Type *, MDNode *> &resMetaMap,
-    IRBuilder<> &Builder) {
-  llvm::Type *objTy = ResPtr->getType()->getPointerElementType();
-  DXASSERT(resMetaMap.count(objTy), "cannot find resource type");
-  MDNode *MD = resMetaMap[objTy];
-  // Load to make sure resource only have Ld/St use so mem2reg could remove
-  // temp resource.
-  Value *ldObj = Builder.CreateLoad(ResPtr);
-  Value *opcode = Builder.getInt32(0);
-  Value *args[] = {opcode, ldObj};
-  Function *CreateHandle = GetOrCreateHLCreateHandle(HLM, HandleTy, args, MD);
-  CallInst *Handle = Builder.CreateCall(CreateHandle, args);
-  return Handle;
-}
-
-namespace {
-
-Value *CreateAnnotateHandle(HLModule &HLM, Value *Handle,
-                            DxilResourceProperties &RP, llvm::Type *ResTy,
-                            IRBuilder<> &Builder) {
-  Constant *RPConstant = resource_helper::getAsConstant(
-      RP, HLM.GetOP()->GetResourcePropertiesType(), *HLM.GetShaderModel());
-  return HLM.EmitHLOperationCall(
-      Builder, HLOpcodeGroup::HLAnnotateHandle,
-      (unsigned)HLOpcodeGroup::HLAnnotateHandle, Handle->getType(),
-      {Handle, Builder.getInt8((uint8_t)RP.Class),
-       Builder.getInt8((uint8_t)RP.Kind), RPConstant, UndefValue::get(ResTy)},
-      *HLM.GetModule());
-}
-
-void LowerGetResourceFromHeap(
-    HLModule &HLM, std::vector<std::pair<Function *, unsigned>> &intrinsicMap) {
-  llvm::Module &M = *HLM.GetModule();
-  llvm::Type *HandleTy = HLM.GetOP()->GetHandleType();
-  unsigned GetResFromHeapOp =
-      static_cast<unsigned>(IntrinsicOp::IOP_CreateResourceFromHeap);
-  DenseMap<Instruction *, Instruction *> ResourcePtrToHandlePtrMap;
-
-  for (auto it : intrinsicMap) {
-    unsigned opcode = it.second;
-    if (opcode != GetResFromHeapOp)
-      continue;
-    Function *F = it.first;
-    HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
-    if (group != HLOpcodeGroup::HLIntrinsic)
-      continue;
-    for (auto uit = F->user_begin(); uit != F->user_end();) {
-      CallInst *CI = cast<CallInst>(*(uit++));
-      Instruction *ResPtr = cast<Instruction>(CI->getArgOperand(0));
-      Value *Index = CI->getArgOperand(1);
-      IRBuilder<> Builder(CI);
-      // Make a handle from GetResFromHeap.
-      Value *Handle =
-          HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLIntrinsic,
-                                  GetResFromHeapOp, HandleTy, {Index}, M);
-
-      // Find the handle ptr for res ptr.
-      auto it = ResourcePtrToHandlePtrMap.find(ResPtr);
-      Instruction *HandlePtr = nullptr;
-      if (it != ResourcePtrToHandlePtrMap.end()) {
-        HandlePtr = it->second;
-      } else {
-        IRBuilder<> AllocaBuilder(
-            ResPtr->getParent()->getParent()->getEntryBlock().begin());
-        HandlePtr = AllocaBuilder.CreateAlloca(HandleTy);
-        ResourcePtrToHandlePtrMap[ResPtr] = HandlePtr;
-      }
-      // Store handle to handle ptr.
-      Builder.CreateStore(Handle, HandlePtr);
-      CI->eraseFromParent();
-    }
-  }
-
-  // Replace load of Resource ptr into load of handel ptr.
-  for (auto it : ResourcePtrToHandlePtrMap) {
-    Instruction *resPtr = it.first;
-    Instruction *handlePtr = it.second;
-
-    for (auto uit = resPtr->user_begin(); uit != resPtr->user_end();) {
-      User *U = *(uit++);
-      BitCastInst *BCI = cast<BitCastInst>(U);
-      DXASSERT(
-          dxilutil::IsHLSLResourceType(BCI->getType()->getPointerElementType()),
-          "illegal cast of resource ptr");
-      for (auto cuit = BCI->user_begin(); cuit != BCI->user_end();) {
-        LoadInst *LI = cast<LoadInst>(*(cuit++));
-        IRBuilder<> Builder(LI);
-        Value *Handle = Builder.CreateLoad(handlePtr);
-        Value *Res =
-            HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast,
-                                    (unsigned)HLCastOpcode::HandleToResCast,
-                                    LI->getType(), {Handle}, M);
-        LI->replaceAllUsesWith(Res);
-        LI->eraseFromParent();
-      }
-      BCI->eraseFromParent();
-    }
-    resPtr->eraseFromParent();
-  }
-}
-} // namespace
-
-void CGMSHLSLRuntime::AddOpcodeParamForIntrinsic(HLModule &HLM, Function *F,
-                                       unsigned opcode, llvm::Type *HandleTy,
-    std::unordered_map<llvm::Type *, MDNode*> &resMetaMap) {
-  llvm::Module &M = *HLM.GetModule();
-  llvm::FunctionType *oldFuncTy = F->getFunctionType();
-
-  SmallVector<llvm::Type *, 4> paramTyList;
-  // Add the opcode param
-  llvm::Type *opcodeTy = llvm::Type::getInt32Ty(M.getContext());
-  paramTyList.emplace_back(opcodeTy);
-  paramTyList.append(oldFuncTy->param_begin(), oldFuncTy->param_end());
-
-  for (unsigned i = 1; i < paramTyList.size(); i++) {
-    llvm::Type *Ty = paramTyList[i];
-    if (Ty->isPointerTy()) {
-      Ty = Ty->getPointerElementType();
-      if (dxilutil::IsHLSLResourceType(Ty)) {
-        // Use handle type for resource type.
-        // This will make sure temp object variable only used by createHandle.
-        paramTyList[i] = HandleTy;
-      }
-    }
-  }
-
-  HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
-
-  if (group == HLOpcodeGroup::HLSubscript &&
-      opcode == static_cast<unsigned>(HLSubscriptOpcode::VectorSubscript)) {
-    llvm::FunctionType *FT = F->getFunctionType();
-    llvm::Type *VecArgTy = FT->getParamType(0);
-    llvm::VectorType *VType =
-        cast<llvm::VectorType>(VecArgTy->getPointerElementType());
-    llvm::Type *Ty = VType->getElementType();
-    DXASSERT(Ty->isIntegerTy(), "Only bool could use VectorSubscript");
-    llvm::IntegerType *ITy = cast<IntegerType>(Ty);
-
-    DXASSERT_LOCALVAR(ITy, ITy->getBitWidth() == 1, "Only bool could use VectorSubscript");
-
-    // The return type is i8*.
-    // Replace all uses with i1*.
-    ReplaceBoolVectorSubscript(F);
-    return;
-  }
-
-  bool isDoubleSubscriptFunc = group == HLOpcodeGroup::HLSubscript &&
-      opcode == static_cast<unsigned>(HLSubscriptOpcode::DoubleSubscript);
-
-  llvm::Type *RetTy = oldFuncTy->getReturnType();
-
-  if (isDoubleSubscriptFunc) {
-    CallInst *doubleSub = cast<CallInst>(*F->user_begin());
-   
-    // Change currentIdx type into coord type.
-    auto U = doubleSub->user_begin();
-    Value *user = *U;
-    CallInst *secSub = cast<CallInst>(user);
-    unsigned coordIdx = HLOperandIndex::kSubscriptIndexOpIdx;
-    // opcode operand not add yet, so the index need -1.
-    if (GetHLOpcodeGroupByName(secSub->getCalledFunction()) == HLOpcodeGroup::NotHL)
-      coordIdx -= 1;
-    
-    Value *coord = secSub->getArgOperand(coordIdx);
-
-    llvm::Type *coordTy = coord->getType();
-    paramTyList[HLOperandIndex::kSubscriptIndexOpIdx] = coordTy;
-    // Add the sampleIdx or mipLevel parameter to the end.
-    paramTyList.emplace_back(opcodeTy);
-    // Change return type to be resource ret type.
-    // opcode operand not add yet, so the index need -1.
-    Value *objPtr = doubleSub->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx-1);
-    // Must be a GEP
-    GEPOperator *objGEP = cast<GEPOperator>(objPtr);
-    gep_type_iterator GEPIt = gep_type_begin(objGEP), E = gep_type_end(objGEP);
-    llvm::Type *resTy = nullptr;
-    while (GEPIt != E) {
-      if (dxilutil::IsHLSLResourceType(*GEPIt)) {
-        resTy = *GEPIt;
-        break;
-      }
-      GEPIt++;
-    }
-
-    DXASSERT(resTy, "must find the resource type");
-    // Change object type to handle type.
-    paramTyList[HLOperandIndex::kSubscriptObjectOpIdx] = HandleTy;
-    // Change RetTy into pointer of resource reture type.
-    RetTy = cast<StructType>(resTy)->getElementType(0)->getPointerTo();
-
-    llvm::Type *sliceTy = objGEP->getType()->getPointerElementType();
-    DXIL::ResourceClass RC = HLM.GetResourceClass(sliceTy);
-    DXIL::ResourceKind RK = HLM.GetResourceKind(sliceTy);
-    HLM.AddResourceTypeAnnotation(resTy, RC, RK);
-  }
-
-  llvm::FunctionType *funcTy =
-      llvm::FunctionType::get(RetTy, paramTyList, false);
-
-  Function *opFunc = CreateOpFunction(M, F, funcTy, group, opcode);
-  StringRef lower = hlsl::GetHLLowerStrategy(F);
-  if (!lower.empty())
-    hlsl::SetHLLowerStrategy(opFunc, lower);
-
-  DxilTypeSystem &typeSys = HLM.GetTypeSystem();
-
-  for (auto user = F->user_begin(); user != F->user_end();) {
-    // User must be a call.
-    CallInst *oldCI = cast<CallInst>(*(user++));
-
-    SmallVector<Value *, 4> opcodeParamList;
-    Value *opcodeConst = Constant::getIntegerValue(opcodeTy, APInt(32, opcode));
-    opcodeParamList.emplace_back(opcodeConst);
-
-    opcodeParamList.append(oldCI->arg_operands().begin(),
-                           oldCI->arg_operands().end());
-    IRBuilder<> Builder(oldCI);
-
-    if (isDoubleSubscriptFunc) {
-      // Change obj to the resource pointer.
-      Value *objVal = opcodeParamList[HLOperandIndex::kSubscriptObjectOpIdx];
-      GEPOperator *objGEP = cast<GEPOperator>(objVal);
-      SmallVector<Value *, 8> IndexList;
-      IndexList.append(objGEP->idx_begin(), objGEP->idx_end());
-      Value *lastIndex = IndexList.back();
-      ConstantInt *constIndex = cast<ConstantInt>(lastIndex);
-      DXASSERT_LOCALVAR(constIndex, constIndex->getLimitedValue() == 1, "last index must 1");
-      // Remove the last index.
-      IndexList.pop_back();
-      objVal = objGEP->getPointerOperand();
-      if (IndexList.size() > 1)
-        objVal = Builder.CreateInBoundsGEP(objVal, IndexList);
-
-      Value *Handle =
-          CreateHandleFromResPtr(objVal, HLM, HandleTy, resMetaMap, Builder);
-      // Change obj to the resource pointer.
-      opcodeParamList[HLOperandIndex::kSubscriptObjectOpIdx] = Handle;
-
-      // Set idx and mipIdx.
-      Value *mipIdx = opcodeParamList[HLOperandIndex::kSubscriptIndexOpIdx];
-      auto U = oldCI->user_begin();
-      Value *user = *U;
-      CallInst *secSub = cast<CallInst>(user);
-      unsigned idxOpIndex = HLOperandIndex::kSubscriptIndexOpIdx;
-      if (GetHLOpcodeGroupByName(secSub->getCalledFunction()) == HLOpcodeGroup::NotHL)
-        idxOpIndex--;
-      Value *idx = secSub->getArgOperand(idxOpIndex);
-
-      DXASSERT(secSub->hasOneUse(), "subscript should only has one use");
-
-      // Add the sampleIdx or mipLevel parameter to the end.
-      opcodeParamList[HLOperandIndex::kSubscriptIndexOpIdx] = idx;
-      opcodeParamList.emplace_back(mipIdx);
-      // Insert new call before secSub to make sure idx is ready to use.
-      Builder.SetInsertPoint(secSub);
-    }
-
-    for (unsigned i = 1; i < opcodeParamList.size(); i++) {
-      Value *arg = opcodeParamList[i];
-      llvm::Type *Ty = arg->getType();
-      if (Ty->isPointerTy()) {
-        Ty = Ty->getPointerElementType();
-        if (dxilutil::IsHLSLResourceType(Ty)) {
-
-          DxilResourceProperties RP;
-          // Use object type directly, not by pointer.
-          // This will make sure temp object variable only used by ld/st.
-          if (GEPOperator *argGEP = dyn_cast<GEPOperator>(arg)) {
-            std::vector<Value*> idxList(argGEP->idx_begin(), argGEP->idx_end());
-            // Create instruction to avoid GEPOperator.
-            GetElementPtrInst *GEP = GetElementPtrInst::CreateInBounds(argGEP->getPointerOperand(), 
-                idxList);
-            Builder.Insert(GEP);
-            arg = GEP;
-          }
-
-          llvm::Type *ResTy = arg->getType()->getPointerElementType();
-
-          auto RPIt = valToResPropertiesMap.find(arg);
-          if (RPIt != valToResPropertiesMap.end())
-          {
-            RP = RPIt->second;
-          } else {
-            // Must be GEP.
-            GEPOperator *GEP = cast<GEPOperator>(arg);
-            // Find RP from GEP.
-            Value *Ptr = GEP->getPointerOperand();
-            // When Ptr is array of resource, check if it is another GEP.
-            while (dxilutil::IsHLSLResourceType(
-                    dxilutil::GetArrayEltTy(Ptr->getType()))) {
-              if (GEPOperator *ParentGEP = dyn_cast<GEPOperator>(Ptr)) {
-                GEP = ParentGEP;
-                Ptr = GEP->getPointerOperand();
-              } else {
-                break;
-              }
-            }
-
-            RPIt = valToResPropertiesMap.find(Ptr);
-            // When ptr is array of resource, ptr could be in valToResPropertiesMap.
-            if (RPIt != valToResPropertiesMap.end()) {
-              RP = RPIt->second;
-            } else {
-              DxilStructAnnotation *Anno = nullptr;
-
-              for (auto gepIt = gep_type_begin(GEP), E = gep_type_end(GEP);
-                   gepIt != E; ++gepIt) {
-
-                if (StructType *ST = dyn_cast<StructType>(*gepIt)) {
-                  Anno = typeSys.GetStructAnnotation(ST);
-                  DXASSERT(Anno, "missing type annotation");
-
-                  unsigned Index = cast<ConstantInt>(gepIt.getOperand())->getLimitedValue();
-
-                  DxilFieldAnnotation &fieldAnno =
-                      Anno->GetFieldAnnotation(Index);
-                  if (fieldAnno.HasResourceAttribute()) {
-                    MDNode *resAttrib = fieldAnno.GetResourceAttribute();
-                    DxilResourceBase R(DXIL::ResourceClass::Invalid);
-                    HLM.LoadDxilResourceBaseFromMDNode(resAttrib, R);
-                    switch (R.GetClass()) {
-                    case DXIL::ResourceClass::SRV:
-                    case DXIL::ResourceClass::UAV: {
-                      DxilResource Res;
-                      HLM.LoadDxilResourceFromMDNode(resAttrib, Res);
-                      RP = resource_helper::loadFromResourceBase(&Res);
-                    } break;
-                    case DXIL::ResourceClass::Sampler: {
-                      DxilSampler Sampler;
-                      HLM.LoadDxilSamplerFromMDNode(resAttrib, Sampler);
-                      RP = resource_helper::loadFromResourceBase(&Sampler);
-                    } break;
-                    default:
-                      DXASSERT(
-                          0, "invalid resource attribute in filed annotation");
-                      break;
-                    }
-                    break;
-                  }
-                }
-              }
-            }
-          }
-
-          DXASSERT(RP.Class != DXIL::ResourceClass::Invalid, "invalid resource properties");
-          Value *Handle = CreateHandleFromResPtr(arg, HLM, HandleTy,
-                                                 resMetaMap, Builder);
-          Handle = CreateAnnotateHandle(HLM, Handle, RP, ResTy, Builder);
-          opcodeParamList[i] = Handle;
-        }
-      }
-    }
-
-    Value *CI = Builder.CreateCall(opFunc, opcodeParamList);
-    if (!isDoubleSubscriptFunc) {
-      // replace new call and delete the old call
-      oldCI->replaceAllUsesWith(CI);
-      oldCI->eraseFromParent();
-    } else {
-      // For double script.
-      // Replace single users use with new CI.
-      auto U = oldCI->user_begin();
-      Value *user = *U;
-      CallInst *secSub = cast<CallInst>(user);
-      secSub->replaceAllUsesWith(CI);
-      secSub->eraseFromParent();
-      oldCI->eraseFromParent();
-    }
-  }
-  // delete the function
-  F->eraseFromParent();
-}
-
-void CGMSHLSLRuntime::AddOpcodeParamForIntrinsics(
-    HLModule &HLM
-    , std::vector<std::pair<Function *, unsigned>> &intrinsicMap,
-    std::unordered_map<llvm::Type *, MDNode*> &resMetaMap) {
-  llvm::Type *HandleTy = HLM.GetOP()->GetHandleType();
-  for (auto mapIter : intrinsicMap) {
-    Function *F = mapIter.first;
-    if (F->user_empty()) {
-      // delete the function
-      F->eraseFromParent();
-      continue;
-    }
-
-    unsigned opcode = mapIter.second;
-    AddOpcodeParamForIntrinsic(HLM, F, opcode, HandleTy, resMetaMap);
-  }
-}
-
-static Value *CastLdValue(Value *Ptr, llvm::Type *FromTy, llvm::Type *ToTy, IRBuilder<> &Builder) {
-  if (ToTy->isVectorTy()) {
-    unsigned vecSize = ToTy->getVectorNumElements();
-    if (vecSize == 1 && ToTy->getVectorElementType() == FromTy) {
-      Value *V = Builder.CreateLoad(Ptr);
-      // ScalarToVec1Splat
-      // Change scalar into vec1.
-      Value *Vec1 = UndefValue::get(ToTy);
-      return Builder.CreateInsertElement(Vec1, V, (uint64_t)0);
-    } else if (vecSize == 1 && FromTy->isIntegerTy()
-      && ToTy->getVectorElementType()->isIntegerTy(1)) {
-      // load(bitcast i32* to <1 x i1>*)
-      // Rewrite to
-      // insertelement(icmp ne (load i32*), 0)
-      Value *IntV = Builder.CreateLoad(Ptr);
-      Value *BoolV = Builder.CreateICmpNE(IntV, ConstantInt::get(IntV->getType(), 0), "tobool");
-      Value *Vec1 = UndefValue::get(ToTy);
-      return Builder.CreateInsertElement(Vec1, BoolV, (uint64_t)0);
-    } else if (FromTy->isVectorTy() && vecSize == 1) {
-      Value *V = Builder.CreateLoad(Ptr);
-      // VectorTrunc
-      // Change vector into vec1.
-      int mask[] = {0};
-      return Builder.CreateShuffleVector(V, V, mask);
-    } else if (FromTy->isArrayTy()) {
-      llvm::Type *FromEltTy = FromTy->getArrayElementType();
-
-      llvm::Type *ToEltTy = ToTy->getVectorElementType();
-      if (FromTy->getArrayNumElements() == vecSize && FromEltTy == ToEltTy) {
-        // ArrayToVector.
-        Value *NewLd = UndefValue::get(ToTy);
-        Value *zeroIdx = Builder.getInt32(0);
-        for (unsigned i = 0; i < vecSize; i++) {
-          Value *GEP = Builder.CreateInBoundsGEP(
-              Ptr, {zeroIdx, Builder.getInt32(i)});
-          Value *Elt = Builder.CreateLoad(GEP);
-          NewLd = Builder.CreateInsertElement(NewLd, Elt, i);
-        }
-        return NewLd;
-      }
-    }
-  } else if (FromTy == Builder.getInt1Ty()) {
-    Value *V = Builder.CreateLoad(Ptr);
-    // BoolCast
-    DXASSERT_NOMSG(ToTy->isIntegerTy());
-    return Builder.CreateZExt(V, ToTy);
-  }
-
-  return nullptr;
-}
-
-static Value  *CastStValue(Value *Ptr, Value *V, llvm::Type *FromTy, llvm::Type *ToTy, IRBuilder<> &Builder) {
-  if (ToTy->isVectorTy()) {
-    unsigned vecSize = ToTy->getVectorNumElements();
-    if (vecSize == 1 && ToTy->getVectorElementType() == FromTy) {
-      // ScalarToVec1Splat
-      // Change vec1 back to scalar.
-      Value *Elt = Builder.CreateExtractElement(V, (uint64_t)0);
-      return Elt;
-    } else if (FromTy->isVectorTy() && vecSize == 1) {
-      // VectorTrunc
-      // Change vec1 into vector.
-      // Should not happen.
-      // Reported error at Sema::ImpCastExprToType.
-      DXASSERT_NOMSG(0);
-    } else if (FromTy->isArrayTy()) {
-      llvm::Type *FromEltTy = FromTy->getArrayElementType();
-
-      llvm::Type *ToEltTy = ToTy->getVectorElementType();
-      if (FromTy->getArrayNumElements() == vecSize && FromEltTy == ToEltTy) {
-        // ArrayToVector.
-        Value *zeroIdx = Builder.getInt32(0);
-        for (unsigned i = 0; i < vecSize; i++) {
-          Value *Elt = Builder.CreateExtractElement(V, i);
-          Value *GEP = Builder.CreateInBoundsGEP(
-              Ptr, {zeroIdx, Builder.getInt32(i)});
-          Builder.CreateStore(Elt, GEP);
-        }
-        // The store already done.
-        // Return null to ignore use of the return value.
-        return nullptr;
-      }
-    }
-  } else if (FromTy == Builder.getInt1Ty()) {
-    // BoolCast
-    // Change i1 to ToTy.
-    DXASSERT_NOMSG(ToTy->isIntegerTy());
-    Value *CastV = Builder.CreateICmpNE(V, ConstantInt::get(V->getType(), 0));
-    return CastV;
-  }
-
-  return nullptr;
-}
-
-static bool SimplifyBitCastLoad(LoadInst *LI, llvm::Type *FromTy, llvm::Type *ToTy, Value *Ptr) {
-  IRBuilder<> Builder(LI);
-  // Cast FromLd to ToTy.
-  Value *CastV = CastLdValue(Ptr, FromTy, ToTy, Builder);
-  if (CastV) {
-    LI->replaceAllUsesWith(CastV);
-    return true;
-  } else {
-    return false;
-  }
-}
-
-static bool SimplifyBitCastStore(StoreInst *SI, llvm::Type *FromTy, llvm::Type *ToTy, Value *Ptr) {
-  IRBuilder<> Builder(SI);
-  Value *V = SI->getValueOperand();
-  // Cast Val to FromTy.
-  Value *CastV = CastStValue(Ptr, V, FromTy, ToTy, Builder);
-  if (CastV) {
-    Builder.CreateStore(CastV, Ptr);
-    return true;
-  } else {
-    return false;
-  }
-}
-
-static bool SimplifyBitCastGEP(GEPOperator *GEP, llvm::Type *FromTy, llvm::Type *ToTy, Value *Ptr) {
-  if (ToTy->isVectorTy()) {
-    unsigned vecSize = ToTy->getVectorNumElements();
-    if (vecSize == 1 && ToTy->getVectorElementType() == FromTy) {
-      // ScalarToVec1Splat
-      GEP->replaceAllUsesWith(Ptr);
-      return true;
-    } else if (FromTy->isVectorTy() && vecSize == 1) {
-      // VectorTrunc
-      DXASSERT_NOMSG(
-          !isa<llvm::VectorType>(GEP->getType()->getPointerElementType()));
-      IRBuilder<> Builder(FromTy->getContext());
-      if (Instruction *I = dyn_cast<Instruction>(GEP))
-        Builder.SetInsertPoint(I);
-      std::vector<Value *> idxList(GEP->idx_begin(), GEP->idx_end());
-      Value *NewGEP = Builder.CreateInBoundsGEP(Ptr, idxList);
-      GEP->replaceAllUsesWith(NewGEP);
-      return true;
-    } else if (FromTy->isArrayTy()) {
-      llvm::Type *FromEltTy = FromTy->getArrayElementType();
-
-      llvm::Type *ToEltTy = ToTy->getVectorElementType();
-      if (FromTy->getArrayNumElements() == vecSize && FromEltTy == ToEltTy) {
-        // ArrayToVector.
-      }
-    }
-  } else if (FromTy == llvm::Type::getInt1Ty(FromTy->getContext())) {
-    // BoolCast
-  }
-  return false;
-}
-// Set of instructions whose operands have been dropped and that are waiting
-// to be erased by the caller.
-typedef SmallPtrSet<Instruction *, 4> SmallInstSet;
-// Tries to rewrite the load, store and GEP users of the pointer bitcast BC so
-// that they address the bitcast's source pointer instead.
-//   BC        - the bitcast (instruction or constant expression) to simplify.
-//   deadInsts - receives every user instruction that was rewritten; those
-//               instructions have their references dropped here but are not
-//               erased, that is left to the caller.
-// Nothing is done unless both the source and the destination type are
-// pointers.
-static void SimplifyBitCast(BitCastOperator *BC, SmallInstSet &deadInsts) {
-  Value *Ptr = BC->getOperand(0);
-  llvm::Type *FromTy = Ptr->getType();
-  llvm::Type *ToTy = BC->getType();
-
-  if (!FromTy->isPointerTy() || !ToTy->isPointerTy())
-    return;
-
-  FromTy = FromTy->getPointerElementType();
-  ToTy = ToTy->getPointerElementType();
-
-  // Take care case like %2 = bitcast %struct.T* %1 to <1 x float>*.
-  bool GEPCreated = false;
-  if (FromTy->isStructTy()) {
-    IRBuilder<> Builder(FromTy->getContext());
-    // Only an instruction gives the builder an insertion point; for any other
-    // BC the builder is left without one.
-    if (Instruction *I = dyn_cast<Instruction>(BC))
-      Builder.SetInsertPoint(I);
-
-    Value *zeroIdx = Builder.getInt32(0);
-    // One zero index for the pointer itself plus one per struct level.
-    unsigned nestLevel = 1;
-    // Descend through element 0 of each nested struct, stopping at the first
-    // non-struct type or at an empty struct.
-    while (llvm::StructType *ST = dyn_cast<llvm::StructType>(FromTy)) {
-      if (ST->getNumElements() == 0) break;
-      FromTy = ST->getElementType(0);
-      nestLevel++;
-    }
-    std::vector<Value *> idxList(nestLevel, zeroIdx);
-    // From here on Ptr addresses the innermost leading field.
-    Ptr = Builder.CreateGEP(Ptr, idxList);
-    GEPCreated = true;
-  }
-
-  // NOTE(review): dropAllReferences() on a user changes BC's use list while
-  // it is being iterated - confirm this is safe with the LLVM version in use.
-  for (User *U : BC->users()) {
-    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
-      if (SimplifyBitCastLoad(LI, FromTy, ToTy, Ptr)) {
-        LI->dropAllReferences();
-        deadInsts.insert(LI);
-      }
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
-      if (SimplifyBitCastStore(SI, FromTy, ToTy, Ptr)) {
-        SI->dropAllReferences();
-        deadInsts.insert(SI);
-      }
-    } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
-      // Only a GEP that is an instruction is queued for deletion.
-      if (SimplifyBitCastGEP(GEP, FromTy, ToTy, Ptr))
-        if (Instruction *I = dyn_cast<Instruction>(GEP)) {
-          I->dropAllReferences();
-          deadInsts.insert(I);
-        }
-    } else if (dyn_cast<CallInst>(U)) {
-      // Skip function call.
-    } else if (dyn_cast<BitCastInst>(U)) {
-      // Skip bitcast.
-    } else if (dyn_cast<AddrSpaceCastInst>(U)) {
-      // Skip addrspacecast.
-    } else {
-      DXASSERT(0, "not support yet");
-    }
-  }
-
-  // We created a GEP instruction but didn't end up consuming it, so delete it.
-  if (GEPCreated && Ptr->use_empty()) {
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
-      GEP->eraseFromParent();
-    else
-      // Not an instruction: the unused GEP is treated as a constant.
-      cast<Constant>(Ptr)->destroyConstant();
-  }
-}
-
-// Host-side evaluators for constant folding of one-operand intrinsics.
-typedef float(__cdecl *FloatUnaryEvalFuncType)(float);
-typedef double(__cdecl *DoubleUnaryEvalFuncType)(double);
-
-// Host-side evaluators for constant folding of two-operand intrinsics; the
-// integer flavour works on APInt values.
-typedef APInt(__cdecl *IntBinaryEvalFuncType)(const APInt&, const APInt&);
-typedef float(__cdecl *FloatBinaryEvalFuncType)(float, float);
-typedef double(__cdecl *DoubleBinaryEvalFuncType)(double, double);
-
-// Folds a scalar one-operand floating-point intrinsic at compile time.
-//   fpV            - constant operand, expected to be of double or float type.
-//   floatEvalFunc  - evaluator used when the operand is a float.
-//   doubleEvalFunc - evaluator used when the operand is a double.
-// Returns a constant of the operand's type holding the evaluated value.
-static Value * EvalUnaryIntrinsic(ConstantFP *fpV,
-                               FloatUnaryEvalFuncType floatEvalFunc,
-                               DoubleUnaryEvalFuncType doubleEvalFunc) {
-  llvm::Type *OperandTy = fpV->getType();
-  if (!OperandTy->isDoubleTy()) {
-    // Everything that is not a double has to be a float.
-    DXASSERT_NOMSG(OperandTy->isFloatTy());
-    float fOperand = fpV->getValueAPF().convertToFloat();
-    return ConstantFP::get(OperandTy, floatEvalFunc(fOperand));
-  }
-  double dOperand = fpV->getValueAPF().convertToDouble();
-  return ConstantFP::get(OperandTy, doubleEvalFunc(dOperand));
-}
-
-// Folds a scalar two-operand intrinsic at compile time.
-//   cV0, cV1       - constant operands; the type of cV0 selects the evaluator.
-//   floatEvalFunc  - evaluator for float operands.
-//   doubleEvalFunc - evaluator for double operands.
-//   intEvalFunc    - evaluator for integer operands (asserted non-null when
-//                    the operands are integers).
-// Returns a constant of the operand type holding the evaluated value.
-static Value * EvalBinaryIntrinsic(Constant *cV0, Constant *cV1,
-                               FloatBinaryEvalFuncType floatEvalFunc,
-                               DoubleBinaryEvalFuncType doubleEvalFunc,
-                               IntBinaryEvalFuncType intEvalFunc) {
-  llvm::Type *OperandTy = cV0->getType();
-
-  if (OperandTy->isDoubleTy()) {
-    double lhs = cast<ConstantFP>(cV0)->getValueAPF().convertToDouble();
-    double rhs = cast<ConstantFP>(cV1)->getValueAPF().convertToDouble();
-    return ConstantFP::get(OperandTy, doubleEvalFunc(lhs, rhs));
-  }
-
-  if (OperandTy->isFloatTy()) {
-    float lhs = cast<ConstantFP>(cV0)->getValueAPF().convertToFloat();
-    float rhs = cast<ConstantFP>(cV1)->getValueAPF().convertToFloat();
-    return ConstantFP::get(OperandTy, floatEvalFunc(lhs, rhs));
-  }
-
-  // Neither double nor float: only integers are left.
-  DXASSERT_NOMSG(OperandTy->isIntegerTy());
-  DXASSERT_NOMSG(intEvalFunc);
-  const APInt &lhs = cast<ConstantInt>(cV0)->getValue();
-  const APInt &rhs = cast<ConstantInt>(cV1)->getValue();
-  return ConstantInt::get(OperandTy, intEvalFunc(lhs, rhs));
-}
-
-// Folds a call to a one-operand floating-point intrinsic whose argument is
-// constant. Vector calls are folded element by element.
-// The call is replaced by the folded value and erased; the folded value is
-// returned.
-static Value * EvalUnaryIntrinsic(CallInst *CI,
-                               FloatUnaryEvalFuncType floatEvalFunc,
-                               DoubleUnaryEvalFuncType doubleEvalFunc) {
-  Value *Operand = CI->getArgOperand(0);
-  llvm::Type *RetTy = CI->getType();
-  Value *Folded = nullptr;
-
-  llvm::VectorType *VecTy = dyn_cast<llvm::VectorType>(RetTy);
-  if (!VecTy) {
-    Folded = EvalUnaryIntrinsic(cast<ConstantFP>(Operand), floatEvalFunc,
-                                doubleEvalFunc);
-  } else {
-    // Start from undef and insert one folded lane at a time.
-    Folded = UndefValue::get(RetTy);
-    Constant *VecOperand = cast<Constant>(Operand);
-    IRBuilder<> Builder(CI);
-    for (unsigned lane = 0; lane < VecTy->getNumElements(); lane++) {
-      ConstantFP *LaneOperand =
-          cast<ConstantFP>(VecOperand->getAggregateElement(lane));
-      Value *LaneResult =
-          EvalUnaryIntrinsic(LaneOperand, floatEvalFunc, doubleEvalFunc);
-      Folded = Builder.CreateInsertElement(Folded, LaneResult, lane);
-    }
-  }
-
-  CI->replaceAllUsesWith(Folded);
-  CI->eraseFromParent();
-  return Folded;
-}
-
-// Folds a call to a two-operand intrinsic whose arguments are constant.
-// Vector calls are folded element by element.
-//   CI             - the call to fold; it is replaced by the folded value and
-//                    erased.
-//   floatEvalFunc  - evaluator for float operands.
-//   doubleEvalFunc - evaluator for double operands.
-//   intEvalFunc    - evaluator for integer operands; may be omitted when the
-//                    intrinsic has no integer form.
-// Returns the folded value.
-static Value * EvalBinaryIntrinsic(CallInst *CI,
-                               FloatBinaryEvalFuncType floatEvalFunc,
-                               DoubleBinaryEvalFuncType doubleEvalFunc,
-                               IntBinaryEvalFuncType intEvalFunc = nullptr) {
-  Value *V0 = CI->getArgOperand(0);
-  Value *V1 = CI->getArgOperand(1);
-  llvm::Type *Ty = CI->getType();
-  Value *Result = nullptr;
-  if (llvm::VectorType *VT = dyn_cast<llvm::VectorType>(Ty)) {
-    // Start from undef and insert one folded lane at a time.
-    Result = UndefValue::get(Ty);
-    Constant *CV0 = cast<Constant>(V0);
-    Constant *CV1 = cast<Constant>(V1);
-    IRBuilder<> Builder(CI);
-    for (unsigned i=0;i<VT->getNumElements();i++) {
-      Constant *cV0 = cast<Constant>(CV0->getAggregateElement(i));
-      Constant *cV1 = cast<Constant>(CV1->getAggregateElement(i));
-      Value *EltResult = EvalBinaryIntrinsic(cV0, cV1, floatEvalFunc, doubleEvalFunc, intEvalFunc);
-      Result = Builder.CreateInsertElement(Result, EltResult, i);
-    }
-  } else {
-    Constant *cV0 = cast<Constant>(V0);
-    Constant *cV1 = cast<Constant>(V1);
-    Result = EvalBinaryIntrinsic(cV0, cV1, floatEvalFunc, doubleEvalFunc, intEvalFunc);
-  }
-  // The unreachable second erase/return that used to follow was removed.
-  CI->replaceAllUsesWith(Result);
-  CI->eraseFromParent();
-  return Result;
-}
-
-// Tries to constant-fold a call to an HLSL intrinsic.
-//   CI      - the intrinsic call; the helpers cast its operands to constants,
-//             so the caller is expected to pass only calls with constant
-//             arguments.
-//   intriOp - the intrinsic being called.
-// On success the call is replaced by the folded value and erased, and that
-// value is returned. Returns nullptr, leaving CI untouched, for intrinsics
-// that are not handled here.
-static Value * TryEvalIntrinsic(CallInst *CI, IntrinsicOp intriOp) {
-  switch (intriOp) {
-  case IntrinsicOp::IOP_tan:
-    return EvalUnaryIntrinsic(CI, tanf, tan);
-  case IntrinsicOp::IOP_tanh:
-    return EvalUnaryIntrinsic(CI, tanhf, tanh);
-  case IntrinsicOp::IOP_sin:
-    return EvalUnaryIntrinsic(CI, sinf, sin);
-  case IntrinsicOp::IOP_sinh:
-    return EvalUnaryIntrinsic(CI, sinhf, sinh);
-  case IntrinsicOp::IOP_cos:
-    return EvalUnaryIntrinsic(CI, cosf, cos);
-  case IntrinsicOp::IOP_cosh:
-    return EvalUnaryIntrinsic(CI, coshf, cosh);
-  case IntrinsicOp::IOP_asin:
-    return EvalUnaryIntrinsic(CI, asinf, asin);
-  case IntrinsicOp::IOP_acos:
-    return EvalUnaryIntrinsic(CI, acosf, acos);
-  case IntrinsicOp::IOP_atan:
-    return EvalUnaryIntrinsic(CI, atanf, atan);
-  case IntrinsicOp::IOP_atan2:
-    // Use the shared binary helper so vector operands are folded per element
-    // instead of failing the scalar ConstantFP cast.
-    return EvalBinaryIntrinsic(CI, atan2f, atan2);
-  case IntrinsicOp::IOP_sqrt:
-    return EvalUnaryIntrinsic(CI, sqrtf, sqrt);
-  case IntrinsicOp::IOP_rsqrt: {
-    auto rsqrtF = [](float v) -> float { return 1.0 / sqrtf(v); };
-    auto rsqrtD = [](double v) -> double { return 1.0 / sqrt(v); };
-
-    return EvalUnaryIntrinsic(CI, rsqrtF, rsqrtD);
-  }
-  case IntrinsicOp::IOP_exp:
-    return EvalUnaryIntrinsic(CI, expf, exp);
-  case IntrinsicOp::IOP_exp2:
-    return EvalUnaryIntrinsic(CI, exp2f, exp2);
-  case IntrinsicOp::IOP_log:
-    return EvalUnaryIntrinsic(CI, logf, log);
-  case IntrinsicOp::IOP_log10:
-    return EvalUnaryIntrinsic(CI, log10f, log10);
-  case IntrinsicOp::IOP_log2:
-    return EvalUnaryIntrinsic(CI, log2f, log2);
-  case IntrinsicOp::IOP_pow:
-    return EvalBinaryIntrinsic(CI, powf, pow);
-  case IntrinsicOp::IOP_max: {
-    // The integer form compares as signed.
-    auto maxF = [](float a, float b) -> float { return a > b ? a:b; };
-    auto maxD = [](double a, double b) -> double { return a > b ? a:b; };
-    auto imaxI = [](const APInt &a, const APInt &b) -> APInt { return a.sgt(b) ? a : b; };
-    return EvalBinaryIntrinsic(CI, maxF, maxD, imaxI);
-  }
-  case IntrinsicOp::IOP_min: {
-    // The integer form compares as signed.
-    auto minF = [](float a, float b) -> float { return a < b ? a:b; };
-    auto minD = [](double a, double b) -> double { return a < b ? a:b; };
-    auto iminI = [](const APInt &a, const APInt &b) -> APInt { return a.slt(b) ? a : b; };
-    return EvalBinaryIntrinsic(CI, minF, minD, iminI);
-  }
-  case IntrinsicOp::IOP_umax: {
-    // Integer only, so no floating-point evaluators are supplied.
-    DXASSERT_NOMSG(CI->getArgOperand(0)->getType()->getScalarType()->isIntegerTy());
-    auto umaxI = [](const APInt &a, const APInt &b) -> APInt { return a.ugt(b) ? a : b; };
-    return EvalBinaryIntrinsic(CI, nullptr, nullptr, umaxI);
-  }
-  case IntrinsicOp::IOP_umin: {
-    // Integer only, so no floating-point evaluators are supplied.
-    DXASSERT_NOMSG(CI->getArgOperand(0)->getType()->getScalarType()->isIntegerTy());
-    auto uminI = [](const APInt &a, const APInt &b) -> APInt { return a.ult(b) ? a : b; };
-    return EvalBinaryIntrinsic(CI, nullptr, nullptr, uminI);
-  }
-  case IntrinsicOp::IOP_rcp: {
-    auto rcpF = [](float v) -> float { return 1.0 / v; };
-    auto rcpD = [](double v) -> double { return 1.0 / v; };
-
-    return EvalUnaryIntrinsic(CI, rcpF, rcpD);
-  }
-  case IntrinsicOp::IOP_ceil:
-    return EvalUnaryIntrinsic(CI, ceilf, ceil);
-  case IntrinsicOp::IOP_floor:
-    return EvalUnaryIntrinsic(CI, floorf, floor);
-  case IntrinsicOp::IOP_round:
-    return EvalUnaryIntrinsic(CI, roundf, round);
-  case IntrinsicOp::IOP_trunc:
-    return EvalUnaryIntrinsic(CI, truncf, trunc);
-  case IntrinsicOp::IOP_frac: {
-    auto fracF = [](float v) -> float {
-      return v - floor(v);
-    };
-    auto fracD = [](double v) -> double {
-      return v - floor(v);
-    };
-
-    return EvalUnaryIntrinsic(CI, fracF, fracD);
-  }
-  case IntrinsicOp::IOP_isnan: {
-    // Only the scalar form is folded: the operand is cast to ConstantFP.
-    Value *V = CI->getArgOperand(0);
-    ConstantFP *fV = cast<ConstantFP>(V);
-    bool isNan = fV->getValueAPF().isNaN();
-    Constant *cNan = ConstantInt::get(CI->getType(), isNan ? 1 : 0);
-    CI->replaceAllUsesWith(cNan);
-    CI->eraseFromParent();
-    return cNan;
-  }
-  default:
-    return nullptr;
-  }
-}
-
-// Applies a local simplification to one instruction:
-//   * bitcasts, and constant-expression bitcasts used as the pointer of a
-//     load or store, are handed to SimplifyBitCast;
-//   * the shift amount of shl/lshr/ashr is masked to 0 ~ bitWidth-1.
-// deadInsts receives the instructions SimplifyBitCast made dead.
-static void SimpleTransformForHLDXIR(Instruction *I,
-                                     SmallInstSet &deadInsts) {
-
-  unsigned opcode = I->getOpcode();
-  switch (opcode) {
-  case Instruction::BitCast: {
-    BitCastOperator *BCI = cast<BitCastOperator>(I);
-    SimplifyBitCast(BCI, deadInsts);
-  } break;
-  case Instruction::Load: {
-    LoadInst *ldInst = cast<LoadInst>(I);
-    DXASSERT(!HLMatrixType::isa(ldInst->getType()),
-                      "matrix load should use HL LdStMatrix");
-    Value *Ptr = ldInst->getPointerOperand();
-    if (ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(Ptr)) {
-      if (BitCastOperator *BCO = dyn_cast<BitCastOperator>(CE)) {
-        SimplifyBitCast(BCO, deadInsts);
-      }
-    }
-  } break;
-  case Instruction::Store: {
-    StoreInst *stInst = cast<StoreInst>(I);
-    Value *V = stInst->getValueOperand();
-    DXASSERT_LOCALVAR(V, !HLMatrixType::isa(V->getType()),
-                      "matrix store should use HL LdStMatrix");
-    Value *Ptr = stInst->getPointerOperand();
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
-      if (BitCastOperator *BCO = dyn_cast<BitCastOperator>(CE)) {
-        SimplifyBitCast(BCO, deadInsts);
-      }
-    }
-  } break;
-  case Instruction::LShr:
-  case Instruction::AShr:
-  case Instruction::Shl: {
-    llvm::BinaryOperator *BO = cast<llvm::BinaryOperator>(I);
-    Value *op2 = BO->getOperand(1);
-    IntegerType *Ty = cast<IntegerType>(BO->getType()->getScalarType());
-    unsigned bitWidth = Ty->getBitWidth();
-    // Clamp op2 to 0 ~ bitWidth-1
-    // NOTE(review): masking with bitWidth-1 presumes a power-of-two width.
-    if (ConstantInt *cOp2 = dyn_cast<ConstantInt>(op2)) {
-      // Keep the full 64-bit amount: narrowing it to 32 bits before the
-      // comparison would hide out-of-range amounts whose low bits happen to
-      // be in range, leaving them unclamped.
-      uint64_t iOp2 = cOp2->getLimitedValue();
-      uint64_t clampedOp2 = iOp2 & (uint64_t)(bitWidth - 1);
-      if (iOp2 != clampedOp2) {
-        BO->setOperand(1, ConstantInt::get(op2->getType(), clampedOp2));
-      }
-    } else {
-      // Not a scalar constant: mask the amount with an 'and'.
-      Value *mask = ConstantInt::get(op2->getType(), bitWidth - 1);
-      IRBuilder<> Builder(I);
-      op2 = Builder.CreateAnd(op2, mask);
-      BO->setOperand(1, op2);
-    }
-  } break;
-  }
-}
-
-// Runs the per-instruction simplification over every instruction of the
-// module, then simplifies bitcast users of static globals, to make the later
-// lowering passes easier. Instructions made dead along the way are erased.
-static void SimpleTransformForHLDXIR(llvm::Module *pM) {
-  SmallInstSet deadInsts;
-
-  // Detach every pending instruction first, then erase them, so that dead
-  // instructions referring to each other can be removed in any order.
-  auto eraseDeadInsts = [&deadInsts]() {
-    for (Instruction *Dead : deadInsts)
-      Dead->dropAllReferences();
-    for (Instruction *Dead : deadInsts)
-      Dead->eraseFromParent();
-  };
-
-  for (Function &F : pM->functions()) {
-    for (BasicBlock &BB : F.getBasicBlockList()) {
-      BasicBlock::iterator Iter = BB.begin();
-      while (Iter != BB.end()) {
-        // Step the iterator before the instruction is transformed.
-        Instruction *I = (Iter++);
-        if (deadInsts.count(I) == 0)
-          SimpleTransformForHLDXIR(I, deadInsts);
-        // else: already dead, nothing to do.
-      }
-    }
-  }
-
-  eraseDeadInsts();
-  deadInsts.clear();
-
-  for (GlobalVariable &GV : pM->globals()) {
-    if (!dxilutil::IsStaticGlobal(&GV))
-      continue;
-    for (User *U : GV.users()) {
-      BitCastOperator *BCO = dyn_cast<BitCastOperator>(U);
-      if (BCO)
-        SimplifyBitCast(BCO, deadInsts);
-    }
-  }
-
-  eraseDeadInsts();
-}
-
-// Creates a copy of Orig in llvmModule.
-//   Orig       - function to copy.
-//   Name       - name given to the copy.
-//   llvmModule - module the copy is created in.
-//   TypeSys    - type system that receives the copy's function annotation.
-//   SrcTypeSys - type system the annotation of Orig is read from.
-// The copy is created with external linkage and is returned.
-static Function *CloneFunction(Function *Orig,
-                        const llvm::Twine &Name,
-                        llvm::Module *llvmModule,
-                        hlsl::DxilTypeSystem &TypeSys,
-                        hlsl::DxilTypeSystem &SrcTypeSys) {
-
-  Function *Clone = Function::Create(Orig->getFunctionType(),
-                                     GlobalValue::LinkageTypes::ExternalLinkage,
-                                     Name, llvmModule);
-
-  // Pair every argument of the original with the same-position argument of
-  // the clone.
-  ValueToValueMapTy argMap;
-  auto cloneArgIt = Clone->arg_begin();
-  for (Argument &origArg : Orig->args()) {
-    argMap[&origArg] = &*cloneArgIt;
-    ++cloneArgIt;
-  }
-
-  SmallVector<ReturnInst *, 2> clonedReturns;
-  llvm::CloneFunctionInto(Clone, Orig, argMap, /*ModuleLevelChanges*/ false,
-                          clonedReturns);
-  TypeSys.CopyFunctionAnnotation(Clone, Orig, SrcTypeSys);
-
-  return Clone;
-}
-
-// Clones a shader entry so that other functions can call the clone, while
-// the original stays in use as the shader entry.
-// The clone takes over the original's name and gets internal linkage with
-// all semantics cleared; the original is renamed to EntryName.
-static void CloneShaderEntry(Function *ShaderF, StringRef EntryName,
-                             HLModule &HLM) {
-  Function *Callable = CloneFunction(ShaderF, "", HLM.GetModule(),
-                                     HLM.GetTypeSystem(), HLM.GetTypeSystem());
-
-  Callable->takeName(ShaderF);
-  Callable->setLinkage(GlobalValue::LinkageTypes::InternalLinkage);
-  // Set to name before mangled.
-  ShaderF->setName(EntryName);
-
-  // Clear semantic for cloned one.
-  auto clearSemantic = [](DxilParameterAnnotation &paramAnnot) {
-    paramAnnot.SetSemanticString("");
-    paramAnnot.SetSemanticIndexVec({});
-  };
-
-  DxilFunctionAnnotation *cloneAnnot = HLM.GetFunctionAnnotation(Callable);
-  clearSemantic(cloneAnnot->GetRetTypeAnnotation());
-  for (unsigned argIdx = 0; argIdx < cloneAnnot->GetNumParameters(); argIdx++)
-    clearSemantic(cloneAnnot->GetParameterAnnotation(argIdx));
-}
-
-// For case like:
-//cbuffer A {
-//  float a;
-//  int b;
-//}
-//
-//const static struct {
-//  float a;
-//  int b;
-//}  ST = { a, b };
-// Replace user of ST with a and b.
-//
-//   GEP      - a GEP on the const static global.
-//   InitList - per-field initializers of the global, indexed by field.
-//   Builder  - used to create the replacement GEP.
-// Returns true when the uses of GEP were redirected to the matching
-// initializer; returns false, leaving GEP untouched, when the GEP does not
-// select a field, or selects it with a non-constant or out-of-range index.
-static bool ReplaceConstStaticGlobalUser(GEPOperator *GEP,
-                                         std::vector<Constant *> &InitList,
-                                         IRBuilder<> &Builder) {
-  if (GEP->getNumIndices() < 2) {
-    // Don't use sub element.
-    return false;
-  }
-
-  SmallVector<Value *, 4> idxList;
-  auto iter = GEP->idx_begin();
-  idxList.emplace_back(*(iter++));
-  ConstantInt *subIdx = dyn_cast<ConstantInt>(*(iter++));
-
-  DXASSERT(subIdx, "else dynamic indexing on struct field");
-  // Fail softly as well, so a build without assertions does not dereference
-  // a null index.
-  if (!subIdx)
-    return false;
-  uint64_t subIdxImm = subIdx->getLimitedValue();
-  DXASSERT(subIdxImm < InitList.size(), "else struct index out of bound");
-  if (subIdxImm >= InitList.size())
-    return false;
-
-  Constant *subPtr = InitList[subIdxImm];
-  // Move every idx to idxList except idx for InitList.
-  while (iter != GEP->idx_end()) {
-    idxList.emplace_back(*(iter++));
-  }
-  Value *NewGEP = Builder.CreateGEP(subPtr, idxList);
-  GEP->replaceAllUsesWith(NewGEP);
-  return true;
-}
-
-// Redirects the users of const static globals to the values the globals are
-// initialized from, and empties the constructor of every global whose users
-// could all be redirected.
-//   staticConstGlobalInitListMap - global -> per-field initializers.
-//   staticConstGlobalCtorMap     - global -> constructor that initializes it.
-static void ReplaceConstStaticGlobals(
-    std::unordered_map<GlobalVariable *, std::vector<Constant *>>
-        &staticConstGlobalInitListMap,
-    std::unordered_map<GlobalVariable *, Function *>
-        &staticConstGlobalCtorMap) {
-
-  for (auto &iter : staticConstGlobalInitListMap) {
-    GlobalVariable *GV = iter.first;
-    std::vector<Constant *> &InitList = iter.second;
-    LLVMContext &Ctx = GV->getContext();
-    // Do the replace.
-    bool bPass = true;
-    for (User *U : GV->users()) {
-      IRBuilder<> Builder(Ctx);
-      if (GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
-        Builder.SetInsertPoint(GEPInst);
-        bPass &= ReplaceConstStaticGlobalUser(cast<GEPOperator>(GEPInst), InitList, Builder);
-      } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
-        bPass &= ReplaceConstStaticGlobalUser(GEP, InitList, Builder);
-      } else {
-        DXASSERT(false, "invalid user of const static global");
-        // This user still reads the global, so the constructor that
-        // initializes it must be kept.
-        bPass = false;
-      }
-    }
-    if (!bPass)
-      continue;
-
-    // Clear the Ctor which is useless now.
-    // Look the constructor up without inserting an entry, and skip globals
-    // that have none recorded.
-    auto CtorIter = staticConstGlobalCtorMap.find(GV);
-    if (CtorIter == staticConstGlobalCtorMap.end() || !CtorIter->second)
-      continue;
-    Function *Ctor = CtorIter->second;
-    Ctor->getBasicBlockList().clear();
-    BasicBlock *Entry = BasicBlock::Create(Ctx, "", Ctor);
-    IRBuilder<> Builder(Entry);
-    Builder.CreateRetVoid();
-  }
-}
-
-// Tries to turn a constructor made only of constant stores into an immediate
-// initializer of the global it writes.
-// Succeeds only when Ctor contains nothing but return instructions and
-// stores of non-pointer constants, the stores go through GEPs on one global
-// of array type, and the number of stores equals the array length. The
-// stored values become the array elements in store order.
-// Returns true when the initializer was installed, false otherwise.
-bool BuildImmInit(Function *Ctor) {
-  GlobalVariable *TargetGV = nullptr;
-  SmallVector<Constant *, 4> StoredConsts;
-
-  for (inst_iterator It = inst_begin(Ctor), End = inst_end(Ctor); It != End;
-       ++It) {
-    StoreInst *Store = dyn_cast<StoreInst>(&(*It));
-    if (!Store) {
-      // Besides stores, only returns are tolerated.
-      if (isa<ReturnInst>(*It))
-        continue;
-      return false;
-    }
-
-    Value *Stored = Store->getValueOperand();
-    if (!isa<Constant>(Stored) || Stored->getType()->isPointerTy())
-      return false;
-    StoredConsts.emplace_back(cast<Constant>(Stored));
-
-    // Remember which global the store writes to, when that is visible.
-    GEPOperator *GepOp = dyn_cast<GEPOperator>(Store->getPointerOperand());
-    if (!GepOp)
-      continue;
-    GlobalVariable *pGV = dyn_cast<GlobalVariable>(GepOp->getPointerOperand());
-    if (!pGV)
-      continue;
-    if (TargetGV == nullptr)
-      TargetGV = pGV;
-    else {
-      DXASSERT(TargetGV == pGV, "else pointer mismatch");
-    }
-  }
-
-  if (!TargetGV)
-    return false;
-
-  // TODO: support other types.
-  llvm::ArrayType *AT =
-      dyn_cast<llvm::ArrayType>(TargetGV->getType()->getElementType());
-  if (!AT || StoredConsts.size() != AT->getNumElements())
-    return false;
-
-  TargetGV->setInitializer(llvm::ConstantArray::get(AT, StoredConsts));
-  return true;
-}
-
-// Processes the constructor list stored in the global named globalName.
-// For every function called from a listed constructor, first tries to turn
-// it into an immediate initializer; when that fails, a call to it is
-// inserted before InsertPt. The list global is erased afterwards.
-// Nothing happens when the global is missing or its initializer is not a
-// constant array.
-void ProcessCtorFunctions(llvm::Module &M, StringRef globalName,
-                          Instruction *InsertPt) {
-  // add global call to entry func
-  GlobalVariable *GV = M.getGlobalVariable(globalName);
-  if (!GV)
-    return;
-  ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
-  if (!CA)
-    return;
-
-  IRBuilder<> Builder(InsertPt);
-  for (User::op_iterator OpIt = CA->op_begin(), OpEnd = CA->op_end();
-       OpIt != OpEnd; ++OpIt) {
-    if (isa<ConstantAggregateZero>(*OpIt))
-      continue;
-    ConstantStruct *CS = cast<ConstantStruct>(*OpIt);
-
-    // Must have a function or null ptr; anything but a function is skipped.
-    Function *Ctor = dyn_cast<Function>(CS->getOperand(1));
-    if (!Ctor)
-      continue;
-    DXASSERT(Ctor->getReturnType()->isVoidTy() && Ctor->arg_size() == 0,
-           "function type must be void (void)");
-
-    for (inst_iterator I = inst_begin(Ctor), E = inst_end(Ctor); I != E;
-         ++I) {
-      CallInst *CI = dyn_cast<CallInst>(&(*I));
-      if (!CI) {
-        DXASSERT(isa<ReturnInst>(&(*I)),
-                 "else invalid Global constructor function");
-        continue;
-      }
-      Function *F = CI->getCalledFunction();
-      // Try to build imm initilizer.
-      // If not work, add global call to entry func.
-      if (!BuildImmInit(F))
-        Builder.CreateCall(F);
-    }
-  }
-  // remove the GV
-  GV->eraseFromParent();
-}
-
-// Binds the patch constant function named by the hull-shader attribute of
-// EntryFunc to that entry.
-// The attribute is expected to have been recorded for EntryFunc.Func
-// beforehand (asserted).
-void CGMSHLSLRuntime::SetPatchConstantFunction(const EntryFunctionInfo &EntryFunc) {
-
-  auto AttrsIter = HSEntryPatchConstantFuncAttr.find(EntryFunc.Func);
-
-  DXASSERT(AttrsIter != HSEntryPatchConstantFuncAttr.end(),
-           "we have checked this in AddHLSLFunctionInfo()");
-
-  // Forward the entry the attribute was looked up for, rather than the
-  // unrelated name 'Entry', so lookup and binding always agree.
-  SetPatchConstantFunctionWithAttr(EntryFunc, AttrsIter->second);
-}
-
-// Resolves the patch constant function named by PatchConstantFuncAttr, binds
-// it to the hull-shader entry EntryFunc and validates it.
-// Diagnostics:
-//   error   - the named function cannot be found (nothing is bound);
-//   warning - the name has more than one overload, with a note on the one
-//             selected;
-//   error   - the function has an inout parameter;
-//   error   - its non-zero input/output control point count differs from
-//             the entry's.
-void CGMSHLSLRuntime::SetPatchConstantFunctionWithAttr(
-    const EntryFunctionInfo &EntryFunc,
-    const clang::HLSLPatchConstantFuncAttr *PatchConstantFuncAttr) {
-  DiagnosticsEngine &Diags = CGM.getDiags();
-  StringRef funcName = PatchConstantFuncAttr->getFunctionName();
-
-  auto FoundIt = patchConstantFunctionMap.find(funcName);
-  if (FoundIt == patchConstantFunctionMap.end()) {
-    unsigned DiagID =
-      Diags.getCustomDiagID(DiagnosticsEngine::Error,
-        "Cannot find patchconstantfunc %0.");
-    Diags.Report(PatchConstantFuncAttr->getLocation(), DiagID)
-      << funcName;
-    return;
-  }
-  auto &Found = FoundIt->second;
-
-  if (Found.NumOverloads != 1) {
-    unsigned DiagID =
-      Diags.getCustomDiagID(DiagnosticsEngine::Warning,
-        "Multiple overloads of patchconstantfunc %0.");
-    unsigned NoteID =
-      Diags.getCustomDiagID(DiagnosticsEngine::Note,
-        "This overload was selected.");
-    Diags.Report(PatchConstantFuncAttr->getLocation(), DiagID)
-      << funcName;
-    Diags.Report(Found.SL, NoteID);
-  }
-
-  Function *patchConstFunc = Found.Func;
-  DXASSERT(m_pHLModule->HasDxilFunctionProps(EntryFunc.Func),
-    " else AddHLSLFunctionInfo did not save the dxil function props for the "
-    "HS entry.");
-  DxilFunctionProps &EntryProps =
-    m_pHLModule->GetDxilFunctionProps(EntryFunc.Func);
-  m_pHLModule->SetPatchConstantFunctionForHS(EntryFunc.Func, patchConstFunc);
-  DXASSERT_NOMSG(patchConstantFunctionPropsMap.count(patchConstFunc));
-
-  // Check no inout parameter for patch constant function.
-  DxilFunctionAnnotation *pcfAnnotation =
-    m_pHLModule->GetFunctionAnnotation(patchConstFunc);
-  for (unsigned argIdx = 0; argIdx < pcfAnnotation->GetNumParameters();
-       argIdx++) {
-    DxilParamInputQual qual =
-      pcfAnnotation->GetParameterAnnotation(argIdx).GetParamInputQual();
-    if (qual != DxilParamInputQual::Inout)
-      continue;
-    unsigned DiagID = Diags.getCustomDiagID(
-      DiagnosticsEngine::Error,
-      "Patch Constant function %0 should not have inout param.");
-    Diags.Report(Found.SL, DiagID) << funcName;
-  }
-
-  // Input/Output control point validation.
-  if (!patchConstantFunctionPropsMap.count(patchConstFunc))
-    return;
-
-  const DxilFunctionProps &patchProps =
-    *patchConstantFunctionPropsMap[patchConstFunc];
-  const auto &pcfHS = patchProps.ShaderProps.HS;
-  const auto &entryHS = EntryProps.ShaderProps.HS;
-
-  // A count of zero on the patch constant function is not checked.
-  if (pcfHS.inputControlPoints != 0 &&
-      pcfHS.inputControlPoints != entryHS.inputControlPoints) {
-    unsigned DiagID =
-      Diags.getCustomDiagID(DiagnosticsEngine::Error,
-        "Patch constant function's input patch input "
-        "should have %0 elements, but has %1.");
-    Diags.Report(Found.SL, DiagID)
-      << entryHS.inputControlPoints
-      << pcfHS.inputControlPoints;
-  }
-  if (pcfHS.outputControlPoints != 0 &&
-      pcfHS.outputControlPoints != entryHS.outputControlPoints) {
-    unsigned DiagID = Diags.getCustomDiagID(
-      DiagnosticsEngine::Error,
-      "Patch constant function's output patch input "
-      "should have %0 elements, but has %1.");
-    Diags.Report(Found.SL, DiagID)
-      << entryHS.outputControlPoints
-      << pcfHS.outputControlPoints;
-  }
-}
-
-static void ReportDisallowedTypeInExportParam(CodeGenModule &CGM, StringRef name) {
-  DiagnosticsEngine &Diags = CGM.getDiags();
-  unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
-    "Exported function %0 must not contain a resource in parameter or return type.");
-  std::string escaped;
-  llvm::raw_string_ostream os(escaped);
-  dxilutil::PrintEscapedString(name, os);
-  Diags.Report(DiagID) << os.str();
-}
-
-// Returns true if the value V is being updated, i.e. if V or a value derived
-// from it through GEP, PHI, select, HL cast or HL subscript is
-//   * the user of a store instruction,
-//   * an argument of a call to a non-intrinsic function that is not an HL
-//     operation,
-//   * an argument of a call whose callee cannot be determined, or
-//   * an argument of an HL matrix store.
-// Calls to intrinsic functions are ignored.
-//   visited - values already examined; keeps the walk from revisiting them.
-static bool GlobalHasStoreUserRec(Value *V, std::set<Value *> &visited) {
-  // Nothing to do for a null value or one that was examined before.
-  if (!V || !visited.insert(V).second)
-    return false;
-
-  for (User *U : V->users()) {
-    if (isa<StoreInst>(U))
-      return true;
-
-    if (CallInst *CI = dyn_cast<CallInst>(U)) {
-      Function *F = CI->getCalledFunction();
-      // No known callee: be conservative and assume it may write, as is
-      // done for functions that are not HL operations.
-      if (!F)
-        return true;
-      if (F->isIntrinsic())
-        continue;
-
-      HLOpcodeGroup hlGroup = GetHLOpcodeGroup(F);
-      switch (hlGroup) {
-      case HLOpcodeGroup::NotHL:
-        return true;
-      case HLOpcodeGroup::HLMatLoadStore:
-      {
-        HLMatLoadStoreOpcode opCode = static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
-        if (opCode == HLMatLoadStoreOpcode::ColMatStore || opCode == HLMatLoadStoreOpcode::RowMatStore)
-          return true;
-        break;
-      }
-      case HLOpcodeGroup::HLCast:
-      case HLOpcodeGroup::HLSubscript:
-        // The result still refers to V: follow its users.
-        if (GlobalHasStoreUserRec(U, visited))
-          return true;
-        break;
-      default:
-        break;
-      }
-      continue;
-    }
-
-    if (isa<GEPOperator>(U) || isa<PHINode>(U) || isa<SelectInst>(U)) {
-      if (GlobalHasStoreUserRec(U, visited))
-        return true;
-    }
-  }
-  return false;
-}
-
-// Tells whether the global GV is written to: either one of its direct users
-// is a store, or a store is reached by following its remaining users
-// (such as GEPs) recursively.
-static bool GlobalHasStoreUser(GlobalVariable *GV) {
-  std::set<Value *> seen;
-  return GlobalHasStoreUserRec(GV, seen);
-}
-
-// Creates (or fetches) the global "<name of GV>.static.copy" in M, of the
-// same value type as GV.
-// The copy gets GV's initializer when GV has one and a null value otherwise,
-// and always internal linkage. Returns the copy.
-static GlobalVariable *CreateStaticGlobal(llvm::Module *M, GlobalVariable *GV) {
-  GlobalVariable *Copy = cast<GlobalVariable>(
-      M->getOrInsertGlobal(GV->getName().str() + ".static.copy",
-                           GV->getType()->getPointerElementType()));
-
-  // The copy being static, it should be initialized per llvm rules
-  Constant *Init =
-      GV->hasInitializer()
-          ? GV->getInitializer()
-          : Constant::getNullValue(GV->getType()->getPointerElementType());
-  Copy->setInitializer(Init);
-
-  // static global should have internal linkage
-  Copy->setLinkage(GlobalValue::InternalLinkage);
-  return Copy;
-}
-
-// Gives every written-to global a writable static copy.
-// Considered are globals that are not constant, do not have internal
-// linkage, are not HLSL objects and are not group shared. Each of them is
-// marked constant; those that are written to additionally have their uses
-// redirected to a static copy, which is filled from the original by a memcpy
-// placed at the first non-alloca insertion point of EF's entry block.
-static void CreateWriteEnabledStaticGlobals(llvm::Module *M,
-                                            llvm::Function *EF) {
-  std::vector<GlobalVariable *> writtenGlobals;
-  for (GlobalVariable &GV : M->globals()) {
-    if (GV.isConstant())
-      continue;
-    if (GV.getLinkage() == GlobalValue::InternalLinkage)
-      continue;
-    // skip globals which are HLSL objects or group shared
-    if (dxilutil::IsHLSLObjectType(GV.getType()->getPointerElementType()))
-      continue;
-    if (dxilutil::IsSharedMemoryGlobal(&GV))
-      continue;
-
-    if (GlobalHasStoreUser(&GV))
-      writtenGlobals.emplace_back(&GV);
-    // TODO: Ensure that constant globals aren't using initializer
-    GV.setConstant(true);
-  }
-
-  IRBuilder<> Builder(
-      dxilutil::FirstNonAllocaInsertionPt(&EF->getEntryBlock()));
-  for (GlobalVariable *Orig : writtenGlobals) {
-    GlobalVariable *Copy = CreateStaticGlobal(M, Orig);
-    // Redirect the uses first; the memcpy created next is then the use that
-    // still reads the original.
-    Orig->replaceAllUsesWith(Copy);
-
-    // insert memcpy in all entryblocks
-    uint64_t byteSize = M->getDataLayout().getTypeAllocSize(
-        Orig->getType()->getPointerElementType());
-    Builder.CreateMemCpy(Copy, Orig, byteSize, 1);
-  }
-}
-
-// Replaces calls to RayQuery constructors with an AllocateRayQuery HL
-// intrinsic whose result is stored into the object.  From:
-//  %call = call %"RayQuery<flags>" @<constructor>(%"RayQuery<flags>" %ptr)
-// To:
-//  i32 %handle = AllocateRayQuery(i32 <IntrinsicOp::IOP_AllocateRayQuery>, i32 %flags)
-//  %gep = GEP %"RayQuery<flags>" %ptr, 0, 0
-//  store i32 %handle, i32* %gep
-//  ; and replace uses of %call with %ptr
-// The constructor functions themselves are erased afterwards.
-void TranslateRayQueryConstructor(llvm::Module &M) {
-  SmallVector<Function*, 4> Constructors;
-  for (auto &F : M.functions()) {
-    // Match templated RayQuery constructor instantiation by prefix and signature.
-    // It should be impossible to achieve the same signature from HLSL.
-    llvm::Type *RetTy = F.getReturnType();
-    const bool isRayQueryCtor =
-        F.getName().startswith("\01??0?$RayQuery@$") &&
-        RetTy->isPointerTy() &&
-        dxilutil::IsHLSLRayQueryType(RetTy->getPointerElementType()) &&
-        F.arg_size() == 1 && RetTy == F.arg_begin()->getType();
-    if (isRayQueryCtor)
-      Constructors.emplace_back(&F);
-  }
-
-  for (Function *Ctor : Constructors) {
-    llvm::IntegerType *i32Ty = llvm::Type::getInt32Ty(M.getContext());
-    llvm::ConstantInt *i32Zero = llvm::ConstantInt::get(i32Ty, (uint64_t)0, false);
-    llvm::FunctionType *allocFnTy = llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty}, false);
-    unsigned allocOpcode = (unsigned)IntrinsicOp::IOP_AllocateRayQuery;
-    llvm::ConstantInt *allocOpVal = llvm::ConstantInt::get(i32Ty, allocOpcode, false);
-    Function *allocFn = GetOrCreateHLFunction(M, allocFnTy, HLOpcodeGroup::HLIntrinsic, allocOpcode);
-
-    // Every rewritten call is erased, so keep taking the first remaining
-    // user until none is left.
-    while (!Ctor->user_empty()) {
-      llvm::CallInst *CtorCall = cast<CallInst>(*Ctor->user_begin()); // Must be call
-      llvm::Value *pThis = CtorCall->getArgOperand(0);
-      llvm::StructType *pRQType = cast<llvm::StructType>(pThis->getType()->getPointerElementType());
-      DxilStructAnnotation *SA = M.GetHLModule().GetTypeSystem().GetStructAnnotation(pRQType);
-      DXASSERT(SA, "otherwise, could not find type annoation for RayQuery specialization");
-      DXASSERT(SA->GetNumTemplateArgs() == 1 && SA->GetTemplateArgAnnotation(0).IsIntegral(),
-                "otherwise, RayQuery has changed, or lacks template args");
-
-      llvm::IRBuilder<> Builder(CtorCall);
-      // The ray flags are the integral template argument of the type.
-      llvm::Value *rayFlags = Builder.getInt32(SA->GetTemplateArgAnnotation(0).GetIntegral());
-      llvm::Value *Handle = Builder.CreateCall(allocFn, {allocOpVal, rayFlags}, pThis->getName());
-      llvm::Value *HandlePtr = Builder.CreateInBoundsGEP(pThis, {i32Zero, i32Zero});
-      Builder.CreateStore(Handle, HandlePtr);
-
-      CtorCall->replaceAllUsesWith(pThis);
-      CtorCall->eraseFromParent();
-    }
-    Ctor->eraseFromParent();
-  }
-}
-
-void CGMSHLSLRuntime::FinishCodeGen() {
-  // Lower getResourceHeap before AddOpcodeParamForIntrinsics to skip automatic
-  // lower for getResourceFromHeap.
-  LowerGetResourceFromHeap(*m_pHLModule, m_IntrinsicMap);
-  // translate opcode into parameter for intrinsic functions
-  // Do this before CloneShaderEntry and TranslateRayQueryConstructor to avoid
-  // update valToResPropertiesMap for cloned inst.
-  AddOpcodeParamForIntrinsics(*m_pHLModule, m_IntrinsicMap, resMetadataMap);
-
-  // Library don't have entry.
-  if (!m_bIsLib) {
-    SetEntryFunction();
-
-    // If at this point we haven't determined the entry function it's an error.
-    if (m_pHLModule->GetEntryFunction() == nullptr) {
-      assert(CGM.getDiags().hasErrorOccurred() &&
-             "else SetEntryFunction should have reported this condition");
-      return;
-    }
-
-    // In back-compat mode (with /Gec flag) create a static global for each const global
-    // to allow writing to it.
-    // TODO: Verfiy the behavior of static globals in hull shader
-    if(CGM.getLangOpts().EnableDX9CompatMode && CGM.getLangOpts().HLSLVersion <= 2016)
-      CreateWriteEnabledStaticGlobals(m_pHLModule->GetModule(), m_pHLModule->GetEntryFunction());
-    if (m_pHLModule->GetShaderModel()->IsHS()) {
-      SetPatchConstantFunction(Entry);
-    }
-  } else {
-    for (auto &it : entryFunctionMap) {
-      // skip clone if RT entry
-      if (m_pHLModule->GetDxilFunctionProps(it.second.Func).IsRay())
-        continue;
-
-      // TODO: change flattened function names to dx.entry.<name>:
-      // std::string entryName = (Twine(dxilutil::EntryPrefix) +
-      // it.getKey()).str();
-      CloneShaderEntry(it.second.Func, it.getKey(), *m_pHLModule);
-
-      auto AttrIter = HSEntryPatchConstantFuncAttr.find(it.second.Func);
-      if (AttrIter != HSEntryPatchConstantFuncAttr.end()) {
-        SetPatchConstantFunctionWithAttr(it.second, AttrIter->second);
-      }
-    }
-  }
-
-  ReplaceConstStaticGlobals(staticConstGlobalInitListMap,
-                            staticConstGlobalCtorMap);
-
-  // Create copy for clip plane.
-  for (Function *F : clipPlaneFuncList) {
-    DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(F);
-    IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt());
-
-    for (unsigned i = 0; i < DXIL::kNumClipPlanes; i++) {
-      Value *clipPlane = props.ShaderProps.VS.clipPlanes[i];
-      if (!clipPlane)
-        continue;
-      if (m_bDebugInfo) {
-        Builder.SetCurrentDebugLocation(debugInfoMap[clipPlane]);
-      }
-      llvm::Type *Ty = clipPlane->getType()->getPointerElementType();
-      // Constant *zeroInit = ConstantFP::get(Ty, 0);
-      GlobalVariable *GV = new llvm::GlobalVariable(
-          TheModule, Ty, /*IsConstant*/ false, // constant false to store.
-          llvm::GlobalValue::ExternalLinkage,
-          /*InitVal*/ nullptr, Twine("SV_ClipPlane") + Twine(i));
-      Value *initVal = Builder.CreateLoad(clipPlane);
-      Builder.CreateStore(initVal, GV);
-      props.ShaderProps.VS.clipPlanes[i] = GV;
-    }
-  }
-
-  // Add Reg bindings for resource in cb.
-  AddRegBindingsForResourceInConstantBuffer(m_pHLModule, constantRegBindingMap);
-
-  // Allocate constant buffers.
-  AllocateDxilConstantBuffers(m_pHLModule, m_ConstVarAnnotationMap);
-  // TODO: create temp variable for constant which has store use.
-
-  // Create Global variable and type annotation for each CBuffer.
-  ConstructCBuffer(m_pHLModule, CBufferType, m_ConstVarAnnotationMap);
-
-  // Translate calls to RayQuery constructor into hl Allocate calls
-  TranslateRayQueryConstructor(*m_pHLModule->GetModule());
-
-  if (!m_bIsLib) {
-    // need this for "llvm.global_dtors"?
-    ProcessCtorFunctions(TheModule ,"llvm.global_ctors",
-                  Entry.Func->getEntryBlock().getFirstInsertionPt());
-  }
-
-  // Register patch constant functions referenced by exported Hull Shaders
-  if (m_bIsLib && !m_ExportMap.empty()) {
-    for (auto &it : entryFunctionMap) {
-      if (m_pHLModule->HasDxilFunctionProps(it.second.Func)) {
-        const DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(it.second.Func);
-        if (props.IsHS())
-          m_ExportMap.RegisterExportedFunction(props.ShaderProps.HS.patchConstantFunc);
-      }
-    }
-  }
-
-  // Pin entry point and constant buffers, mark everything else internal.
-  for (Function &f : m_pHLModule->GetModule()->functions()) {
-    if (!m_bIsLib) {
-      if (&f == m_pHLModule->GetEntryFunction() ||
-          IsPatchConstantFunction(&f) || f.isDeclaration()) {
-        if (f.isDeclaration() && !f.isIntrinsic() &&
-            GetHLOpcodeGroup(&f) == HLOpcodeGroup::NotHL) {
-          DiagnosticsEngine &Diags = CGM.getDiags();
-          unsigned DiagID = Diags.getCustomDiagID(
-              DiagnosticsEngine::Error,
-              "External function used in non-library profile: %0");
-          std::string escaped;
-          llvm::raw_string_ostream os(escaped);
-          dxilutil::PrintEscapedString(f.getName(), os);
-          Diags.Report(DiagID) << os.str();
-          return;
-        }
-        f.setLinkage(GlobalValue::LinkageTypes::ExternalLinkage);
-      } else {
-        f.setLinkage(GlobalValue::LinkageTypes::InternalLinkage);
-      }
-    }
-    // Skip no inline functions.
-    if (f.hasFnAttribute(llvm::Attribute::NoInline))
-      continue;
-    // Always inline for used functions.
-    if (!f.user_empty() && !f.isDeclaration())
-      f.addFnAttr(llvm::Attribute::AlwaysInline);
-  }
-
-  if (m_bIsLib && !m_ExportMap.empty()) {
-    m_ExportMap.BeginProcessing();
-    for (Function &f : m_pHLModule->GetModule()->functions()) {
-      if (f.isDeclaration() || f.isIntrinsic() ||
-        GetHLOpcodeGroup(&f) != HLOpcodeGroup::NotHL)
-        continue;
-      m_ExportMap.ProcessFunction(&f, true);
-    }
-    // TODO: add subobject export names here.
-    if (!m_ExportMap.EndProcessing()) {
-      for (auto &name : m_ExportMap.GetNameCollisions()) {
-        DiagnosticsEngine &Diags = CGM.getDiags();
-        unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
-          "Export name collides with another export: %0");
-        std::string escaped;
-        llvm::raw_string_ostream os(escaped);
-        dxilutil::PrintEscapedString(name, os);
-        Diags.Report(DiagID) << os.str();
-      }
-      for (auto &name : m_ExportMap.GetUnusedExports()) {
-        DiagnosticsEngine &Diags = CGM.getDiags();
-        unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
-          "Could not find target for export: %0");
-        std::string escaped;
-        llvm::raw_string_ostream os(escaped);
-        dxilutil::PrintEscapedString(name, os);
-        Diags.Report(DiagID) << os.str();
-      }
-    }
-  }
-
-  for (auto &it : m_ExportMap.GetFunctionRenames()) {
-    Function *F = it.first;
-    auto &renames = it.second;
-
-    if (renames.empty())
-      continue;
-
-    // Rename the original, if necessary, then clone the rest
-    if (renames.find(F->getName()) == renames.end())
-      F->setName(*renames.begin());
-
-    for (auto &itName : renames) {
-      if (F->getName() != itName) {
-        Function *pClone = CloneFunction(F, itName, m_pHLModule->GetModule(),
-          m_pHLModule->GetTypeSystem(), m_pHLModule->GetTypeSystem());
-        // add DxilFunctionProps if entry
-        if (m_pHLModule->HasDxilFunctionProps(F)) {
-          DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(F);
-          auto newProps = llvm::make_unique<DxilFunctionProps>(props);
-          m_pHLModule->AddDxilFunctionProps(pClone, newProps);
-        }
-      }
-    }
-  }
-
-  if (CGM.getCodeGenOpts().ExportShadersOnly) {
-    for (Function &f : m_pHLModule->GetModule()->functions()) {
-      // Skip declarations, intrinsics, shaders, and non-external linkage
-      if (f.isDeclaration() || f.isIntrinsic() ||
-          GetHLOpcodeGroup(&f) != HLOpcodeGroup::NotHL ||
-          m_pHLModule->HasDxilFunctionProps(&f) ||
-          m_pHLModule->IsPatchConstantShader(&f) ||
-          f.getLinkage() != GlobalValue::LinkageTypes::ExternalLinkage)
-        continue;
-      // Mark non-shader user functions as InternalLinkage
-      f.setLinkage(GlobalValue::LinkageTypes::InternalLinkage);
-    }
-  }
-
-  // Now iterate hull shaders and make sure their corresponding patch constant
-  // functions are marked ExternalLinkage:
-  for (Function &f : m_pHLModule->GetModule()->functions()) {
-    if (f.isDeclaration() || f.isIntrinsic() ||
-        GetHLOpcodeGroup(&f) != HLOpcodeGroup::NotHL ||
-        f.getLinkage() != GlobalValue::LinkageTypes::ExternalLinkage ||
-        !m_pHLModule->HasDxilFunctionProps(&f))
-      continue;
-    DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(&f);
-    if (!props.IsHS())
-      continue;
-    Function *PCFunc = props.ShaderProps.HS.patchConstantFunc;
-    if (PCFunc->getLinkage() != GlobalValue::LinkageTypes::ExternalLinkage)
-      PCFunc->setLinkage(GlobalValue::LinkageTypes::ExternalLinkage);
-  }
-
-  // Disallow resource arguments in (non-entry) function exports
-  // unless offline linking target.
-  if (m_bIsLib && m_pHLModule->GetShaderModel()->GetMinor() != ShaderModel::kOfflineMinor) {
-    for (Function &f : m_pHLModule->GetModule()->functions()) {
-      // Skip llvm intrinsics, non-external linkage, entry/patch constant func, and HL intrinsics
-      if (!f.isIntrinsic() &&
-          f.getLinkage() == GlobalValue::LinkageTypes::ExternalLinkage &&
-          !m_pHLModule->HasDxilFunctionProps(&f) &&
-          !m_pHLModule->IsPatchConstantShader(&f) &&
-          GetHLOpcodeGroup(&f) == HLOpcodeGroup::NotHL) {
-        // Verify no resources in param/return types
-        if (dxilutil::ContainsHLSLObjectType(f.getReturnType())) {
-          ReportDisallowedTypeInExportParam(CGM, f.getName());
-          continue;
-        }
-        for (auto &Arg : f.args()) {
-          if (dxilutil::ContainsHLSLObjectType(Arg.getType())) {
-            ReportDisallowedTypeInExportParam(CGM, f.getName());
-            break;
-          }
-        }
-      }
-    }
-  }
-
-  // Do simple transform to make later lower pass easier.
-  SimpleTransformForHLDXIR(m_pHLModule->GetModule());
-
-  // Handle lang extensions if provided.
-  if (CGM.getCodeGenOpts().HLSLExtensionsCodegen) {
-    // Add semantic defines for extensions if any are available.
-    HLSLExtensionsCodegenHelper::SemanticDefineErrorList errors =
-      CGM.getCodeGenOpts().HLSLExtensionsCodegen->WriteSemanticDefines(m_pHLModule->GetModule());
-
-    DiagnosticsEngine &Diags = CGM.getDiags();
-    for (const HLSLExtensionsCodegenHelper::SemanticDefineError& error : errors) {
-      DiagnosticsEngine::Level level = DiagnosticsEngine::Error;
-      if (error.IsWarning())
-        level = DiagnosticsEngine::Warning;
-      unsigned DiagID = Diags.getCustomDiagID(level, "%0");
-      Diags.Report(SourceLocation::getFromRawEncoding(error.Location()), DiagID) << error.Message();
-    }
-
-    // Add root signature from a #define. Overrides root signature in function attribute.
-    {
-      using Status = HLSLExtensionsCodegenHelper::CustomRootSignature::Status;
-      HLSLExtensionsCodegenHelper::CustomRootSignature customRootSig;
-      Status status = CGM.getCodeGenOpts().HLSLExtensionsCodegen->GetCustomRootSignature(&customRootSig);
-      if (status == Status::FOUND) {
-         RootSignatureHandle RootSigHandle;
-          CompileRootSignature(customRootSig.RootSignature, Diags,
-                               SourceLocation::getFromRawEncoding(customRootSig.EncodedSourceLocation),
-                               rootSigVer, DxilRootSignatureCompilationFlags::GlobalRootSignature, &RootSigHandle);
-          if (!RootSigHandle.IsEmpty()) {
-            RootSigHandle.EnsureSerializedAvailable();
-            m_pHLModule->SetSerializedRootSignature(
-                RootSigHandle.GetSerializedBytes(),
-                RootSigHandle.GetSerializedSize());
-          }
-      }
-    }
-  }
-
   // At this point, we have a high-level DXIL module - record this.
-  SetPauseResumePasses(*m_pHLModule->GetModule(), "hlsl-hlemit", "hlsl-hlensure");
+  SetPauseResumePasses(*m_pHLModule->GetModule(), "hlsl-hlemit",
+                       "hlsl-hlensure");
 }
 
 RValue CGMSHLSLRuntime::EmitHLSLBuiltinCallExpr(CodeGenFunction &CGF,

+ 2577 - 0
tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp

@@ -0,0 +1,2577 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// CGHLSLMSFinishCodeGen.cpp                                                 //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+//  Implement FinishCodeGen.                                                 //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+#include "CodeGenModule.h"
+#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Parse/ParseHLSL.h" // root sig would be in Parser if part of lang
+
+#include "dxc/HLSL/HLModule.h"
+#include "dxc/HLSL/HLSLExtensionsCodegenHelper.h"
+#include "dxc/DXIL/DxilOperations.h"
+#include "dxc/HlslIntrinsicOp.h"
+#include "dxc/DXIL/DxilUtil.h"
+#include "dxc/HLSL/DxilExportMap.h"
+#include "dxc/DXIL/DxilResourceProperties.h"
+#include "dxc/DXIL/DxilTypeSystem.h"
+#include "dxc/DXIL/DxilConstants.h"
+#include "dxc/DxilRootSignature/DxilRootSignature.h"
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/HLMatrixType.h"
+
+#include <vector>
+#include <memory>
+
+#include "CGHLSLMSHelper.h"
+
+using namespace llvm;
+using namespace hlsl;
+using namespace CGHLSLMSHelper;
+
+
+namespace {
+
+// Emits a load of ResPtr followed by an HLCreateHandle operation call
+// (opcode 0) on the loaded value; the call, typed HandleTy, is returned.
+Value *CreateHandleFromResPtr(Value *ResPtr, HLModule &HLM,
+                              llvm::Type *HandleTy, IRBuilder<> &Builder) {
+  Module &TheModule = *HLM.GetModule();
+  // Go through an explicit load so the resource pointer only ever has
+  // load/store uses, which lets mem2reg remove the temporary resource.
+  Value *LoadedRes = Builder.CreateLoad(ResPtr);
+  return HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCreateHandle,
+                                 /*opcode*/ 0, HandleTy, {LoadedRes},
+                                 TheModule);
+}
+
+// Emits an HLAnnotateHandle operation call for Handle. The call operands are
+// the handle, RP.Class and RP.Kind as i8 constants, RP encoded as a constant
+// of the resource-properties type, and an undef value of type ResTy. The
+// result has the same type as Handle.
+Value *CreateAnnotateHandle(HLModule &HLM, Value *Handle,
+                            DxilResourceProperties &RP, llvm::Type *ResTy,
+                            IRBuilder<> &Builder) {
+  llvm::Type *PropsTy = HLM.GetOP()->GetResourcePropertiesType();
+  Constant *PropsConst =
+      resource_helper::getAsConstant(RP, PropsTy, *HLM.GetShaderModel());
+
+  Value *ClassVal = Builder.getInt8((uint8_t)RP.Class);
+  Value *KindVal = Builder.getInt8((uint8_t)RP.Kind);
+  Value *Operands[] = {Handle, ClassVal, KindVal, PropsConst,
+                       UndefValue::get(ResTy)};
+
+  const unsigned Opcode = (unsigned)HLOpcodeGroup::HLAnnotateHandle;
+  return HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLAnnotateHandle,
+                                 Opcode, Handle->getType(), Operands,
+                                 *HLM.GetModule());
+}
+
+/* Lowers calls to the IOP_CreateResourceFromHeap intrinsic listed in
+ * intrinsicMap. Entries with any other opcode, or whose function is not in
+ * the HLIntrinsic group, are ignored.
+ *
+ * Pass 1: for every call, emit an HLIntrinsic call with the same opcode that
+ * returns a handle built from operand 1 (the index), store that handle into a
+ * handle-typed alloca associated with operand 0 (the resource pointer, which
+ * must be an Instruction), and erase the original call. One alloca is created
+ * per distinct resource pointer, at the start of the entry block of the
+ * function that contains that pointer.
+ *
+ * Pass 2: for every recorded resource pointer, each user must be a bitcast
+ * whose users are all loads. Each load is replaced by a load of the handle
+ * alloca followed by an HLCast HandleToResCast call producing the load's
+ * type; the bitcast and finally the resource pointer itself are erased.
+ */ void LowerGetResourceFromHeap(
+    HLModule &HLM, std::vector<std::pair<Function *, unsigned>> &intrinsicMap) {
+  llvm::Module &M = *HLM.GetModule();
+  llvm::Type *HandleTy = HLM.GetOP()->GetHandleType();
+  unsigned GetResFromHeapOp =
+      static_cast<unsigned>(IntrinsicOp::IOP_CreateResourceFromHeap);
+  DenseMap<Instruction *, Instruction *> ResourcePtrToHandlePtrMap;
+
+  for (auto it : intrinsicMap) {
+    unsigned opcode = it.second;
+    if (opcode != GetResFromHeapOp)
+      continue;
+    Function *F = it.first;
+    HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
+    if (group != HLOpcodeGroup::HLIntrinsic)
+      continue;
+    for (auto uit = F->user_begin(); uit != F->user_end();) {
+      CallInst *CI = cast<CallInst>(*(uit++)); // Advance first: CI is erased below.
+      Instruction *ResPtr = cast<Instruction>(CI->getArgOperand(0));
+      Value *Index = CI->getArgOperand(1);
+      IRBuilder<> Builder(CI);
+      // Make a handle from GetResFromHeap.
+      Value *Handle =
+          HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLIntrinsic,
+                                  GetResFromHeapOp, HandleTy, {Index}, M);
+
+      // Find the handle ptr for res ptr, creating it on first use.
+      auto it = ResourcePtrToHandlePtrMap.find(ResPtr); // Shadows the outer loop's `it`.
+      Instruction *HandlePtr = nullptr;
+      if (it != ResourcePtrToHandlePtrMap.end()) {
+        HandlePtr = it->second;
+      } else {
+        IRBuilder<> AllocaBuilder(
+            ResPtr->getParent()->getParent()->getEntryBlock().begin());
+        HandlePtr = AllocaBuilder.CreateAlloca(HandleTy);
+        ResourcePtrToHandlePtrMap[ResPtr] = HandlePtr;
+      }
+      // Store handle to handle ptr.
+      Builder.CreateStore(Handle, HandlePtr);
+      CI->eraseFromParent();
+    }
+  }
+
+  // Replace loads through the resource ptr with loads of the handle ptr.
+  for (auto it : ResourcePtrToHandlePtrMap) {
+    Instruction *resPtr = it.first;
+    Instruction *handlePtr = it.second;
+
+    for (auto uit = resPtr->user_begin(); uit != resPtr->user_end();) {
+      User *U = *(uit++); // Advance first: the bitcast is erased below.
+      BitCastInst *BCI = cast<BitCastInst>(U);
+      DXASSERT(
+          dxilutil::IsHLSLResourceType(BCI->getType()->getPointerElementType()),
+          "illegal cast of resource ptr");
+      for (auto cuit = BCI->user_begin(); cuit != BCI->user_end();) {
+        LoadInst *LI = cast<LoadInst>(*(cuit++)); // Advance first: LI is erased below.
+        IRBuilder<> Builder(LI);
+        Value *Handle = Builder.CreateLoad(handlePtr);
+        Value *Res =
+            HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast,
+                                    (unsigned)HLCastOpcode::HandleToResCast,
+                                    LI->getType(), {Handle}, M);
+        LI->replaceAllUsesWith(Res);
+        LI->eraseFromParent();
+      }
+      BCI->eraseFromParent();
+    }
+    resPtr->eraseFromParent();
+  }
+}
+
+
+// Rewrites every user of the vector-subscript call CI to address the element
+// directly through an in-bounds GEP {0, index} on the call's pointer operand,
+// then erases CI. A load user becomes a load through the GEP that is
+// zero-extended to the old load's type; any other user must be a store, whose
+// value is compared against zero (icmp ne) and the result stored via the GEP.
+void ReplaceBoolVectorSubscript(CallInst *CI) {
+  Value *VecPtr = CI->getArgOperand(0);
+  Value *ElemIdx = CI->getArgOperand(1);
+  Value *Zero = ConstantInt::get(ElemIdx->getType(), 0);
+  Value *Indices[] = {Zero, ElemIdx};
+
+  auto UseIt = CI->user_begin();
+  const auto UseEnd = CI->user_end();
+  while (UseIt != UseEnd) {
+    // Step the iterator before the current user gets erased.
+    Instruction *UserInst = cast<Instruction>(*(UseIt++));
+
+    IRBuilder<> Builder(UserInst);
+    Value *ElemPtr = Builder.CreateInBoundsGEP(VecPtr, Indices);
+
+    if (LoadInst *OldLoad = dyn_cast<LoadInst>(UserInst)) {
+      Value *MemVal = Builder.CreateLoad(ElemPtr);
+      Value *Widened = Builder.CreateZExt(MemVal, OldLoad->getType());
+      OldLoad->replaceAllUsesWith(Widened);
+      OldLoad->eraseFromParent();
+      continue;
+    }
+
+    // Anything that is not a load must be a store.
+    StoreInst *OldStore = cast<StoreInst>(UserInst);
+    Value *Src = OldStore->getValueOperand();
+    Value *NonZero =
+        Builder.CreateICmpNE(Src, llvm::ConstantInt::get(Src->getType(), 0));
+    Builder.CreateStore(NonZero, ElemPtr);
+    OldStore->eraseFromParent();
+  }
+  CI->eraseFromParent();
+}
+
+// Applies the per-call rewrite to every user of F; each user must be a call.
+void ReplaceBoolVectorSubscript(Function *F) {
+  auto UserIt = F->user_begin();
+  const auto UserEnd = F->user_end();
+  while (UserIt != UserEnd) {
+    // Step the iterator first: the per-call rewrite erases the call.
+    CallInst *Call = cast<CallInst>(*(UserIt++));
+    ReplaceBoolVectorSubscript(Call);
+  }
+}
+
+/* Returns the HL function for (group, opcode) with type funcTy in module M,
+ * adding a function body for the intrinsic where possible.
+ *
+ * HLIntrinsic group:
+ *  - MOP_Append / MOP_Consume: a body is generated that calls
+ *    IncrementCounter (append) or DecrementCounter (consume) on the handle
+ *    argument, subscripts the buffer with the returned counter through the
+ *    DefaultSubscript HL function, and then stores the value argument
+ *    (append) or returns the loaded element (consume). i1 scalars/vectors
+ *    are held as i32 in memory, so they are zero-extended before the store
+ *    and compared against zero after the load. Append whose handle parameter
+ *    is a stream-output pointer type gets no body.
+ *  - IOP_sincos: a body is generated that calls the IOP_sin and IOP_cos HL
+ *    functions on the value argument and stores the results through the two
+ *    pointer arguments.
+ *  - any other opcode: the function comes from GetOrCreateHLFunction as is.
+ * HLExtIntrinsic group: the function is obtained using the group name read
+ * from F's attribute and F's own name.
+ * Any other group: the function comes from GetOrCreateHLFunction as is.
+ *
+ * F's ReadNone / ReadOnly attributes are copied onto the returned function.
+ */
+Function *CreateOpFunction(llvm::Module &M, Function *F,
+                           llvm::FunctionType *funcTy, HLOpcodeGroup group,
+                           unsigned opcode) {
+  Function *opFunc = nullptr;
+
+  llvm::Type *opcodeTy = llvm::Type::getInt32Ty(M.getContext());
+  if (group == HLOpcodeGroup::HLIntrinsic) {
+    IntrinsicOp intriOp = static_cast<IntrinsicOp>(opcode);
+    switch (intriOp) {
+    case IntrinsicOp::MOP_Append:
+    case IntrinsicOp::MOP_Consume: {
+      bool bAppend = intriOp == IntrinsicOp::MOP_Append;
+      llvm::Type *handleTy = funcTy->getParamType(HLOperandIndex::kHandleOpIdx);
+      // Don't generate body for OutputStream::Append.
+      if (bAppend && HLModule::IsStreamOutputPtrType(handleTy)) {
+        opFunc = GetOrCreateHLFunction(M, funcTy, group, opcode);
+        break;
+      }
+
+      opFunc = GetOrCreateHLFunctionWithBody(M, funcTy, group, opcode,
+                                             bAppend ? "append" : "consume");
+      llvm::Type *counterTy = llvm::Type::getInt32Ty(M.getContext());
+      llvm::FunctionType *IncCounterFuncTy =
+          llvm::FunctionType::get(counterTy, {opcodeTy, handleTy}, false);
+      unsigned counterOpcode =
+          bAppend ? (unsigned)IntrinsicOp::MOP_IncrementCounter
+                  : (unsigned)IntrinsicOp::MOP_DecrementCounter;
+      Function *incCounterFunc =
+          GetOrCreateHLFunction(M, IncCounterFuncTy, group, counterOpcode);
+
+      llvm::Type *idxTy = counterTy; // i32, same type as the counter.
+      llvm::Type *valTy =
+          bAppend ? funcTy->getParamType(HLOperandIndex::kAppendValOpIndex)
+                  : funcTy->getReturnType();
+
+      // Return type for subscript should be pointer type, hence in memory
+      // representation
+      llvm::Type *subscriptTy = valTy;
+      bool isBoolScalarOrVector = false;
+      if (!subscriptTy->isPointerTy()) {
+        if (subscriptTy->getScalarType()->isIntegerTy(1)) {
+          isBoolScalarOrVector = true;
+          llvm::Type *memReprType =
+              llvm::IntegerType::get(subscriptTy->getContext(), 32);
+          subscriptTy =
+              subscriptTy->isVectorTy()
+                  ? llvm::VectorType::get(memReprType,
+                                          subscriptTy->getVectorNumElements())
+                  : memReprType;
+        }
+        subscriptTy = llvm::PointerType::get(subscriptTy, 0);
+      }
+
+      llvm::FunctionType *SubscriptFuncTy = llvm::FunctionType::get(
+          subscriptTy, {opcodeTy, handleTy, idxTy}, false);
+
+      Function *subscriptFunc =
+          GetOrCreateHLFunction(M, SubscriptFuncTy, HLOpcodeGroup::HLSubscript,
+                                (unsigned)HLSubscriptOpcode::DefaultSubscript);
+
+      BasicBlock *BB =
+          BasicBlock::Create(opFunc->getContext(), "Entry", opFunc);
+      IRBuilder<> Builder(BB);
+      auto argIter = opFunc->args().begin();
+      // Skip the opcode arg.
+      argIter++;
+      Argument *thisArg = argIter++;
+      // int counter = IncrementCounter/DecrementCounter(Buf);
+      Value *incCounterOpArg = ConstantInt::get(idxTy, counterOpcode);
+      Value *counter =
+          Builder.CreateCall(incCounterFunc, {incCounterOpArg, thisArg});
+      // Buf[counter];
+      Value *subscriptOpArg = ConstantInt::get(
+          idxTy, (unsigned)HLSubscriptOpcode::DefaultSubscript);
+      Value *subscript =
+          Builder.CreateCall(subscriptFunc, {subscriptOpArg, thisArg, counter});
+
+      if (bAppend) {
+        Argument *valArg = argIter;
+        // Buf[counter] = val;
+        if (valTy->isPointerTy()) {
+          unsigned size = M.getDataLayout().getTypeAllocSize(
+              subscript->getType()->getPointerElementType());
+          Builder.CreateMemCpy(subscript, valArg, size, 1);
+        } else {
+          Value *storedVal = valArg;
+          // Convert to memory representation
+          if (isBoolScalarOrVector)
+            storedVal = Builder.CreateZExt(
+                storedVal, subscriptTy->getPointerElementType(), "frombool");
+          Builder.CreateStore(storedVal, subscript);
+        }
+        Builder.CreateRetVoid();
+      } else {
+        // return Buf[counter];
+        if (valTy->isPointerTy())
+          Builder.CreateRet(subscript);
+        else {
+          Value *retVal = Builder.CreateLoad(subscript);
+          // Convert to register representation
+          if (isBoolScalarOrVector)
+            retVal = Builder.CreateICmpNE(
+                retVal, Constant::getNullValue(retVal->getType()), "tobool");
+          Builder.CreateRet(retVal);
+        }
+      }
+    } break;
+    case IntrinsicOp::IOP_sincos: {
+      opFunc =
+          GetOrCreateHLFunctionWithBody(M, funcTy, group, opcode, "sincos");
+      llvm::Type *valTy =
+          funcTy->getParamType(HLOperandIndex::kTrinaryOpSrc0Idx);
+
+      llvm::FunctionType *sinFuncTy =
+          llvm::FunctionType::get(valTy, {opcodeTy, valTy}, false);
+      unsigned sinOp = static_cast<unsigned>(IntrinsicOp::IOP_sin);
+      unsigned cosOp = static_cast<unsigned>(IntrinsicOp::IOP_cos);
+      Function *sinFunc = GetOrCreateHLFunction(M, sinFuncTy, group, sinOp);
+      Function *cosFunc = GetOrCreateHLFunction(M, sinFuncTy, group, cosOp); // Same function type as sin.
+
+      BasicBlock *BB =
+          BasicBlock::Create(opFunc->getContext(), "Entry", opFunc);
+      IRBuilder<> Builder(BB);
+      auto argIter = opFunc->args().begin();
+      // Skip the opcode arg.
+      argIter++;
+      Argument *valArg = argIter++;
+      Argument *sinPtrArg = argIter++;
+      Argument *cosPtrArg = argIter++;
+
+      Value *sinOpArg = ConstantInt::get(opcodeTy, sinOp);
+      Value *sinVal = Builder.CreateCall(sinFunc, {sinOpArg, valArg});
+      Builder.CreateStore(sinVal, sinPtrArg);
+
+      Value *cosOpArg = ConstantInt::get(opcodeTy, cosOp);
+      Value *cosVal = Builder.CreateCall(cosFunc, {cosOpArg, valArg});
+      Builder.CreateStore(cosVal, cosPtrArg);
+      // Ret.
+      Builder.CreateRetVoid();
+    } break;
+    default:
+      opFunc = GetOrCreateHLFunction(M, funcTy, group, opcode);
+      break;
+    }
+  } else if (group == HLOpcodeGroup::HLExtIntrinsic) {
+    llvm::StringRef fnName = F->getName();
+    llvm::StringRef groupName = GetHLOpcodeGroupNameByAttr(F);
+    opFunc =
+        GetOrCreateHLFunction(M, funcTy, group, &groupName, &fnName, opcode);
+  } else {
+    opFunc = GetOrCreateHLFunction(M, funcTy, group, opcode);
+  }
+
+  // Propagate the memory-access attributes of F to the new function.
+  if (F->hasFnAttribute(Attribute::ReadNone))
+    opFunc->addFnAttr(Attribute::ReadNone);
+  if (F->hasFnAttribute(Attribute::ReadOnly))
+    opFunc->addFnAttr(Attribute::ReadOnly);
+  return opFunc;
+}
+
+/* Finds the DxilResourceProperties for the object argument `arg` of an
+ * intrinsic call.
+ *
+ * Lookup order:
+ *  1. `arg` itself in valToResPropertiesMap.
+ *  2. Otherwise `arg` must be a GEP. Its pointer operand is followed up
+ *     through parent GEPs for as long as
+ *     IsHLSLResourceType(GetArrayEltTy(Ptr->getType())) holds, and the
+ *     resulting pointer is looked up in valToResPropertiesMap.
+ *  3. Otherwise the types indexed by the (outermost reached) GEP are scanned
+ *     for a struct whose indexed field annotation carries a resource
+ *     attribute; the properties are loaded from that metadata (SRV/UAV via
+ *     DxilResource, Sampler via DxilSampler).
+ *
+ * Asserts (DXASSERT) that the resulting class is not Invalid and returns the
+ * properties by value.
+ */ DxilResourceProperties GetResourcePropsFromIntrinsicObjectArg(
+    Value *arg, HLModule &HLM, DxilTypeSystem &typeSys,
+    DenseMap<Value *, DxilResourceProperties> &valToResPropertiesMap) {
+  DxilResourceProperties RP;
+  RP.Class = DXIL::ResourceClass::Invalid; // Sentinel checked by the final DXASSERT.
+
+  auto RPIt = valToResPropertiesMap.find(arg);
+  if (RPIt != valToResPropertiesMap.end()) {
+    RP = RPIt->second;
+  } else {
+    // Must be GEP.
+    GEPOperator *GEP = cast<GEPOperator>(arg);
+    // Find RP from GEP.
+    Value *Ptr = GEP->getPointerOperand();
+    // When Ptr is array of resource, check if it is another GEP.
+    while (
+        dxilutil::IsHLSLResourceType(dxilutil::GetArrayEltTy(Ptr->getType()))) {
+      if (GEPOperator *ParentGEP = dyn_cast<GEPOperator>(Ptr)) {
+        GEP = ParentGEP;
+        Ptr = GEP->getPointerOperand();
+      } else {
+        break;
+      }
+    }
+
+    RPIt = valToResPropertiesMap.find(Ptr);
+    // When ptr is array of resource, ptr could be in
+    // valToResPropertiesMap.
+    if (RPIt != valToResPropertiesMap.end()) {
+      RP = RPIt->second;
+    } else {
+      DxilStructAnnotation *Anno = nullptr;
+
+      for (auto gepIt = gep_type_begin(GEP), E = gep_type_end(GEP); gepIt != E;
+           ++gepIt) {
+
+        if (StructType *ST = dyn_cast<StructType>(*gepIt)) {
+          Anno = typeSys.GetStructAnnotation(ST);
+          DXASSERT(Anno, "missing type annotation");
+
+          unsigned Index =
+              cast<ConstantInt>(gepIt.getOperand())->getLimitedValue(); // Struct index must be a constant.
+
+          DxilFieldAnnotation &fieldAnno = Anno->GetFieldAnnotation(Index);
+          if (fieldAnno.HasResourceAttribute()) {
+            MDNode *resAttrib = fieldAnno.GetResourceAttribute();
+            DxilResourceBase R(DXIL::ResourceClass::Invalid);
+            HLM.LoadDxilResourceBaseFromMDNode(resAttrib, R); // Read the base first to learn the class.
+            switch (R.GetClass()) {
+            case DXIL::ResourceClass::SRV:
+            case DXIL::ResourceClass::UAV: {
+              DxilResource Res;
+              HLM.LoadDxilResourceFromMDNode(resAttrib, Res);
+              RP = resource_helper::loadFromResourceBase(&Res);
+            } break;
+            case DXIL::ResourceClass::Sampler: {
+              DxilSampler Sampler;
+              HLM.LoadDxilSamplerFromMDNode(resAttrib, Sampler);
+              RP = resource_helper::loadFromResourceBase(&Sampler);
+            } break;
+            default:
+              DXASSERT(0, "invalid resource attribute in filed annotation");
+              break;
+            }
+            break; // Stop at the first field that carries a resource attribute.
+          }
+        }
+      }
+    }
+  }
+  DXASSERT(RP.Class != DXIL::ResourceClass::Invalid,
+           "invalid resource properties");
+  return RP;
+}
+
+void AddOpcodeParamForIntrinsic(
+    HLModule &HLM, Function *F, unsigned opcode, llvm::Type *HandleTy,
+    DenseMap<Value *, DxilResourceProperties> &valToResPropertiesMap) {
+  llvm::Module &M = *HLM.GetModule();
+  llvm::FunctionType *oldFuncTy = F->getFunctionType();
+
+  SmallVector<llvm::Type *, 4> paramTyList;
+  // Add the opcode param
+  llvm::Type *opcodeTy = llvm::Type::getInt32Ty(M.getContext());
+  paramTyList.emplace_back(opcodeTy);
+  paramTyList.append(oldFuncTy->param_begin(), oldFuncTy->param_end());
+
+  for (unsigned i = 1; i < paramTyList.size(); i++) {
+    llvm::Type *Ty = paramTyList[i];
+    if (Ty->isPointerTy()) {
+      Ty = Ty->getPointerElementType();
+      if (dxilutil::IsHLSLResourceType(Ty)) {
+        // Use handle type for resource type.
+        // This will make sure temp object variable only used by createHandle.
+        paramTyList[i] = HandleTy;
+      }
+    }
+  }
+
+  HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
+
+  if (group == HLOpcodeGroup::HLSubscript &&
+      opcode == static_cast<unsigned>(HLSubscriptOpcode::VectorSubscript)) {
+    llvm::FunctionType *FT = F->getFunctionType();
+    llvm::Type *VecArgTy = FT->getParamType(0);
+    llvm::VectorType *VType =
+        cast<llvm::VectorType>(VecArgTy->getPointerElementType());
+    llvm::Type *Ty = VType->getElementType();
+    DXASSERT(Ty->isIntegerTy(), "Only bool could use VectorSubscript");
+    llvm::IntegerType *ITy = cast<IntegerType>(Ty);
+
+    DXASSERT_LOCALVAR(ITy, ITy->getBitWidth() == 1,
+                      "Only bool could use VectorSubscript");
+
+    // The return type is i8*.
+    // Replace all uses with i1*.
+    ReplaceBoolVectorSubscript(F);
+    return;
+  }
+
+  bool isDoubleSubscriptFunc =
+      group == HLOpcodeGroup::HLSubscript &&
+      opcode == static_cast<unsigned>(HLSubscriptOpcode::DoubleSubscript);
+
+  llvm::Type *RetTy = oldFuncTy->getReturnType();
+
+  if (isDoubleSubscriptFunc) {
+    CallInst *doubleSub = cast<CallInst>(*F->user_begin());
+
+    // Change currentIdx type into coord type.
+    auto U = doubleSub->user_begin();
+    Value *user = *U;
+    CallInst *secSub = cast<CallInst>(user);
+    unsigned coordIdx = HLOperandIndex::kSubscriptIndexOpIdx;
+    // opcode operand not add yet, so the index need -1.
+    if (GetHLOpcodeGroupByName(secSub->getCalledFunction()) ==
+        HLOpcodeGroup::NotHL)
+      coordIdx -= 1;
+
+    Value *coord = secSub->getArgOperand(coordIdx);
+
+    llvm::Type *coordTy = coord->getType();
+    paramTyList[HLOperandIndex::kSubscriptIndexOpIdx] = coordTy;
+    // Add the sampleIdx or mipLevel parameter to the end.
+    paramTyList.emplace_back(opcodeTy);
+    // Change return type to be resource ret type.
+    // opcode operand not add yet, so the index need -1.
+    Value *objPtr =
+        doubleSub->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx - 1);
+    // Must be a GEP
+    GEPOperator *objGEP = cast<GEPOperator>(objPtr);
+    gep_type_iterator GEPIt = gep_type_begin(objGEP), E = gep_type_end(objGEP);
+    llvm::Type *resTy = nullptr;
+    while (GEPIt != E) {
+      if (dxilutil::IsHLSLResourceType(*GEPIt)) {
+        resTy = *GEPIt;
+        break;
+      }
+      GEPIt++;
+    }
+
+    DXASSERT(resTy, "must find the resource type");
+    // Change object type to handle type.
+    paramTyList[HLOperandIndex::kSubscriptObjectOpIdx] = HandleTy;
+    // Change RetTy into pointer of resource reture type.
+    RetTy = cast<StructType>(resTy)->getElementType(0)->getPointerTo();
+  }
+
+  llvm::FunctionType *funcTy =
+      llvm::FunctionType::get(RetTy, paramTyList, false);
+
+  Function *opFunc = CreateOpFunction(M, F, funcTy, group, opcode);
+  StringRef lower = hlsl::GetHLLowerStrategy(F);
+  if (!lower.empty())
+    hlsl::SetHLLowerStrategy(opFunc, lower);
+
+  DxilTypeSystem &typeSys = HLM.GetTypeSystem();
+
+  for (auto user = F->user_begin(); user != F->user_end();) {
+    // User must be a call.
+    CallInst *oldCI = cast<CallInst>(*(user++));
+
+    SmallVector<Value *, 4> opcodeParamList;
+    Value *opcodeConst = Constant::getIntegerValue(opcodeTy, APInt(32, opcode));
+    opcodeParamList.emplace_back(opcodeConst);
+
+    opcodeParamList.append(oldCI->arg_operands().begin(),
+                           oldCI->arg_operands().end());
+    IRBuilder<> Builder(oldCI);
+
+    if (isDoubleSubscriptFunc) {
+      // Change obj to the resource pointer.
+      Value *objVal = opcodeParamList[HLOperandIndex::kSubscriptObjectOpIdx];
+      GEPOperator *objGEP = cast<GEPOperator>(objVal);
+      SmallVector<Value *, 8> IndexList;
+      IndexList.append(objGEP->idx_begin(), objGEP->idx_end());
+      Value *lastIndex = IndexList.back();
+      ConstantInt *constIndex = cast<ConstantInt>(lastIndex);
+      DXASSERT_LOCALVAR(constIndex, constIndex->getLimitedValue() == 1,
+                        "last index must 1");
+      // Remove the last index.
+      IndexList.pop_back();
+      objVal = objGEP->getPointerOperand();
+
+      DxilResourceProperties RP = GetResourcePropsFromIntrinsicObjectArg(
+          objVal, HLM, typeSys, valToResPropertiesMap);
+
+      if (IndexList.size() > 1)
+        objVal = Builder.CreateInBoundsGEP(objVal, IndexList);
+
+      Value *Handle = CreateHandleFromResPtr(objVal, HLM, HandleTy, Builder);
+
+      Type *ResTy = objVal->getType()->getPointerElementType();
+      Handle = CreateAnnotateHandle(HLM, Handle, RP, ResTy, Builder);
+      // Change obj to the resource pointer.
+      opcodeParamList[HLOperandIndex::kSubscriptObjectOpIdx] = Handle;
+
+      // Set idx and mipIdx.
+      Value *mipIdx = opcodeParamList[HLOperandIndex::kSubscriptIndexOpIdx];
+      auto U = oldCI->user_begin();
+      Value *user = *U;
+      CallInst *secSub = cast<CallInst>(user);
+      unsigned idxOpIndex = HLOperandIndex::kSubscriptIndexOpIdx;
+      if (GetHLOpcodeGroupByName(secSub->getCalledFunction()) ==
+          HLOpcodeGroup::NotHL)
+        idxOpIndex--;
+      Value *idx = secSub->getArgOperand(idxOpIndex);
+
+      DXASSERT(secSub->hasOneUse(), "subscript should only has one use");
+
+      // Add the sampleIdx or mipLevel parameter to the end.
+      opcodeParamList[HLOperandIndex::kSubscriptIndexOpIdx] = idx;
+      opcodeParamList.emplace_back(mipIdx);
+      // Insert new call before secSub to make sure idx is ready to use.
+      Builder.SetInsertPoint(secSub);
+    }
+
+    for (unsigned i = 1; i < opcodeParamList.size(); i++) {
+      Value *arg = opcodeParamList[i];
+      llvm::Type *Ty = arg->getType();
+      if (Ty->isPointerTy()) {
+        Ty = Ty->getPointerElementType();
+        if (dxilutil::IsHLSLResourceType(Ty)) {
+
+          DxilResourceProperties RP = GetResourcePropsFromIntrinsicObjectArg(
+              arg, HLM, typeSys, valToResPropertiesMap);
+          // Use object type directly, not by pointer.
+          // This will make sure temp object variable only used by ld/st.
+          if (GEPOperator *argGEP = dyn_cast<GEPOperator>(arg)) {
+            std::vector<Value *> idxList(argGEP->idx_begin(),
+                                         argGEP->idx_end());
+            // Create instruction to avoid GEPOperator.
+            GetElementPtrInst *GEP = GetElementPtrInst::CreateInBounds(
+                argGEP->getPointerOperand(), idxList);
+            Builder.Insert(GEP);
+            arg = GEP;
+          }
+
+          llvm::Type *ResTy = arg->getType()->getPointerElementType();
+
+          Value *Handle = CreateHandleFromResPtr(arg, HLM, HandleTy, Builder);
+          Handle = CreateAnnotateHandle(HLM, Handle, RP, ResTy, Builder);
+          opcodeParamList[i] = Handle;
+        }
+      }
+    }
+
+    Value *CI = Builder.CreateCall(opFunc, opcodeParamList);
+    if (!isDoubleSubscriptFunc) {
+      // replace new call and delete the old call
+      oldCI->replaceAllUsesWith(CI);
+      oldCI->eraseFromParent();
+    } else {
+      // For double script.
+      // Replace single users use with new CI.
+      auto U = oldCI->user_begin();
+      Value *user = *U;
+      CallInst *secSub = cast<CallInst>(user);
+      secSub->replaceAllUsesWith(CI);
+      secSub->eraseFromParent();
+      oldCI->eraseFromParent();
+    }
+  }
+  // delete the function
+  F->eraseFromParent();
+}
+
+// Walks every (function, opcode) pair in intrinsicMap. A function that has no
+// users is erased from its module right away; every other function is handed
+// to AddOpcodeParamForIntrinsic together with its opcode, the module's handle
+// type and the shared resource-properties cache.
+void AddOpcodeParamForIntrinsics(
+    HLModule &HLM, std::vector<std::pair<Function *, unsigned>> &intrinsicMap,
+    DenseMap<Value *, DxilResourceProperties> &valToResPropertiesMap) {
+  llvm::Type *HandleTy = HLM.GetOP()->GetHandleType();
+  for (const auto &entry : intrinsicMap) {
+    Function *Intrinsic = entry.first;
+    if (!Intrinsic->user_empty()) {
+      const unsigned opcode = entry.second;
+      AddOpcodeParamForIntrinsic(HLM, Intrinsic, opcode, HandleTy,
+                                 valToResPropertiesMap);
+    } else {
+      // Nothing calls it, so there is nothing to rewrite: just drop it.
+      Intrinsic->eraseFromParent();
+    }
+  }
+}
+
+}
+
+namespace {
+
+// Returns true if V is (transitively) written to.
+// The walk follows GEPs, PHIs, selects and HL cast/subscript calls, and
+// reports a write when it meets a store instruction, an HL matrix store, or a
+// call to a non-intrinsic function that is not an HL operation.
+// `visited` records the values already examined so each one is walked once.
+bool GlobalHasStoreUserRec(Value *V, std::set<Value *> &visited) {
+  // Null values and values that were already walked contribute nothing.
+  if (!V || !visited.insert(V).second)
+    return false;
+
+  for (User *U : V->users()) {
+    if (isa<StoreInst>(U))
+      return true;
+
+    if (CallInst *CI = dyn_cast<CallInst>(U)) {
+      // NOTE(review): getCalledFunction() is dereferenced without a null
+      // check; presumably indirect calls never reach here - confirm.
+      Function *Callee = CI->getCalledFunction();
+      if (Callee->isIntrinsic())
+        continue;
+
+      switch (GetHLOpcodeGroup(Callee)) {
+      case HLOpcodeGroup::NotHL:
+        // A plain user function call is treated as a write.
+        return true;
+      case HLOpcodeGroup::HLMatLoadStore: {
+        const HLMatLoadStoreOpcode matOp =
+            static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
+        const bool isMatStore = matOp == HLMatLoadStoreOpcode::ColMatStore ||
+                                matOp == HLMatLoadStoreOpcode::RowMatStore;
+        if (isMatStore)
+          return true;
+        break;
+      }
+      case HLOpcodeGroup::HLCast:
+      case HLOpcodeGroup::HLSubscript:
+        // The call result is derived from V: keep following it.
+        if (GlobalHasStoreUserRec(U, visited))
+          return true;
+        break;
+      default:
+        break;
+      }
+      continue;
+    }
+
+    const bool forwardsPointer =
+        isa<GEPOperator>(U) || isa<PHINode>(U) || isa<SelectInst>(U);
+    if (forwardsPointer && GlobalHasStoreUserRec(U, visited))
+      return true;
+  }
+  return false;
+}
+// Returns true if the global GV is written to, either by one of its direct
+// users or by a user of a value derived from it (see GlobalHasStoreUserRec
+// for the exact set of users that are followed).
+bool GlobalHasStoreUser(GlobalVariable *GV) {
+  std::set<Value *> seen;
+  return GlobalHasStoreUserRec(cast<Value>(GV), seen);
+}
+
+// Gets or creates the global "<GV name>.static.copy" in M with the same value
+// type as GV. The copy receives GV's initializer when GV has one and a
+// null-value initializer otherwise, and is given internal linkage.
+GlobalVariable *CreateStaticGlobal(llvm::Module *M, GlobalVariable *GV) {
+  llvm::Type *ValueTy = GV->getType()->getPointerElementType();
+  const std::string CopyName = GV->getName().str() + ".static.copy";
+  GlobalVariable *Copy =
+      cast<GlobalVariable>(M->getOrInsertGlobal(CopyName, ValueTy));
+
+  // The copy being static, it should be initialized per llvm rules.
+  Constant *Init = GV->hasInitializer() ? GV->getInitializer()
+                                        : Constant::getNullValue(ValueTy);
+  Copy->setInitializer(Init);
+
+  // static global should have internal linkage
+  Copy->setLinkage(GlobalValue::InternalLinkage);
+  return Copy;
+}
+
+// Marks every non-constant, non-internal global of M (other than HLSL objects
+// and group shared memory) as constant. Globals among them that are written to
+// are first redirected to an internal ".static.copy" global, and a memcpy from
+// the original into the copy is emitted at the first non-alloca insertion
+// point of EF's entry block.
+void CreateWriteEnabledStaticGlobals(llvm::Module *M, llvm::Function *EF) {
+  std::vector<GlobalVariable *> writtenGlobals;
+  for (GlobalVariable &GV : M->globals()) {
+    if (GV.isConstant() || GV.getLinkage() == GlobalValue::InternalLinkage)
+      continue;
+    // skip globals which are HLSL objects or group shared
+    if (dxilutil::IsHLSLObjectType(GV.getType()->getPointerElementType()) ||
+        dxilutil::IsSharedMemoryGlobal(&GV))
+      continue;
+
+    if (GlobalHasStoreUser(&GV))
+      writtenGlobals.emplace_back(&GV);
+    // TODO: Ensure that constant globals aren't using initializer
+    GV.setConstant(true);
+  }
+
+  IRBuilder<> Builder(
+      dxilutil::FirstNonAllocaInsertionPt(&EF->getEntryBlock()));
+  for (GlobalVariable *Original : writtenGlobals) {
+    GlobalVariable *Copy = CreateStaticGlobal(M, Original);
+    // Redirect all existing users first; the memcpy created below is then the
+    // only remaining user of the original global.
+    Original->replaceAllUsesWith(Copy);
+
+    // Seed the copy from the original at the start of the entry function.
+    llvm::Type *ValueTy = Original->getType()->getPointerElementType();
+    uint64_t size = M->getDataLayout().getTypeAllocSize(ValueTy);
+    Builder.CreateMemCpy(Copy, Original, size, 1);
+  }
+}
+
+} // namespace
+
+namespace {
+
+// Registers Entry as the entry function of HLM. When Entry is null, reports a
+// "cannot find entry function" error naming the entry requested in the
+// codegen options and leaves HLM untouched.
+void SetEntryFunction(HLModule &HLM, Function *Entry,
+                      clang::CodeGen::CodeGenModule &CGM) {
+  if (Entry != nullptr) {
+    HLM.SetEntryFunction(Entry);
+    return;
+  }
+
+  clang::DiagnosticsEngine &Diags = CGM.getDiags();
+  const unsigned DiagID = Diags.getCustomDiagID(
+      clang::DiagnosticsEngine::Error, "cannot find entry function %0");
+  Diags.Report(DiagID) << CGM.getCodeGenOpts().HLSLEntryFunction;
+}
+
+// Creates a function called Name in llvmModule with Orig's function type and
+// external linkage, clones Orig's body into it, and copies Orig's function
+// annotation from SrcTypeSys into TypeSys for the new function.
+// Returns the new function.
+Function *CloneFunction(Function *Orig, const llvm::Twine &Name,
+                        llvm::Module *llvmModule, hlsl::DxilTypeSystem &TypeSys,
+                        hlsl::DxilTypeSystem &SrcTypeSys) {
+
+  Function *Clone = Function::Create(Orig->getFunctionType(),
+                                     GlobalValue::LinkageTypes::ExternalLinkage,
+                                     Name, llvmModule);
+
+  // Pair each original argument with the clone's argument at the same
+  // position so the cloned body refers to the clone's own parameters.
+  ValueToValueMapTy ArgMap;
+  auto CloneArgIt = Clone->arg_begin();
+  for (Argument &OrigArg : Orig->args()) {
+    ArgMap[&OrigArg] = &*CloneArgIt;
+    ++CloneArgIt;
+  }
+
+  SmallVector<ReturnInst *, 2> Returns;
+  llvm::CloneFunctionInto(Clone, Orig, ArgMap, /*ModuleLevelChanges*/ false,
+                          Returns);
+  TypeSys.CopyFunctionAnnotation(Clone, Orig, SrcTypeSys);
+
+  return Clone;
+}
+
+// Clones the shader entry ShaderF so that other functions can call the clone.
+// The clone takes over ShaderF's current name and becomes internal, and its
+// return/parameter annotations lose their semantics. ShaderF itself is
+// renamed to EntryName and remains the shader entry.
+void CloneShaderEntry(Function *ShaderF, StringRef EntryName, HLModule &HLM) {
+  DxilTypeSystem &TypeSys = HLM.GetTypeSystem();
+  Function *Callable =
+      CloneFunction(ShaderF, "", HLM.GetModule(), TypeSys, TypeSys);
+
+  Callable->takeName(ShaderF);
+  Callable->setLinkage(GlobalValue::LinkageTypes::InternalLinkage);
+  // Set to name before mangled.
+  ShaderF->setName(EntryName);
+
+  // Clear semantic for cloned one.
+  auto ClearSemantic = [](DxilParameterAnnotation &PA) {
+    PA.SetSemanticString("");
+    PA.SetSemanticIndexVec({});
+  };
+
+  DxilFunctionAnnotation *CloneAnnot = HLM.GetFunctionAnnotation(Callable);
+  ClearSemantic(CloneAnnot->GetRetTypeAnnotation());
+  for (unsigned i = 0; i < CloneAnnot->GetNumParameters(); ++i)
+    ClearSemantic(CloneAnnot->GetParameterAnnotation(i));
+}
+} // namespace
+
+namespace {
+
+// Returns true if F is recorded as the function of any entry in
+// patchConstantFunctionMap. F must not be null.
+bool IsPatchConstantFunction(
+    const Function *F, StringMap<PatchConstantInfo> &patchConstantFunctionMap) {
+  DXASSERT_NOMSG(F != nullptr);
+  for (auto It = patchConstantFunctionMap.begin(),
+            End = patchConstantFunctionMap.end();
+       It != End; ++It) {
+    if (It->second.Func == F)
+      return true;
+  }
+  return false;
+}
+
+// Looks up the patch constant function named by PatchConstantFuncAttr and
+// attaches it to the hull shader entry EntryFunc.
+// Diagnostics emitted:
+//  - error when no function of that name was recorded (nothing is attached);
+//  - warning plus note when the name has more than one overload;
+//  - error for every inout parameter of the patch constant function;
+//  - error when its non-zero input/output control point count differs from
+//    the one of the hull shader entry.
+void SetPatchConstantFunctionWithAttr(
+    const EntryFunctionInfo &EntryFunc,
+    const clang::HLSLPatchConstantFuncAttr *PatchConstantFuncAttr,
+    StringMap<PatchConstantInfo> &patchConstantFunctionMap,
+    std::unordered_map<Function *, std::unique_ptr<DxilFunctionProps>>
+        &patchConstantFunctionPropsMap,
+    HLModule &HLM, clang::CodeGen::CodeGenModule &CGM) {
+  clang::DiagnosticsEngine &Diags = CGM.getDiags();
+  StringRef funcName = PatchConstantFuncAttr->getFunctionName();
+
+  auto Found = patchConstantFunctionMap.find(funcName);
+  if (Found == patchConstantFunctionMap.end()) {
+    unsigned DiagID = Diags.getCustomDiagID(
+        clang::DiagnosticsEngine::Error, "Cannot find patchconstantfunc %0.");
+    Diags.Report(PatchConstantFuncAttr->getLocation(), DiagID) << funcName;
+    return;
+  }
+  const PatchConstantInfo &Info = Found->second;
+
+  if (Info.NumOverloads != 1) {
+    unsigned DiagID =
+        Diags.getCustomDiagID(clang::DiagnosticsEngine::Warning,
+                              "Multiple overloads of patchconstantfunc %0.");
+    unsigned NoteID = Diags.getCustomDiagID(clang::DiagnosticsEngine::Note,
+                                            "This overload was selected.");
+    Diags.Report(PatchConstantFuncAttr->getLocation(), DiagID) << funcName;
+    Diags.Report(Info.SL, NoteID);
+  }
+
+  Function *patchConstFunc = Info.Func;
+  DXASSERT(
+      HLM.HasDxilFunctionProps(EntryFunc.Func),
+      " else AddHLSLFunctionInfo did not save the dxil function props for the "
+      "HS entry.");
+  DxilFunctionProps &HSProps = HLM.GetDxilFunctionProps(EntryFunc.Func);
+  HLM.SetPatchConstantFunctionForHS(EntryFunc.Func, patchConstFunc);
+  DXASSERT_NOMSG(patchConstantFunctionPropsMap.count(patchConstFunc));
+
+  // Check no inout parameter for patch constant function.
+  DxilFunctionAnnotation *Annot = HLM.GetFunctionAnnotation(patchConstFunc);
+  for (unsigned i = 0; i < Annot->GetNumParameters(); i++) {
+    const bool isInout = Annot->GetParameterAnnotation(i).GetParamInputQual() ==
+                         DxilParamInputQual::Inout;
+    if (!isInout)
+      continue;
+    unsigned DiagID = Diags.getCustomDiagID(
+        clang::DiagnosticsEngine::Error,
+        "Patch Constant function %0 should not have inout param.");
+    Diags.Report(Info.SL, DiagID) << funcName;
+  }
+
+  // Input/Output control point validation.
+  auto PropsIt = patchConstantFunctionPropsMap.find(patchConstFunc);
+  if (PropsIt == patchConstantFunctionPropsMap.end())
+    return;
+
+  const DxilFunctionProps &patchProps = *PropsIt->second;
+  const unsigned patchInputCPs = patchProps.ShaderProps.HS.inputControlPoints;
+  const unsigned entryInputCPs = HSProps.ShaderProps.HS.inputControlPoints;
+  // A count of zero on the patch constant function is never reported.
+  if (patchInputCPs != 0 && patchInputCPs != entryInputCPs) {
+    unsigned DiagID =
+        Diags.getCustomDiagID(clang::DiagnosticsEngine::Error,
+                              "Patch constant function's input patch input "
+                              "should have %0 elements, but has %1.");
+    Diags.Report(Info.SL, DiagID) << entryInputCPs << patchInputCPs;
+  }
+
+  const unsigned patchOutputCPs = patchProps.ShaderProps.HS.outputControlPoints;
+  const unsigned entryOutputCPs = HSProps.ShaderProps.HS.outputControlPoints;
+  if (patchOutputCPs != 0 && patchOutputCPs != entryOutputCPs) {
+    unsigned DiagID =
+        Diags.getCustomDiagID(clang::DiagnosticsEngine::Error,
+                              "Patch constant function's output patch input "
+                              "should have %0 elements, but has %1.");
+    Diags.Report(Info.SL, DiagID) << entryOutputCPs << patchOutputCPs;
+  }
+}
+
+// Finds the patchconstantfunc attribute recorded for the hull shader entry
+// EntryFunc and forwards to SetPatchConstantFunctionWithAttr. The attribute is
+// expected to be present in HSEntryPatchConstantFuncAttr.
+void SetPatchConstantFunction(
+    const EntryFunctionInfo &EntryFunc,
+    std::unordered_map<Function *, const clang::HLSLPatchConstantFuncAttr *>
+        &HSEntryPatchConstantFuncAttr,
+    StringMap<PatchConstantInfo> &patchConstantFunctionMap,
+    std::unordered_map<Function *, std::unique_ptr<DxilFunctionProps>>
+        &patchConstantFunctionPropsMap,
+    HLModule &HLM, clang::CodeGen::CodeGenModule &CGM) {
+
+  auto Found = HSEntryPatchConstantFuncAttr.find(EntryFunc.Func);
+
+  DXASSERT(Found != HSEntryPatchConstantFuncAttr.end(),
+           "we have checked this in AddHLSLFunctionInfo()");
+
+  const clang::HLSLPatchConstantFuncAttr *Attr = Found->second;
+  SetPatchConstantFunctionWithAttr(EntryFunc, Attr, patchConstantFunctionMap,
+                                   patchConstantFunctionPropsMap, HLM, CGM);
+}
+} // namespace
+
+namespace {
+
+// Handles code such as:
+// cbuffer A {
+//  float a;
+//  int b;
+//}
+//
+// const static struct {
+//  float a;
+//  int b;
+//}  ST = { a, b };
+//
+// GEP addresses a field of ST; it is rewritten to address the matching entry
+// of InitList (a and b above) instead.
+// Returns false, changing nothing, when GEP has fewer than two indices, i.e.
+// it does not select a field. Returns true once GEP's uses were replaced.
+bool ReplaceConstStaticGlobalUser(GEPOperator *GEP,
+                                  std::vector<Constant *> &InitList,
+                                  IRBuilder<> &Builder) {
+  // Don't use sub element.
+  if (GEP->getNumIndices() < 2)
+    return false;
+
+  auto IdxIt = GEP->idx_begin();
+  Value *FirstIdx = *IdxIt;
+  ++IdxIt;
+  // The second index selects the struct field, so it must be a constant.
+  ConstantInt *FieldIdx = dyn_cast<ConstantInt>(*IdxIt);
+  ++IdxIt;
+
+  DXASSERT(FieldIdx, "else dynamic indexing on struct field");
+  unsigned FieldNo = FieldIdx->getLimitedValue();
+  DXASSERT(FieldNo < InitList.size(), "else struct index out of bound");
+
+  // New index list: every original index except the field selector.
+  SmallVector<Value *, 4> NewIdxList;
+  NewIdxList.emplace_back(FirstIdx);
+  for (; IdxIt != GEP->idx_end(); ++IdxIt)
+    NewIdxList.emplace_back(*IdxIt);
+
+  Constant *FieldPtr = InitList[FieldNo];
+  GEP->replaceAllUsesWith(Builder.CreateGEP(FieldPtr, NewIdxList));
+  return true;
+}
+
+} // namespace
+
+namespace CGHLSLMSHelper {
+// For every const static global in staticConstGlobalInitListMap, rewrites the
+// GEP users of the global to address the recorded initializer values
+// directly. When every user could be rewritten, the body of the global's
+// constructor function (from staticConstGlobalCtorMap) is replaced by a
+// single `ret void`.
+void ReplaceConstStaticGlobals(
+    std::unordered_map<GlobalVariable *, std::vector<Constant *>>
+        &staticConstGlobalInitListMap,
+    std::unordered_map<GlobalVariable *, Function *>
+        &staticConstGlobalCtorMap) {
+
+  for (auto &entry : staticConstGlobalInitListMap) {
+    GlobalVariable *GV = entry.first;
+    std::vector<Constant *> &fieldInits = entry.second;
+    LLVMContext &Ctx = GV->getContext();
+
+    // Do the replace.
+    bool allReplaced = true;
+    for (User *U : GV->users()) {
+      // A fresh builder per user: only instruction GEPs get an insert point.
+      IRBuilder<> Builder(Ctx);
+      GEPOperator *GEP = nullptr;
+      if (GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
+        Builder.SetInsertPoint(GEPInst);
+        GEP = cast<GEPOperator>(GEPInst);
+      } else {
+        GEP = dyn_cast<GEPOperator>(U);
+      }
+
+      if (!GEP) {
+        DXASSERT(false, "invalid user of const static global");
+        continue;
+      }
+      if (!ReplaceConstStaticGlobalUser(GEP, fieldInits, Builder))
+        allReplaced = false;
+    }
+
+    if (!allReplaced)
+      continue;
+
+    // Clear the Ctor which is useless now.
+    Function *Ctor = staticConstGlobalCtorMap[GV];
+    Ctor->getBasicBlockList().clear();
+    IRBuilder<> RetBuilder(BasicBlock::Create(Ctx, "", Ctor));
+    RetBuilder.CreateRetVoid();
+  }
+}
+}
+
+namespace {
+
+// Emits, through Builder, a load from Ptr (which points to FromTy) that yields
+// a value of type ToTy without going through a pointer bitcast.
+// Supported shapes: scalar -> vec1 of the same type, integer -> <1 x i1>,
+// vector -> vec1, array -> vector of the same length and element type, and
+// i1 -> integer. Returns nullptr (emitting nothing) for any other shape.
+Value *CastLdValue(Value *Ptr, llvm::Type *FromTy, llvm::Type *ToTy,
+                   IRBuilder<> &Builder) {
+  if (!ToTy->isVectorTy()) {
+    if (FromTy != Builder.getInt1Ty())
+      return nullptr;
+    // BoolCast
+    Value *BoolV = Builder.CreateLoad(Ptr);
+    DXASSERT_NOMSG(ToTy->isIntegerTy());
+    return Builder.CreateZExt(BoolV, ToTy);
+  }
+
+  const unsigned vecSize = ToTy->getVectorNumElements();
+  llvm::Type *ToEltTy = ToTy->getVectorElementType();
+
+  if (vecSize == 1 && ToEltTy == FromTy) {
+    // ScalarToVec1Splat
+    // Change scalar into vec1.
+    Value *Scalar = Builder.CreateLoad(Ptr);
+    return Builder.CreateInsertElement(UndefValue::get(ToTy), Scalar,
+                                       static_cast<uint64_t>(0));
+  }
+
+  if (vecSize == 1 && FromTy->isIntegerTy() && ToEltTy->isIntegerTy(1)) {
+    // load(bitcast i32* to <1 x i1>*)
+    // Rewrite to
+    // insertelement(icmp ne (load i32*), 0)
+    Value *IntV = Builder.CreateLoad(Ptr);
+    Value *BoolV = Builder.CreateICmpNE(
+        IntV, ConstantInt::get(IntV->getType(), 0), "tobool");
+    return Builder.CreateInsertElement(UndefValue::get(ToTy), BoolV,
+                                       static_cast<uint64_t>(0));
+  }
+
+  if (vecSize == 1 && FromTy->isVectorTy()) {
+    // VectorTrunc
+    // Change vector into vec1.
+    Value *WideV = Builder.CreateLoad(Ptr);
+    int mask[] = {0};
+    return Builder.CreateShuffleVector(WideV, WideV, mask);
+  }
+
+  if (FromTy->isArrayTy() && FromTy->getArrayNumElements() == vecSize &&
+      FromTy->getArrayElementType() == ToEltTy) {
+    // ArrayToVector.
+    // Load the array one element at a time and assemble the vector.
+    Value *Result = UndefValue::get(ToTy);
+    Value *zeroIdx = Builder.getInt32(0);
+    for (unsigned i = 0; i < vecSize; i++) {
+      Value *EltPtr =
+          Builder.CreateInBoundsGEP(Ptr, {zeroIdx, Builder.getInt32(i)});
+      Value *Elt = Builder.CreateLoad(EltPtr);
+      Result = Builder.CreateInsertElement(Result, Elt, i);
+    }
+    return Result;
+  }
+
+  return nullptr;
+}
+
+// Store-side counterpart of CastLdValue: V has type ToTy and is about to be
+// stored through a pointer bitcast; Ptr is the un-cast pointer to FromTy.
+// Returns the value the caller should store to Ptr instead, or nullptr when
+// there is nothing for the caller to store:
+//  - vec1 -> scalar: returns element 0 of V;
+//  - vector -> array (same length and element type): the element stores are
+//    emitted here and nullptr is returned;
+//  - integer -> i1: returns `V != 0`;
+//  - any other shape: nullptr.
+// NOTE(review): nullptr is returned both for "already stored" and for
+// "unsupported", so callers cannot tell the two apart - confirm intent.
+Value *CastStValue(Value *Ptr, Value *V, llvm::Type *FromTy, llvm::Type *ToTy,
+                   IRBuilder<> &Builder) {
+  if (!ToTy->isVectorTy()) {
+    if (FromTy != Builder.getInt1Ty())
+      return nullptr;
+    // BoolCast
+    // Narrow the ToTy integer back to i1.
+    DXASSERT_NOMSG(ToTy->isIntegerTy());
+    return Builder.CreateICmpNE(V, ConstantInt::get(V->getType(), 0));
+  }
+
+  const unsigned vecSize = ToTy->getVectorNumElements();
+  llvm::Type *ToEltTy = ToTy->getVectorElementType();
+
+  if (vecSize == 1 && ToEltTy == FromTy) {
+    // ScalarToVec1Splat
+    // Change vec1 back to scalar.
+    return Builder.CreateExtractElement(V, static_cast<uint64_t>(0));
+  }
+
+  if (vecSize == 1 && FromTy->isVectorTy()) {
+    // VectorTrunc
+    // Change vec1 into vector.
+    // Should not happen.
+    // Reported error at Sema::ImpCastExprToType.
+    DXASSERT_NOMSG(0);
+    return nullptr;
+  }
+
+  if (!FromTy->isArrayTy())
+    return nullptr;
+  if (FromTy->getArrayNumElements() != vecSize ||
+      FromTy->getArrayElementType() != ToEltTy)
+    return nullptr;
+
+  // ArrayToVector.
+  // Scatter the vector into the array one element at a time.
+  Value *zeroIdx = Builder.getInt32(0);
+  for (unsigned i = 0; i < vecSize; i++) {
+    Value *Elt = Builder.CreateExtractElement(V, i);
+    Value *EltPtr =
+        Builder.CreateInBoundsGEP(Ptr, {zeroIdx, Builder.getInt32(i)});
+    Builder.CreateStore(Elt, EltPtr);
+  }
+  // The store already done.
+  // Return null to ignore use of the return value.
+  return nullptr;
+}
+
+// Tries to replace LI, a load through a FromTy* -> ToTy* bitcast, with an
+// equivalent value built from a direct load of Ptr (see CastLdValue).
+// On success all uses of LI are redirected and true is returned; LI itself is
+// left for the caller to clean up. Returns false when the cast shape is not
+// supported.
+bool SimplifyBitCastLoad(LoadInst *LI, llvm::Type *FromTy, llvm::Type *ToTy,
+                         Value *Ptr) {
+  IRBuilder<> Builder(LI);
+  // Cast FromLd to ToTy.
+  Value *Replacement = CastLdValue(Ptr, FromTy, ToTy, Builder);
+  if (!Replacement)
+    return false;
+
+  LI->replaceAllUsesWith(Replacement);
+  return true;
+}
+
+// Tries to replace SI, a store through a FromTy* -> ToTy* bitcast, with a
+// store of a converted value directly to Ptr (see CastStValue).
+// Returns true when CastStValue produced a value and the new store was
+// emitted before SI; SI itself is left for the caller to clean up. Returns
+// false when CastStValue returned null.
+bool SimplifyBitCastStore(StoreInst *SI, llvm::Type *FromTy, llvm::Type *ToTy,
+                          Value *Ptr) {
+  IRBuilder<> Builder(SI);
+  // Cast Val to FromTy.
+  Value *Converted =
+      CastStValue(Ptr, SI->getValueOperand(), FromTy, ToTy, Builder);
+  if (!Converted)
+    return false;
+
+  Builder.CreateStore(Converted, Ptr);
+  return true;
+}
+
+// Tries to rewrite GEP, which indexes through a FromTy* -> ToTy* bitcast, so
+// that it works on the un-cast pointer Ptr.
+// Only casts to a one-element vector are handled:
+//  - scalar -> vec1 of the same type: GEP's uses become Ptr itself;
+//  - vector -> vec1: GEP is re-created with the same indices on Ptr.
+// Returns true when GEP's uses were replaced. Array -> vector and bool casts
+// are not simplified here and yield false.
+bool SimplifyBitCastGEP(GEPOperator *GEP, llvm::Type *FromTy, llvm::Type *ToTy,
+                        Value *Ptr) {
+  if (!ToTy->isVectorTy() || ToTy->getVectorNumElements() != 1)
+    return false;
+
+  if (ToTy->getVectorElementType() == FromTy) {
+    // ScalarToVec1Splat
+    GEP->replaceAllUsesWith(Ptr);
+    return true;
+  }
+
+  if (!FromTy->isVectorTy())
+    return false;
+
+  // VectorTrunc
+  DXASSERT_NOMSG(
+      !isa<llvm::VectorType>(GEP->getType()->getPointerElementType()));
+  IRBuilder<> Builder(FromTy->getContext());
+  // A constant-expression GEP has no position, so no insert point is set.
+  if (Instruction *GEPInst = dyn_cast<Instruction>(GEP))
+    Builder.SetInsertPoint(GEPInst);
+  std::vector<Value *> Indices(GEP->idx_begin(), GEP->idx_end());
+  GEP->replaceAllUsesWith(Builder.CreateInBoundsGEP(Ptr, Indices));
+  return true;
+}
+// Set of instructions that have been made dead and are awaiting deletion.
+typedef SmallPtrSet<Instruction *, 4> SmallInstSet;
+// Rewrites the load, store and GEP users of the pointer bitcast BC so that
+// they operate on BC's source pointer instead of the cast result.
+// Users that were rewritten have their operands dropped and are added to
+// deadInsts; they are not erased here. Call, bitcast and addrspacecast users
+// are left untouched. Does nothing unless both sides of BC are pointers.
+void SimplifyBitCast(BitCastOperator *BC, SmallInstSet &deadInsts) {
+  Value *Ptr = BC->getOperand(0);
+  llvm::Type *FromTy = Ptr->getType();
+  llvm::Type *ToTy = BC->getType();
+
+  if (!FromTy->isPointerTy() || !ToTy->isPointerTy())
+    return;
+
+  // From here on FromTy/ToTy are the pointee types.
+  FromTy = FromTy->getPointerElementType();
+  ToTy = ToTy->getPointerElementType();
+
+  // Take care case like %2 = bitcast %struct.T* %1 to <1 x float>*.
+  bool GEPCreated = false;
+  if (FromTy->isStructTy()) {
+    IRBuilder<> Builder(FromTy->getContext());
+    // Only an instruction bitcast gives the builder an insert point.
+    if (Instruction *I = dyn_cast<Instruction>(BC))
+      Builder.SetInsertPoint(I);
+
+    Value *zeroIdx = Builder.getInt32(0);
+    // One leading zero index for the pointer itself, plus one per struct
+    // level descended below.
+    unsigned nestLevel = 1;
+    // Descend into the first field until a non-struct (or an empty struct)
+    // is reached; FromTy ends up as that innermost type.
+    while (llvm::StructType *ST = dyn_cast<llvm::StructType>(FromTy)) {
+      if (ST->getNumElements() == 0)
+        break;
+      FromTy = ST->getElementType(0);
+      nestLevel++;
+    }
+    std::vector<Value *> idxList(nestLevel, zeroIdx);
+    // Ptr now addresses the innermost first field.
+    Ptr = Builder.CreateGEP(Ptr, idxList);
+    GEPCreated = true;
+  }
+
+  // NOTE(review): dropAllReferences() below removes the current user from
+  // BC's use list while that list is being iterated - confirm this is safe
+  // with the use-list iterator in this LLVM version.
+  for (User *U : BC->users()) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      if (SimplifyBitCastLoad(LI, FromTy, ToTy, Ptr)) {
+        LI->dropAllReferences();
+        deadInsts.insert(LI);
+      }
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      if (SimplifyBitCastStore(SI, FromTy, ToTy, Ptr)) {
+        SI->dropAllReferences();
+        deadInsts.insert(SI);
+      }
+    } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+      // Only instruction GEPs can be queued for deletion; a constant
+      // expression GEP merely has its uses replaced.
+      if (SimplifyBitCastGEP(GEP, FromTy, ToTy, Ptr))
+        if (Instruction *I = dyn_cast<Instruction>(GEP)) {
+          I->dropAllReferences();
+          deadInsts.insert(I);
+        }
+    } else if (dyn_cast<CallInst>(U)) {
+      // Skip function call.
+    } else if (dyn_cast<BitCastInst>(U)) {
+      // Skip bitcast.
+    } else if (dyn_cast<AddrSpaceCastInst>(U)) {
+      // Skip addrspacecast.
+    } else {
+      DXASSERT(0, "not support yet");
+    }
+  }
+
+  // We created a GEP instruction but didn't end up consuming it, so delete it.
+  if (GEPCreated && Ptr->use_empty()) {
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+      GEP->eraseFromParent();
+    else
+      // Not an instruction: the cast<> requires it to be a constant.
+      cast<Constant>(Ptr)->destroyConstant();
+  }
+}
+
+// Callback signatures used by the Eval*Intrinsic helpers to compute the value
+// of an intrinsic for constant operands.
+// Unary evaluators, one per floating-point width.
+typedef float(__cdecl *FloatUnaryEvalFuncType)(float);
+typedef double(__cdecl *DoubleUnaryEvalFuncType)(double);
+
+// Binary evaluators for integer (APInt), float and double operands.
+typedef APInt(__cdecl *IntBinaryEvalFuncType)(const APInt &, const APInt &);
+typedef float(__cdecl *FloatBinaryEvalFuncType)(float, float);
+typedef double(__cdecl *DoubleBinaryEvalFuncType)(double, double);
+
+// Evaluates a unary intrinsic on the scalar floating-point constant fpV.
+// doubleEvalFunc is used when fpV is a double; otherwise fpV is expected to
+// be a float and floatEvalFunc is used. Returns the result as a constant of
+// fpV's type.
+Value *EvalUnaryIntrinsic(ConstantFP *fpV, FloatUnaryEvalFuncType floatEvalFunc,
+                          DoubleUnaryEvalFuncType doubleEvalFunc) {
+  llvm::Type *Ty = fpV->getType();
+  if (!Ty->isDoubleTy()) {
+    DXASSERT_NOMSG(Ty->isFloatTy());
+    const float operand = fpV->getValueAPF().convertToFloat();
+    return ConstantFP::get(Ty, floatEvalFunc(operand));
+  }
+
+  const double operand = fpV->getValueAPF().convertToDouble();
+  return ConstantFP::get(Ty, doubleEvalFunc(operand));
+}
+
+// Evaluates a binary intrinsic on the scalar constants cV0 and cV1.
+// The evaluator is chosen from cV0's type: doubleEvalFunc for double,
+// floatEvalFunc for float, and intEvalFunc (which must be non-null) for
+// integers. Returns the result as a constant of cV0's type.
+Value *EvalBinaryIntrinsic(Constant *cV0, Constant *cV1,
+                           FloatBinaryEvalFuncType floatEvalFunc,
+                           DoubleBinaryEvalFuncType doubleEvalFunc,
+                           IntBinaryEvalFuncType intEvalFunc) {
+  llvm::Type *Ty = cV0->getType();
+
+  if (Ty->isDoubleTy()) {
+    const double lhs = cast<ConstantFP>(cV0)->getValueAPF().convertToDouble();
+    const double rhs = cast<ConstantFP>(cV1)->getValueAPF().convertToDouble();
+    return ConstantFP::get(Ty, doubleEvalFunc(lhs, rhs));
+  }
+
+  if (Ty->isFloatTy()) {
+    const float lhs = cast<ConstantFP>(cV0)->getValueAPF().convertToFloat();
+    const float rhs = cast<ConstantFP>(cV1)->getValueAPF().convertToFloat();
+    return ConstantFP::get(Ty, floatEvalFunc(lhs, rhs));
+  }
+
+  // Everything else must be an integer, which needs an integer evaluator.
+  DXASSERT_NOMSG(Ty->isIntegerTy());
+  DXASSERT_NOMSG(intEvalFunc);
+  const APInt &lhs = cast<ConstantInt>(cV0)->getValue();
+  const APInt &rhs = cast<ConstantInt>(cV1)->getValue();
+  return ConstantInt::get(Ty, intEvalFunc(lhs, rhs));
+}
+
+// Constant-folds the unary intrinsic call CI, whose first argument must be a
+// floating-point constant or a constant vector of them. Vector results are
+// evaluated element by element. All uses of CI are replaced by the folded
+// value and CI is erased. Returns the folded value.
+Value *EvalUnaryIntrinsic(CallInst *CI, FloatUnaryEvalFuncType floatEvalFunc,
+                          DoubleUnaryEvalFuncType doubleEvalFunc) {
+  Value *Operand = CI->getArgOperand(0);
+  llvm::Type *RetTy = CI->getType();
+  Value *Folded = nullptr;
+
+  llvm::VectorType *VecTy = dyn_cast<llvm::VectorType>(RetTy);
+  if (!VecTy) {
+    Folded = EvalUnaryIntrinsic(cast<ConstantFP>(Operand), floatEvalFunc,
+                                doubleEvalFunc);
+  } else {
+    Folded = UndefValue::get(RetTy);
+    Constant *VecOperand = cast<Constant>(Operand);
+    IRBuilder<> Builder(CI);
+    for (unsigned i = 0; i < VecTy->getNumElements(); i++) {
+      ConstantFP *Elt = cast<ConstantFP>(VecOperand->getAggregateElement(i));
+      Value *EltFolded = EvalUnaryIntrinsic(Elt, floatEvalFunc, doubleEvalFunc);
+      Folded = Builder.CreateInsertElement(Folded, EltFolded, i);
+    }
+  }
+
+  CI->replaceAllUsesWith(Folded);
+  CI->eraseFromParent();
+  return Folded;
+}
+
+// Constant-folds the binary intrinsic call CI, whose first two arguments must
+// be constants (scalars, or vectors evaluated element by element).
+// floatEvalFunc / doubleEvalFunc handle floating-point operands; intEvalFunc
+// handles integer operands and may be omitted when the intrinsic has no
+// integer form. All uses of CI are replaced by the folded value and CI is
+// erased. Returns the folded value.
+Value *EvalBinaryIntrinsic(CallInst *CI, FloatBinaryEvalFuncType floatEvalFunc,
+                           DoubleBinaryEvalFuncType doubleEvalFunc,
+                           IntBinaryEvalFuncType intEvalFunc = nullptr) {
+  Value *V0 = CI->getArgOperand(0);
+  Value *V1 = CI->getArgOperand(1);
+  llvm::Type *Ty = CI->getType();
+  Value *Result = nullptr;
+  if (llvm::VectorType *VT = dyn_cast<llvm::VectorType>(Ty)) {
+    // Fold each lane separately and rebuild the vector.
+    Result = UndefValue::get(Ty);
+    Constant *CV0 = cast<Constant>(V0);
+    Constant *CV1 = cast<Constant>(V1);
+    IRBuilder<> Builder(CI);
+    for (unsigned i = 0; i < VT->getNumElements(); i++) {
+      Constant *cV0 = cast<Constant>(CV0->getAggregateElement(i));
+      Constant *cV1 = cast<Constant>(CV1->getAggregateElement(i));
+      Value *EltResult = EvalBinaryIntrinsic(cV0, cV1, floatEvalFunc,
+                                             doubleEvalFunc, intEvalFunc);
+      Result = Builder.CreateInsertElement(Result, EltResult, i);
+    }
+  } else {
+    Constant *cV0 = cast<Constant>(V0);
+    Constant *cV1 = cast<Constant>(V1);
+    Result = EvalBinaryIntrinsic(cV0, cV1, floatEvalFunc, doubleEvalFunc,
+                                 intEvalFunc);
+  }
+  CI->replaceAllUsesWith(Result);
+  CI->eraseFromParent();
+  return Result;
+}
+
+// Applies small local clean-ups to the instruction I:
+//  - BitCast: pointer bitcast users are simplified (see SimplifyBitCast);
+//  - Load/Store: when the pointer operand is a bitcast constant expression,
+//    that bitcast is simplified the same way;
+//  - Shl/LShr/AShr: the shift amount is masked with (bitWidth - 1).
+// Instructions made dead by the bitcast simplification are added to
+// deadInsts. Other opcodes are left untouched.
+void SimpleTransformForHLDXIRInst(Instruction *I, SmallInstSet &deadInsts) {
+
+  unsigned opcode = I->getOpcode();
+  switch (opcode) {
+  case Instruction::BitCast: {
+    BitCastOperator *BCI = cast<BitCastOperator>(I);
+    SimplifyBitCast(BCI, deadInsts);
+  } break;
+  case Instruction::Load: {
+    LoadInst *ldInst = cast<LoadInst>(I);
+    DXASSERT(!HLMatrixType::isa(ldInst->getType()),
+             "matrix load should use HL LdStMatrix");
+    Value *Ptr = ldInst->getPointerOperand();
+    if (ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(Ptr)) {
+      if (BitCastOperator *BCO = dyn_cast<BitCastOperator>(CE)) {
+        SimplifyBitCast(BCO, deadInsts);
+      }
+    }
+  } break;
+  case Instruction::Store: {
+    StoreInst *stInst = cast<StoreInst>(I);
+    Value *V = stInst->getValueOperand();
+    DXASSERT_LOCALVAR(V, !HLMatrixType::isa(V->getType()),
+                      "matrix store should use HL LdStMatrix");
+    Value *Ptr = stInst->getPointerOperand();
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+      if (BitCastOperator *BCO = dyn_cast<BitCastOperator>(CE)) {
+        SimplifyBitCast(BCO, deadInsts);
+      }
+    }
+  } break;
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::Shl: {
+    llvm::BinaryOperator *BO = cast<llvm::BinaryOperator>(I);
+    Value *op2 = BO->getOperand(1);
+    IntegerType *Ty = cast<IntegerType>(BO->getType()->getScalarType());
+    unsigned bitWidth = Ty->getBitWidth();
+    // Clamp op2 to 0 ~ bitWidth-1
+    if (ConstantInt *cOp2 = dyn_cast<ConstantInt>(op2)) {
+      // Keep the full 64-bit amount: narrowing it to 32 bits first would let
+      // an out-of-range 64-bit amount (e.g. 1 << 32) look already clamped.
+      uint64_t iOp2 = cOp2->getLimitedValue();
+      uint64_t clampedOp2 = iOp2 & (bitWidth - 1);
+      if (iOp2 != clampedOp2) {
+        BO->setOperand(1, ConstantInt::get(op2->getType(), clampedOp2));
+      }
+    } else {
+      // Non-ConstantInt amount (including vector amounts): mask it with an
+      // `and` against (bitWidth - 1).
+      Value *mask = ConstantInt::get(op2->getType(), bitWidth - 1);
+      IRBuilder<> Builder(I);
+      op2 = Builder.CreateAnd(op2, mask);
+      BO->setOperand(1, op2);
+    }
+  } break;
+  }
+}
+
+} // namespace
+
+namespace CGHLSLMSHelper {
+
+// Tries to constant-fold the call CI to the intrinsic identified by intriOp.
+// For a handled intrinsic the uses of CI are replaced with the folded value,
+// CI is erased and the folded value is returned. For every other intrinsic
+// nullptr is returned and CI is not modified.
+// Operands are cast<> to constant types, so they must already be constants.
+Value *TryEvalIntrinsic(CallInst *CI, IntrinsicOp intriOp) {
+  switch (intriOp) {
+  // Trigonometric and hyperbolic functions.
+  case IntrinsicOp::IOP_tan:
+    return EvalUnaryIntrinsic(CI, tanf, tan);
+  case IntrinsicOp::IOP_tanh:
+    return EvalUnaryIntrinsic(CI, tanhf, tanh);
+  case IntrinsicOp::IOP_sin:
+    return EvalUnaryIntrinsic(CI, sinf, sin);
+  case IntrinsicOp::IOP_sinh:
+    return EvalUnaryIntrinsic(CI, sinhf, sinh);
+  case IntrinsicOp::IOP_cos:
+    return EvalUnaryIntrinsic(CI, cosf, cos);
+  case IntrinsicOp::IOP_cosh:
+    return EvalUnaryIntrinsic(CI, coshf, cosh);
+  case IntrinsicOp::IOP_asin:
+    return EvalUnaryIntrinsic(CI, asinf, asin);
+  case IntrinsicOp::IOP_acos:
+    return EvalUnaryIntrinsic(CI, acosf, acos);
+  case IntrinsicOp::IOP_atan:
+    return EvalUnaryIntrinsic(CI, atanf, atan);
+  case IntrinsicOp::IOP_atan2: {
+    // Folded inline; both operands are required to be scalar ConstantFP.
+    ConstantFP *fpY = cast<ConstantFP>(CI->getArgOperand(0));
+    ConstantFP *fpX = cast<ConstantFP>(CI->getArgOperand(1));
+
+    llvm::Type *RetTy = CI->getType();
+    Value *Folded = nullptr;
+    if (RetTy->isDoubleTy()) {
+      double y = fpY->getValueAPF().convertToDouble();
+      double x = fpX->getValueAPF().convertToDouble();
+      Folded = ConstantFP::get(RetTy, atan2(y, x));
+    } else {
+      DXASSERT_NOMSG(RetTy->isFloatTy());
+      float y = fpY->getValueAPF().convertToFloat();
+      float x = fpX->getValueAPF().convertToFloat();
+      Folded = ConstantFP::get(RetTy, atan2f(y, x));
+    }
+    CI->replaceAllUsesWith(Folded);
+    CI->eraseFromParent();
+    return Folded;
+  }
+  // Roots, exponentials and logarithms.
+  case IntrinsicOp::IOP_sqrt:
+    return EvalUnaryIntrinsic(CI, sqrtf, sqrt);
+  case IntrinsicOp::IOP_rsqrt: {
+    auto invSqrtF = [](float v) -> float { return 1.0 / sqrtf(v); };
+    auto invSqrtD = [](double v) -> double { return 1.0 / sqrt(v); };
+    return EvalUnaryIntrinsic(CI, invSqrtF, invSqrtD);
+  }
+  case IntrinsicOp::IOP_exp:
+    return EvalUnaryIntrinsic(CI, expf, exp);
+  case IntrinsicOp::IOP_exp2:
+    return EvalUnaryIntrinsic(CI, exp2f, exp2);
+  case IntrinsicOp::IOP_log:
+    return EvalUnaryIntrinsic(CI, logf, log);
+  case IntrinsicOp::IOP_log10:
+    return EvalUnaryIntrinsic(CI, log10f, log10);
+  case IntrinsicOp::IOP_log2:
+    return EvalUnaryIntrinsic(CI, log2f, log2);
+  case IntrinsicOp::IOP_pow:
+    return EvalBinaryIntrinsic(CI, powf, pow);
+  // min/max: floating point plus signed integer comparison.
+  case IntrinsicOp::IOP_max: {
+    auto fmaxF = [](float a, float b) -> float { return a > b ? a : b; };
+    auto fmaxD = [](double a, double b) -> double { return a > b ? a : b; };
+    auto smaxI = [](const APInt &a, const APInt &b) -> APInt {
+      return a.sgt(b) ? a : b;
+    };
+    return EvalBinaryIntrinsic(CI, fmaxF, fmaxD, smaxI);
+  }
+  case IntrinsicOp::IOP_min: {
+    auto fminF = [](float a, float b) -> float { return a < b ? a : b; };
+    auto fminD = [](double a, double b) -> double { return a < b ? a : b; };
+    auto sminI = [](const APInt &a, const APInt &b) -> APInt {
+      return a.slt(b) ? a : b;
+    };
+    return EvalBinaryIntrinsic(CI, fminF, fminD, sminI);
+  }
+  // Unsigned min/max: integer operands only, so no float evaluators.
+  case IntrinsicOp::IOP_umax: {
+    DXASSERT_NOMSG(
+        CI->getArgOperand(0)->getType()->getScalarType()->isIntegerTy());
+    auto pickLarger = [](const APInt &a, const APInt &b) -> APInt {
+      return a.ugt(b) ? a : b;
+    };
+    return EvalBinaryIntrinsic(CI, nullptr, nullptr, pickLarger);
+  }
+  case IntrinsicOp::IOP_umin: {
+    DXASSERT_NOMSG(
+        CI->getArgOperand(0)->getType()->getScalarType()->isIntegerTy());
+    auto pickSmaller = [](const APInt &a, const APInt &b) -> APInt {
+      return a.ult(b) ? a : b;
+    };
+    return EvalBinaryIntrinsic(CI, nullptr, nullptr, pickSmaller);
+  }
+  case IntrinsicOp::IOP_rcp: {
+    auto recipF = [](float v) -> float { return 1.0 / v; };
+    auto recipD = [](double v) -> double { return 1.0 / v; };
+    return EvalUnaryIntrinsic(CI, recipF, recipD);
+  }
+  // Rounding.
+  case IntrinsicOp::IOP_ceil:
+    return EvalUnaryIntrinsic(CI, ceilf, ceil);
+  case IntrinsicOp::IOP_floor:
+    return EvalUnaryIntrinsic(CI, floorf, floor);
+  case IntrinsicOp::IOP_round:
+    return EvalUnaryIntrinsic(CI, roundf, round);
+  case IntrinsicOp::IOP_trunc:
+    return EvalUnaryIntrinsic(CI, truncf, trunc);
+  case IntrinsicOp::IOP_frac: {
+    auto fractionF = [](float v) -> float { return v - floor(v); };
+    auto fractionD = [](double v) -> double { return v - floor(v); };
+    return EvalUnaryIntrinsic(CI, fractionF, fractionD);
+  }
+  case IntrinsicOp::IOP_isnan: {
+    // Folded inline to an integer constant (1 for NaN, 0 otherwise) of the
+    // call's own type; the operand must be a scalar ConstantFP.
+    ConstantFP *fpArg = cast<ConstantFP>(CI->getArgOperand(0));
+    bool argIsNan = fpArg->getValueAPF().isNaN();
+    Constant *Folded = ConstantInt::get(CI->getType(), argIsNan ? 1 : 0);
+    CI->replaceAllUsesWith(Folded);
+    CI->eraseFromParent();
+    return Folded;
+  }
+  default:
+    return nullptr;
+  }
+}
+
+// Do simple transform to make later lower pass easier.
+// Runs SimpleTransformForHLDXIRInst over every instruction of every function
+// in pM, then simplifies the bitcast-operator users of static globals.
+// Instructions collected in the dead set are deleted after each of the two
+// phases.
+void SimpleTransformForHLDXIR(llvm::Module *pM) {
+  SmallInstSet deadInsts;
+
+  // Deletes everything currently in deadInsts: references are dropped for
+  // the whole set first, and only then are the instructions erased.
+  auto eraseDeadInsts = [&deadInsts]() {
+    for (Instruction *Dead : deadInsts)
+      Dead->dropAllReferences();
+    for (Instruction *Dead : deadInsts)
+      Dead->eraseFromParent();
+  };
+
+  // Phase 1: per-instruction transforms.
+  for (Function &F : pM->functions()) {
+    for (BasicBlock &BB : F.getBasicBlockList()) {
+      BasicBlock::iterator Cur = BB.begin();
+      while (Cur != BB.end()) {
+        // Step the iterator before handing the instruction to the transform.
+        Instruction *I = (Cur++);
+        // Instructions already reported dead are not transformed again.
+        if (!deadInsts.count(I))
+          SimpleTransformForHLDXIRInst(I, deadInsts);
+      }
+    }
+  }
+
+  eraseDeadInsts();
+  deadInsts.clear();
+
+  // Phase 2: bitcast operators that directly use a static global.
+  for (GlobalVariable &GV : pM->globals()) {
+    if (!dxilutil::IsStaticGlobal(&GV))
+      continue;
+    for (User *U : GV.users()) {
+      if (BitCastOperator *BCO = dyn_cast<BitCastOperator>(U)) {
+        SimplifyBitCast(BCO, deadInsts);
+      }
+    }
+  }
+
+  eraseDeadInsts();
+}
+} // namespace CGHLSLMSHelper
+
+namespace {
+
+// Rounds num up to the nearest multiple of mod. A mod of 0 means "no
+// alignment" and returns num unchanged.
+unsigned RoundToAlign(unsigned num, unsigned mod) {
+  if (mod == 0)
+    return num;
+  // Number of mod-sized chunks needed to cover num.
+  const unsigned chunks = (num + mod - 1) / mod;
+  return mod * chunks;
+}
+
+// Aligns offset for a value of type Ty on the legacy cbuffer global path.
+// Here the size is CB size; the offset still has to be aligned according to
+// the type.
+//   offset    - current offset; must be even (asserted).
+//   size      - size forwarded to AlignBufferOffsetInLegacy.
+//   Ty        - type of the value being placed.
+//   bRowMajor - matrix orientation; only consulted when Ty is a matrix.
+// A new row is requested for arrays, for non-matrix structs, and for matrices
+// with more than one row (row major) or more than one column (column major).
+unsigned AlignCBufferOffset(unsigned offset, unsigned size, llvm::Type *Ty,
+                            bool bRowMajor) {
+  DXASSERT(!(offset & 1), "otherwise we have an invalid offset.");
+
+  bool startsNewRow = false;
+  if (Ty->isArrayTy()) {
+    startsNewRow = true;
+  } else if (Ty->isStructTy()) {
+    if (HLMatrixType matTy = HLMatrixType::dyn_cast(Ty)) {
+      // Only the dimension selected by the orientation matters.
+      if (bRowMajor)
+        startsNewRow = matTy.getNumRows() > 1;
+      else
+        startsNewRow = matTy.getNumColumns() > 1;
+    } else {
+      startsNewRow = true;
+    }
+  }
+
+  const unsigned scalarBytes = Ty->getScalarSizeInBits() / 8;
+  return AlignBufferOffsetInLegacy(offset, size, scalarBytes, startsNewRow);
+}
+
+// Assigns an offset (stored as the lower bound) to every constant of CB that
+// does not have one yet, and returns the resulting end offset.
+//   CB                    - cbuffer whose constants are laid out.
+//   constVarAnnotationMap - field annotation per constant global; looked up
+//                           with at(), so every constant that needs
+//                           allocation must have an entry.
+// A lower bound of UINT_MAX marks a constant that has not been placed.
+unsigned
+AllocateDxilConstantBuffer(HLCBuffer &CB,
+                           std::unordered_map<Constant *, DxilFieldAnnotation>
+                               &constVarAnnotationMap) {
+  unsigned offset = 0;
+
+  // Scan user allocated constants first.
+  // Update offset so it lies past the furthest already-placed constant.
+  for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
+    if (C->GetLowerBound() == UINT_MAX)
+      continue;
+    unsigned size = C->GetRangeSize();
+    unsigned nextOffset = size + C->GetLowerBound();
+    if (offset < nextOffset)
+      offset = nextOffset;
+  }
+
+  // Alloc after user allocated constants.
+  for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
+    if (C->GetLowerBound() != UINT_MAX)
+      continue;
+
+    unsigned size = C->GetRangeSize();
+    llvm::Type *Ty = C->GetGlobalSymbol()->getType()->getPointerElementType();
+    // Bind by reference: the annotation is only read, so copying it for every
+    // constant is unnecessary.
+    DxilFieldAnnotation &fieldAnnotation =
+        constVarAnnotationMap.at(C->GetGlobalSymbol());
+    bool bRowMajor = HLMatrixType::isa(Ty)
+                         ? fieldAnnotation.GetMatrixAnnotation().Orientation ==
+                               MatrixOrientation::RowMajor
+                         : false;
+    // Align offset.
+    offset = AlignCBufferOffset(offset, size, Ty, bRowMajor);
+    // The guard at the top of the loop already established that this constant
+    // is unplaced, so no second UINT_MAX check is needed here.
+    C->SetLowerBound(offset);
+    offset += size;
+  }
+  return offset;
+}
+
+
+// Lays out every cbuffer of HLM with AllocateDxilConstantBuffer and records
+// the returned size on the cbuffer.
+void AllocateDxilConstantBuffers(
+    HLModule &HLM, std::unordered_map<Constant *, DxilFieldAnnotation>
+                       &constVarAnnotationMap) {
+  for (unsigned cbIdx = 0; cbIdx < HLM.GetCBuffers().size(); ++cbIdx) {
+    HLCBuffer &CB = static_cast<HLCBuffer &>(HLM.GetCBuffer(cbIdx));
+    CB.SetSize(AllocateDxilConstantBuffer(CB, constVarAnnotationMap));
+  }
+}
+
+} // namespace
+
+namespace {
+
+// Replaces the uses of V that belong to function F with NewV.
+//   V       - value whose users are visited.
+//   NewV    - replacement value.
+//   F       - only instructions inside this function are rewritten.
+//   Builder - used to materialize instruction copies of constant-operator
+//             users (GEP, bitcast) and the store for global-variable users.
+// Instruction users outside F are left untouched. Non-instruction users are
+// expected to be a GEP operator, a GlobalVariable or a bitcast operator; the
+// final cast<> asserts on anything else.
+void ReplaceUseInFunction(Value *V, Value *NewV, Function *F,
+                          IRBuilder<> &Builder) {
+  for (auto U = V->user_begin(); U != V->user_end();) {
+    // Advance before touching the user: rewriting operands changes V's use
+    // list.
+    User *user = *(U++);
+    if (Instruction *I = dyn_cast<Instruction>(user)) {
+      if (I->getParent()->getParent() == F) {
+        // Swap every operand slot that holds V for NewV.
+        for (unsigned i = 0; i < I->getNumOperands(); i++) {
+          if (I->getOperand(i) == V)
+            I->setOperand(i, NewV);
+        }
+      }
+    } else {
+      // For constant operator, create local clone which use GEP.
+      // Only support GEP and bitcast.
+      if (GEPOperator *GEPOp = dyn_cast<GEPOperator>(user)) {
+        // Rebuild the GEP on NewV with the same indices, then recurse so the
+        // users of the constant GEP inside F use the rebuilt one.
+        std::vector<Value *> idxList(GEPOp->idx_begin(), GEPOp->idx_end());
+        Value *NewGEP = Builder.CreateInBoundsGEP(NewV, idxList);
+        ReplaceUseInFunction(GEPOp, NewGEP, F, Builder);
+      } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(user)) {
+        // Change the init val into NewV with Store.
+        GV->setInitializer(nullptr);
+        Builder.CreateStore(NewV, GV);
+      } else {
+        // Must be bitcast here.
+        BitCastOperator *BC = cast<BitCastOperator>(user);
+        Value *NewBC = Builder.CreateBitCast(NewV, BC->getType());
+        ReplaceUseInFunction(BC, NewBC, F, Builder);
+      }
+    }
+  }
+}
+
+
+// Collects into usedFunc every function that contains an instruction using
+// V, either directly or through a chain of constant users.
+// Non-instruction users are followed recursively. They are expected to be a
+// GEP operator, a GlobalVariable or a bitcast operator; anything else trips
+// the cast<> below.
+void MarkUsedFunctionForConst(Value *V,
+                              std::unordered_set<Function *> &usedFunc) {
+  for (User *user : V->users()) {
+    if (Instruction *I = dyn_cast<Instruction>(user)) {
+      // Direct use from an instruction: record its parent function.
+      usedFunc.insert(I->getParent()->getParent());
+      continue;
+    }
+
+    if (isa<GEPOperator>(user) || isa<GlobalVariable>(user)) {
+      MarkUsedFunctionForConst(user, usedFunc);
+    } else {
+      // Must be bitcast here.
+      MarkUsedFunctionForConst(cast<BitCastOperator>(user), usedFunc);
+    }
+  }
+}
+
+// Creates the global variable that represents cbuffer CB and redirects the
+// uses of CB's individual constant globals to GEPs on an HL CBufferSubscript
+// call.
+//   CB       - cbuffer to process; its global symbol is set to the new
+//              variable.
+//   HLM      - module in which the variable and the HL operation calls are
+//              created.
+//   HandleTy - handle type used for the HLCreateHandle calls and as a
+//              parameter type of the subscript function.
+// Returns false, creating nothing, when none of CB's constants has a use;
+// returns true otherwise.
+bool CreateCBufferVariable(HLCBuffer &CB, HLModule &HLM, llvm::Type *HandleTy) {
+  bool bUsed = false;
+  // Build Struct for CBuffer.
+  SmallVector<llvm::Type *, 4> Elements;
+  for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
+    Value *GV = C->GetGlobalSymbol();
+    if (GV->hasNUsesOrMore(1))
+      bUsed = true;
+    // Global variable must be pointer type.
+    llvm::Type *Ty = GV->getType()->getPointerElementType();
+    Elements.emplace_back(Ty);
+  }
+  // Don't create CBuffer variable for unused cbuffer.
+  if (!bUsed)
+    return false;
+
+  llvm::Module &M = *HLM.GetModule();
+
+  // A range size other than 1 means this is an array of ConstantBuffer.
+  bool isCBArray = CB.GetRangeSize() != 1;
+  llvm::GlobalVariable *cbGV = nullptr;
+  // Result type of the subscript call (pointer to the cbuffer struct).
+  llvm::Type *cbTy = nullptr;
+
+  // Number of array dimensions of the ConstantBuffer array (0 if not array).
+  unsigned cbIndexDepth = 0;
+  if (!isCBArray) {
+    llvm::StructType *CBStructTy =
+        llvm::StructType::create(Elements, CB.GetGlobalName());
+    cbGV = new llvm::GlobalVariable(M, CBStructTy, /*IsConstant*/ true,
+                                    llvm::GlobalValue::ExternalLinkage,
+                                    /*InitVal*/ nullptr, CB.GetGlobalName());
+    cbTy = cbGV->getType();
+  } else {
+    // For array of ConstantBuffer, create array of struct instead of struct of
+    // array.
+    DXASSERT(CB.GetConstants().size() == 1,
+             "ConstantBuffer should have 1 constant");
+    Value *GV = CB.GetConstants()[0]->GetGlobalSymbol();
+    llvm::Type *CBEltTy =
+        GV->getType()->getPointerElementType()->getArrayElementType();
+    cbIndexDepth = 1;
+    // Peel nested array levels to reach the element type, counting them.
+    while (CBEltTy->isArrayTy()) {
+      CBEltTy = CBEltTy->getArrayElementType();
+      cbIndexDepth++;
+    }
+
+    // Add one level struct type to match normal case.
+    llvm::StructType *CBStructTy =
+        llvm::StructType::create({CBEltTy}, CB.GetGlobalName());
+
+    llvm::ArrayType *CBArrayTy =
+        llvm::ArrayType::get(CBStructTy, CB.GetRangeSize());
+    cbGV = new llvm::GlobalVariable(M, CBArrayTy, /*IsConstant*/ true,
+                                    llvm::GlobalValue::ExternalLinkage,
+                                    /*InitVal*/ nullptr, CB.GetGlobalName());
+
+    cbTy = llvm::PointerType::get(CBStructTy,
+                                  cbGV->getType()->getPointerAddressSpace());
+  }
+
+  CB.SetGlobalSymbol(cbGV);
+
+  llvm::Type *opcodeTy = llvm::Type::getInt32Ty(M.getContext());
+  llvm::Type *idxTy = opcodeTy;
+  Constant *zeroIdx = ConstantInt::get(opcodeTy, 0);
+
+  // Argument arrays reused for every HLCreateHandle / subscript call below;
+  // individual slots are overwritten before each call.
+  Value *HandleArgs[] = {cbGV, zeroIdx};
+
+  llvm::FunctionType *SubscriptFuncTy =
+      llvm::FunctionType::get(cbTy, {opcodeTy, HandleTy, idxTy}, false);
+
+  Function *subscriptFunc =
+      GetOrCreateHLFunction(M, SubscriptFuncTy, HLOpcodeGroup::HLSubscript,
+                            (unsigned)HLSubscriptOpcode::CBufferSubscript);
+  Constant *opArg =
+      ConstantInt::get(opcodeTy, (unsigned)HLSubscriptOpcode::CBufferSubscript);
+  Value *args[] = {opArg, nullptr, zeroIdx};
+
+  llvm::LLVMContext &Context = M.getContext();
+  llvm::Type *i32Ty = llvm::Type::getInt32Ty(Context);
+  Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
+
+  // Both tables are indexed by the constant's ID: the struct field index as
+  // an i32 constant, and the set of functions that use the constant.
+  std::vector<Value *> indexArray(CB.GetConstants().size());
+  std::vector<std::unordered_set<Function *>> constUsedFuncList(
+      CB.GetConstants().size());
+
+  for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
+    Value *idx = ConstantInt::get(i32Ty, C->GetID());
+    indexArray[C->GetID()] = idx;
+
+    Value *GV = C->GetGlobalSymbol();
+    MarkUsedFunctionForConst(GV, constUsedFuncList[C->GetID()]);
+  }
+
+  for (Function &F : M.functions()) {
+    if (F.isDeclaration())
+      continue;
+
+    // Skip HL operation functions; only other functions are rewritten.
+    if (GetHLOpcodeGroupByName(&F) != HLOpcodeGroup::NotHL)
+      continue;
+
+    IRBuilder<> Builder(F.getEntryBlock().getFirstInsertionPt());
+
+    // create HL subscript to make all the use of cbuffer start from it.
+    HandleArgs[HLOperandIndex::kCreateHandleResourceOpIdx-1] = cbGV;
+    // NOTE(review): only the resource slot is reset here. The index slots of
+    // HandleArgs/args written in the array path below keep the value from a
+    // previously processed user - confirm this is harmless.
+    CallInst *Handle = HLM.EmitHLOperationCall(
+        Builder, HLOpcodeGroup::HLCreateHandle, 0, HandleTy, HandleArgs, M);
+
+    args[HLOperandIndex::kSubscriptObjectOpIdx] = Handle;
+    Instruction *cbSubscript =
+        cast<Instruction>(Builder.CreateCall(subscriptFunc, {args}));
+
+    // Replace constant var with GEP pGV
+    for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
+      Value *GV = C->GetGlobalSymbol();
+      // Nothing to rewrite when F does not use this constant.
+      if (constUsedFuncList[C->GetID()].count(&F) == 0)
+        continue;
+
+      Value *idx = indexArray[C->GetID()];
+      if (!isCBArray) {
+        Instruction *GEP = cast<Instruction>(
+            Builder.CreateInBoundsGEP(cbSubscript, {zero, idx}));
+        // TODO: make sure the debug info is synced to GEP.
+        // GEP->setDebugLoc(GV);
+        ReplaceUseInFunction(GV, GEP, &F, Builder);
+        // Delete if no use in F.
+        if (GEP->user_empty())
+          GEP->eraseFromParent();
+      } else {
+        // Array of ConstantBuffer: every user is expected to be a GEP that
+        // indexes the array (the cast<GEPOperator> below asserts otherwise).
+        // Each one gets its own handle and subscript built from the
+        // flattened array index.
+        for (auto U = GV->user_begin(); U != GV->user_end();) {
+          User *user = *(U++);
+          if (user->user_empty())
+            continue;
+          Instruction *I = dyn_cast<Instruction>(user);
+          if (I && I->getParent()->getParent() != &F)
+            continue;
+
+          // Instruction users get a builder positioned at the instruction;
+          // constant users fall back to the entry-block builder.
+          IRBuilder<> *instBuilder = &Builder;
+          std::unique_ptr<IRBuilder<>> B;
+          if (I) {
+            B = llvm::make_unique<IRBuilder<>>(I);
+            instBuilder = B.get();
+          }
+
+          GEPOperator *GEPOp = cast<GEPOperator>(user);
+          std::vector<Value *> idxList;
+
+          DXASSERT(GEPOp->getNumIndices() >= 1 + cbIndexDepth,
+                   "must indexing ConstantBuffer array");
+          idxList.reserve(GEPOp->getNumIndices() - (cbIndexDepth - 1));
+
+          gep_type_iterator GI = gep_type_begin(*GEPOp),
+                            E = gep_type_end(*GEPOp);
+          // Keep the leading pointer index as is.
+          idxList.push_back(GI.getOperand());
+          // change array index with 0 for struct index.
+          idxList.push_back(zero);
+          GI++;
+          Value *arrayIdx = GI.getOperand();
+          GI++;
+          // Flatten the remaining array dimensions into one linear index:
+          // arrayIdx = arrayIdx * dimSize + nextIdx.
+          for (unsigned curIndex = 1; GI != E && curIndex < cbIndexDepth;
+               ++GI, ++curIndex) {
+            arrayIdx = instBuilder->CreateMul(
+                arrayIdx, Builder.getInt32(GI->getArrayNumElements()));
+            arrayIdx = instBuilder->CreateAdd(arrayIdx, GI.getOperand());
+          }
+
+          // Whatever indices are left address into the element itself.
+          for (; GI != E; ++GI) {
+            idxList.push_back(GI.getOperand());
+          }
+
+          HandleArgs[HLOperandIndex::kCreateHandleIndexOpIdx-1] = arrayIdx;
+          // This Handle/cbSubscript pair shadows the per-function pair
+          // declared above.
+          CallInst *Handle =
+
+              HLM.EmitHLOperationCall(*instBuilder,
+                                      HLOpcodeGroup::HLCreateHandle, 0,
+                                      HandleTy, HandleArgs, M);
+
+          args[HLOperandIndex::kSubscriptObjectOpIdx] = Handle;
+          args[HLOperandIndex::kSubscriptIndexOpIdx] = arrayIdx;
+
+          Instruction *cbSubscript =
+              cast<Instruction>(instBuilder->CreateCall(subscriptFunc, {args}));
+
+          Instruction *NewGEP = cast<Instruction>(
+              instBuilder->CreateInBoundsGEP(cbSubscript, idxList));
+
+          ReplaceUseInFunction(GEPOp, NewGEP, &F, *instBuilder);
+        }
+      }
+    }
+    // Delete if no use in F.
+    if (cbSubscript->user_empty()) {
+      cbSubscript->eraseFromParent();
+      Handle->eraseFromParent();
+    } else {
+      // merge GEP use for cbSubscript.
+      HLModule::MergeGepUse(cbSubscript);
+    }
+  }
+  return true;
+}
+
+// Adds the struct annotation for cbuffer CB to dxilTypeSys and fills in one
+// field annotation per constant.
+//   CB            - cbuffer whose global symbol is either a struct or, for an
+//                   array of ConstantBuffer, an array of structs.
+//   dxilTypeSys   - type system receiving the struct annotation.
+//   AnnotationMap - per-constant field annotations; looked up with
+//                   operator[].
+// Must run after cbuffer allocation, since the lower bound of each constant
+// is recorded as its cbuffer offset.
+void ConstructCBufferAnnotation(
+    HLCBuffer &CB, DxilTypeSystem &dxilTypeSys,
+    std::unordered_map<Constant *, DxilFieldAnnotation> &AnnotationMap) {
+  llvm::Type *CBValueTy =
+      CB.GetGlobalSymbol()->getType()->getPointerElementType();
+
+  llvm::StructType *CBStructTy = dyn_cast<llvm::StructType>(CBValueTy);
+  if (!CBStructTy) {
+    // For Array of ConstantBuffer: annotate the element struct.
+    llvm::ArrayType *CBArrayTy = cast<llvm::ArrayType>(CBValueTy);
+    CBStructTy = cast<llvm::StructType>(CBArrayTy->getArrayElementType());
+  }
+
+  DxilStructAnnotation *CBAnnotation =
+      dxilTypeSys.AddStructAnnotation(CBStructTy);
+  CBAnnotation->SetCBufferSize(CB.GetSize());
+
+  // Set fieldAnnotation for each constant var; the constant ID is the field
+  // index.
+  for (const std::unique_ptr<DxilResourceBase> &C : CB.GetConstants()) {
+    Constant *ConstGV = C->GetGlobalSymbol();
+    DxilFieldAnnotation &field = CBAnnotation->GetFieldAnnotation(C->GetID());
+    field = AnnotationMap[ConstGV];
+    // This is after CBuffer allocation.
+    field.SetCBufferOffset(C->GetLowerBound());
+    field.SetFieldName(C->GetGlobalName());
+  }
+}
+
+
+// Creates the global variable, and where applicable the type annotation, for
+// every cbuffer of HLM, then clears each cbuffer's constant list.
+//   HLM           - module whose cbuffers are processed.
+//   CBufferType   - type of the placeholder global created for a cbuffer
+//                   that is empty or has no used constant.
+//   AnnotationMap - forwarded to ConstructCBufferAnnotation.
+void ConstructCBuffer(
+    HLModule &HLM, llvm::Type *CBufferType,
+    std::unordered_map<Constant *, DxilFieldAnnotation> &AnnotationMap) {
+  DxilTypeSystem &dxilTypeSys = HLM.GetTypeSystem();
+  llvm::Type *HandleTy = HLM.GetOP()->GetHandleType();
+  for (unsigned cbIdx = 0; cbIdx < HLM.GetCBuffers().size(); ++cbIdx) {
+    HLCBuffer &CB = static_cast<HLCBuffer &>(HLM.GetCBuffer(cbIdx));
+
+    // A real variable is only attempted for a non-empty cbuffer;
+    // CreateCBufferVariable itself reports whether it created one.
+    const bool bCreated = CB.GetConstants().size() != 0 &&
+                          CreateCBufferVariable(CB, HLM, HandleTy);
+    if (bCreated) {
+      ConstructCBufferAnnotation(CB, dxilTypeSys, AnnotationMap);
+    } else {
+      // Create Fake variable for cbuffer which is empty or unused.
+      llvm::GlobalVariable *pGV = new llvm::GlobalVariable(
+          *HLM.GetModule(), CBufferType, true,
+          llvm::GlobalValue::ExternalLinkage, nullptr, CB.GetGlobalName());
+      CB.SetGlobalSymbol(pGV);
+    }
+
+    // The constants are of no further use once the cbuffer is constructed.
+    CB.GetConstants().clear();
+  }
+}
+}
+
+namespace CGHLSLMSHelper {
+
+// Align cbuffer offset in legacy mode (16 bytes per row).
+//   offset            - offset to align.
+//   size              - size of the value to place.
+//   scalarSizeInBytes - alignment applied when the value stays in the
+//                       current row.
+//   bNeedNewRow       - force the value onto the start of the next row.
+// Returns offset unchanged when it already sits on a row boundary; otherwise
+// the start of the next row if the value would cross the row end or a new
+// row is requested; otherwise offset rounded up to scalarSizeInBytes.
+unsigned AlignBufferOffsetInLegacy(unsigned offset, unsigned size,
+                                   unsigned scalarSizeInBytes,
+                                   bool bNeedNewRow) {
+  const unsigned posInRow = offset & 0xf;
+  if (posInRow == 0)
+    return offset;
+
+  // Start from new row
+  const bool crossesRowEnd = posInRow + size > 16;
+  if (crossesRowEnd || bNeedNewRow)
+    return offset + 16 - posInRow;
+
+  // If not, naturally align data
+  return RoundToAlign(offset, scalarSizeInBytes);
+}
+
+// Translate RayQuery constructor.  From:
+//  %call = call %"RayQuery<flags>" @<constructor>(%"RayQuery<flags>" %ptr)
+// To:
+//  i32 %handle = AllocateRayQuery(i32 <IntrinsicOp::IOP_AllocateRayQuery>,
+//                                 i32 %flags)
+//  %gep = GEP %"RayQuery<flags>" %ptr, 0, 0
+//  store i32* %gep, i32 %handle
+//  ; and replace uses of %call with %ptr
+// Every matched constructor function is erased from the module once all of
+// its calls have been rewritten.
+void TranslateRayQueryConstructor(HLModule &HLM) {
+  llvm::Module &M = *HLM.GetModule();
+  // Collect first, rewrite afterwards: the rewrite erases functions from the
+  // module being iterated here.
+  SmallVector<Function *, 4> Constructors;
+  for (auto &F : M.functions()) {
+    // Match templated RayQuery constructor instantiation by prefix and
+    // signature. It should be impossible to achieve the same signature from
+    // HLSL.
+    if (!F.getName().startswith("\01??0?$RayQuery@$"))
+      continue;
+    // Return type must be a pointer to an HLSL RayQuery type...
+    llvm::Type *Ty = F.getReturnType();
+    if (!Ty->isPointerTy() ||
+        !dxilutil::IsHLSLRayQueryType(Ty->getPointerElementType()))
+      continue;
+    // ...and the single parameter must have that same type.
+    if (F.arg_size() != 1 || Ty != F.arg_begin()->getType())
+      continue;
+    Constructors.emplace_back(&F);
+  }
+
+  for (auto pConstructorFunc : Constructors) {
+    llvm::IntegerType *i32Ty = llvm::Type::getInt32Ty(M.getContext());
+    llvm::ConstantInt *i32Zero =
+        llvm::ConstantInt::get(i32Ty, (uint64_t)0, false);
+    // Signature of the AllocateRayQuery HL intrinsic: i32 (i32 opcode,
+    // i32 flags).
+    llvm::FunctionType *funcTy =
+        llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty}, false);
+    unsigned opcode = (unsigned)IntrinsicOp::IOP_AllocateRayQuery;
+    llvm::ConstantInt *opVal = llvm::ConstantInt::get(i32Ty, opcode, false);
+    Function *opFunc =
+        GetOrCreateHLFunction(M, funcTy, HLOpcodeGroup::HLIntrinsic, opcode);
+
+    // Drain the user list: each iteration erases the call it handles, so the
+    // first remaining user is always the next one to process.
+    while (!pConstructorFunc->user_empty()) {
+      Value *V = *pConstructorFunc->user_begin();
+      llvm::CallInst *CI = cast<CallInst>(V); // Must be call
+      llvm::Value *pThis = CI->getArgOperand(0);
+      llvm::StructType *pRQType =
+          cast<llvm::StructType>(pThis->getType()->getPointerElementType());
+      // The ray flags come from the integral template argument recorded in
+      // the struct annotation of this RayQuery specialization.
+      DxilStructAnnotation *SA =
+          HLM.GetTypeSystem().GetStructAnnotation(pRQType);
+      DXASSERT(SA, "otherwise, could not find type annoation for RayQuery "
+                   "specialization");
+      DXASSERT(SA->GetNumTemplateArgs() == 1 &&
+                   SA->GetTemplateArgAnnotation(0).IsIntegral(),
+               "otherwise, RayQuery has changed, or lacks template args");
+      llvm::IRBuilder<> Builder(CI);
+      llvm::Value *rayFlags =
+          Builder.getInt32(SA->GetTemplateArgAnnotation(0).GetIntegral());
+      llvm::Value *Call =
+          Builder.CreateCall(opFunc, {opVal, rayFlags}, pThis->getName());
+      // Store the allocated handle into the first field of the object.
+      llvm::Value *GEP = Builder.CreateInBoundsGEP(pThis, {i32Zero, i32Zero});
+      Builder.CreateStore(Call, GEP);
+      CI->replaceAllUsesWith(pThis);
+      CI->eraseFromParent();
+    }
+    pConstructorFunc->eraseFromParent();
+  }
+}
+}
+
+namespace {
+
+// Tries to turn the stores performed by Ctor into a constant array
+// initializer for the global variable they target.
+// Succeeds only when
+//  - Ctor contains nothing but stores and return instructions,
+//  - every stored value is a non-pointer Constant,
+//  - at least one store goes through a GEP operator on a GlobalVariable,
+//  - that global has array type with exactly as many elements as there are
+//    stores.
+// On success the stored values, in instruction order, become the global's
+// initializer and true is returned; otherwise false is returned and the
+// global is not modified.
+bool BuildImmInit(Function *Ctor) {
+  GlobalVariable *TargetGV = nullptr;
+  SmallVector<Constant *, 4> Values;
+  for (inst_iterator It = inst_begin(Ctor), End = inst_end(Ctor); It != End;
+       ++It) {
+    Instruction *Inst = &(*It);
+    StoreInst *SI = dyn_cast<StoreInst>(Inst);
+    if (!SI) {
+      // Anything other than a store or a return disqualifies the function.
+      if (!isa<ReturnInst>(Inst))
+        return false;
+      continue;
+    }
+
+    Value *Stored = SI->getValueOperand();
+    if (!isa<Constant>(Stored) || Stored->getType()->isPointerTy())
+      return false;
+    Values.emplace_back(cast<Constant>(Stored));
+
+    // Track the global behind the store address; all stores that go through
+    // a GEP on a global are expected to hit the same one.
+    GEPOperator *GepOp = dyn_cast<GEPOperator>(SI->getPointerOperand());
+    if (!GepOp)
+      continue;
+    GlobalVariable *pGV = dyn_cast<GlobalVariable>(GepOp->getPointerOperand());
+    if (!pGV)
+      continue;
+    if (TargetGV == nullptr) {
+      TargetGV = pGV;
+    } else {
+      DXASSERT(TargetGV == pGV, "else pointer mismatch");
+    }
+  }
+  if (!TargetGV)
+    return false;
+
+  // TODO: support other types.
+  llvm::ArrayType *AT =
+      dyn_cast<llvm::ArrayType>(TargetGV->getType()->getElementType());
+  if (!AT)
+    return false;
+  if (Values.size() != AT->getNumElements())
+    return false;
+
+  TargetGV->setInitializer(llvm::ConstantArray::get(AT, Values));
+  return true;
+}
+
+
+} // namespace
+
+namespace CGHLSLMSHelper {
+
+// Processes the constructor list stored in the global named globalName and
+// then removes that global from M.
+//   M          - module holding the list.
+//   globalName - name of the global whose ConstantArray initializer lists the
+//                constructors; operand 1 of each entry is the function.
+//   InsertPt   - calls that cannot be folded are emitted before this
+//                instruction.
+// Each listed function may only contain calls and returns. For every callee,
+// BuildImmInit is tried first; when it fails a call to the callee is emitted
+// at InsertPt instead.
+// Nothing happens when the global is missing or its initializer is not a
+// ConstantArray.
+void ProcessCtorFunctions(llvm::Module &M, StringRef globalName,
+                          Instruction *InsertPt) {
+  GlobalVariable *CtorListGV = M.getGlobalVariable(globalName);
+  if (!CtorListGV)
+    return;
+  ConstantArray *CtorList =
+      dyn_cast<ConstantArray>(CtorListGV->getInitializer());
+  if (!CtorList)
+    return;
+
+  // add global call to entry func
+  IRBuilder<> Builder(InsertPt);
+  for (User::op_iterator Entry = CtorList->op_begin(),
+                         EntryEnd = CtorList->op_end();
+       Entry != EntryEnd; ++Entry) {
+    if (isa<ConstantAggregateZero>(*Entry))
+      continue;
+    ConstantStruct *CS = cast<ConstantStruct>(*Entry);
+
+    // Must have a function or null ptr; anything that is not a function is
+    // skipped.
+    Function *Ctor = dyn_cast<Function>(CS->getOperand(1));
+    if (!Ctor)
+      continue;
+    DXASSERT(Ctor->getReturnType()->isVoidTy() && Ctor->arg_size() == 0,
+             "function type must be void (void)");
+
+    for (inst_iterator It = inst_begin(Ctor), ItEnd = inst_end(Ctor);
+         It != ItEnd; ++It) {
+      CallInst *CI = dyn_cast<CallInst>(&(*It));
+      if (!CI) {
+        DXASSERT(isa<ReturnInst>(&(*It)),
+                 "else invalid Global constructor function");
+        continue;
+      }
+      Function *Callee = CI->getCalledFunction();
+      // Try to build imm initilizer.
+      // If not work, add global call to entry func.
+      if (!BuildImmInit(Callee)) {
+        Builder.CreateCall(Callee);
+      }
+    }
+  }
+  // remove the GV
+  CtorListGV->eraseFromParent();
+}
+
+// Finalizes the constant buffers of HLM in two steps: offsets and sizes are
+// assigned first (AllocateDxilConstantBuffers), then the global variable and
+// type annotation of each cbuffer are created (ConstructCBuffer).
+//   HLM                   - module whose cbuffers are finalized.
+//   CBufferType           - forwarded to ConstructCBuffer.
+//   constVarAnnotationMap - forwarded to both steps.
+void FinishCBuffer(HLModule &HLM, llvm::Type *CBufferType,
+                   std::unordered_map<Constant *, DxilFieldAnnotation>
+                       &constVarAnnotationMap) {
+  // Allocate constant buffers.
+  AllocateDxilConstantBuffers(HLM, constVarAnnotationMap);
+  // TODO: create temp variable for constant which has store use.
+
+  // Create Global variable and type annotation for each CBuffer.
+  ConstructCBuffer(HLM, CBufferType, constVarAnnotationMap);
+}
+
+// Registers with HLM the register bindings recorded for the constants of
+// every cbuffer.
+//   HLM                   - module whose cbuffers are scanned and which
+//                           receives the bindings.
+//   constantRegBindingMap - (resource class, register number) pairs per
+//                           constant global; accessed with operator[].
+// For each constant with at least one binding, AddRegBinding is called with
+// the cbuffer ID, the constant's position within the cbuffer, and the SRV,
+// UAV and Sampler register numbers (UINT_MAX for a class with no binding).
+void AddRegBindingsForResourceInConstantBuffer(
+    HLModule &HLM,
+    llvm::DenseMap<llvm::Constant *,
+                   llvm::SmallVector<std::pair<DXIL::ResourceClass, unsigned>,
+                                     1>> &constantRegBindingMap) {
+  for (unsigned cbIdx = 0; cbIdx < HLM.GetCBuffers().size(); ++cbIdx) {
+    HLCBuffer &CB = static_cast<HLCBuffer &>(HLM.GetCBuffer(cbIdx));
+    auto &Constants = CB.GetConstants();
+    for (unsigned constIdx = 0; constIdx < Constants.size(); ++constIdx) {
+      Constant *ConstGV = Constants[constIdx]->GetGlobalSymbol();
+      auto &bindings = constantRegBindingMap[ConstGV];
+      if (bindings.empty())
+        continue;
+
+      // UINT_MAX stands for "no register of this class".
+      unsigned srvReg = UINT_MAX;
+      unsigned uavReg = UINT_MAX;
+      unsigned samplerReg = UINT_MAX;
+      for (const auto &binding : bindings) {
+        const unsigned regNum = binding.second;
+        switch (binding.first) {
+        case DXIL::ResourceClass::SRV:
+          srvReg = regNum;
+          break;
+        case DXIL::ResourceClass::UAV:
+          uavReg = regNum;
+          break;
+        case DXIL::ResourceClass::Sampler:
+          samplerReg = regNum;
+          break;
+        default:
+          DXASSERT(0, "invalid resource class");
+          break;
+        }
+      }
+      HLM.AddRegBinding(CB.GetID(), constIdx, srvReg, uavReg, samplerReg);
+    }
+  }
+}
+
+
+// extension codegen.
+// Runs the HLSL extension hooks configured in CGM's codegen options:
+//  1. writes semantic defines into the module and reports every returned
+//     item as a warning or an error diagnostic;
+//  2. when a custom root signature is found, compiles it and stores the
+//     serialized result on HLM.
+void ExtensionCodeGen(HLModule &HLM, clang::CodeGen::CodeGenModule &CGM) {
+  using ExtHelper = HLSLExtensionsCodegenHelper;
+
+  // Add semantic defines for extensions if any are available.
+  ExtHelper::SemanticDefineErrorList defineErrors =
+      CGM.getCodeGenOpts().HLSLExtensionsCodegen->WriteSemanticDefines(
+          HLM.GetModule());
+
+  clang::DiagnosticsEngine &Diags = CGM.getDiags();
+  for (const ExtHelper::SemanticDefineError &defineError : defineErrors) {
+    const clang::DiagnosticsEngine::Level severity =
+        defineError.IsWarning() ? clang::DiagnosticsEngine::Warning
+                                : clang::DiagnosticsEngine::Error;
+    const unsigned DiagID = Diags.getCustomDiagID(severity, "%0");
+    Diags.Report(
+        clang::SourceLocation::getFromRawEncoding(defineError.Location()),
+        DiagID)
+        << defineError.Message();
+  }
+
+  // Add root signature from a #define. Overrides root signature in function
+  // attribute.
+  using Status = ExtHelper::CustomRootSignature::Status;
+  ExtHelper::CustomRootSignature customRootSig;
+  const Status status =
+      CGM.getCodeGenOpts().HLSLExtensionsCodegen->GetCustomRootSignature(
+          &customRootSig);
+  if (status != Status::FOUND)
+    return;
+
+  // set root signature version.
+  DxilRootSignatureVersion rootSigVer;
+  if (CGM.getLangOpts().RootSigMinor == 0) {
+    rootSigVer = hlsl::DxilRootSignatureVersion::Version_1_0;
+  } else {
+    DXASSERT(CGM.getLangOpts().RootSigMinor == 1,
+             "else CGMSHLSLRuntime Constructor needs to be updated");
+    rootSigVer = hlsl::DxilRootSignatureVersion::Version_1_1;
+  }
+
+  RootSignatureHandle RootSigHandle;
+  CompileRootSignature(
+      customRootSig.RootSignature, Diags,
+      clang::SourceLocation::getFromRawEncoding(
+          customRootSig.EncodedSourceLocation),
+      rootSigVer, DxilRootSignatureCompilationFlags::GlobalRootSignature,
+      &RootSigHandle);
+  // An empty handle leaves the module's root signature untouched.
+  if (RootSigHandle.IsEmpty())
+    return;
+  RootSigHandle.EnsureSerializedAvailable();
+  HLM.SetSerializedRootSignature(RootSigHandle.GetSerializedBytes(),
+                                 RootSigHandle.GetSerializedSize());
+}
+} // namespace CGHLSLMSHelper
+
+namespace {
+void ReportDisallowedTypeInExportParam(clang::CodeGen ::CodeGenModule &CGM,
+                                       StringRef name) {
+  clang::DiagnosticsEngine &Diags = CGM.getDiags();
+  unsigned DiagID =
+      Diags.getCustomDiagID(clang::DiagnosticsEngine::Error,
+                            "Exported function %0 must not contain a "
+                            "resource in parameter or return type.");
+  std::string escaped;
+  llvm::raw_string_ostream os(escaped);
+  dxilutil::PrintEscapedString(name, os);
+  Diags.Report(DiagID) << os.str();
+}
+} // namespace
+
+namespace CGHLSLMSHelper {
+/// Replaces each non-null clip plane value of the listed functions with a new
+/// external global variable.
+///
+/// For every function in \p clipPlaneFuncList, a load of the original clip
+/// plane pointer and a store into a freshly created global named
+/// "SV_ClipPlane<i>" are emitted at the first insertion point of the entry
+/// block; the function props are then updated to reference the global. When
+/// full debug info is enabled, the builder's debug location is set from
+/// \p debugInfoMap before the instructions are created.
+void FinishClipPlane(HLModule &HLM, std::vector<Function *> &clipPlaneFuncList,
+                    std::unordered_map<Value *, DebugLoc> &debugInfoMap,
+                    clang::CodeGen::CodeGenModule &CGM) {
+  Module &M = *HLM.GetModule();
+  const bool emitDebugLoc = CGM.getCodeGenOpts().getDebugInfo() ==
+                            clang::CodeGenOptions::FullDebugInfo;
+
+  for (Function *F : clipPlaneFuncList) {
+    DxilFunctionProps &props = HLM.GetDxilFunctionProps(F);
+    IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt());
+
+    for (unsigned planeIdx = 0; planeIdx < DXIL::kNumClipPlanes; ++planeIdx) {
+      Value *srcPlane = props.ShaderProps.VS.clipPlanes[planeIdx];
+      if (srcPlane == nullptr)
+        continue;
+
+      if (emitDebugLoc)
+        Builder.SetCurrentDebugLocation(debugInfoMap[srcPlane]);
+
+      llvm::Type *planeTy = srcPlane->getType()->getPointerElementType();
+      // Not constant and without initializer: the value is stored below.
+      GlobalVariable *planeGV = new llvm::GlobalVariable(
+          M, planeTy, /*IsConstant*/ false,
+          llvm::GlobalValue::ExternalLinkage,
+          /*InitVal*/ nullptr, Twine("SV_ClipPlane") + Twine(planeIdx));
+      Value *loadedPlane = Builder.CreateLoad(srcPlane);
+      Builder.CreateStore(loadedPlane, planeGV);
+      props.ShaderProps.VS.clipPlanes[planeIdx] = planeGV;
+    }
+  }
+}
+} // namespace CGHLSLMSHelper
+
+namespace {
+/// Applies the export map to the functions of the module.
+///
+/// For library targets with a non-empty export map this
+///  1. registers the patch constant function of every hull shader entry as
+///     exported,
+///  2. runs every defined, non-intrinsic, non-HL function through the export
+///     map and reports name collisions and unused exports as errors.
+/// Afterwards (for any target) each function with recorded renames is renamed
+/// to the first rename if it does not already carry one of the names, and is
+/// cloned once for every other name; DxilFunctionProps are copied to clones.
+void LowerExportFunctions(HLModule &HLM, clang::CodeGen::CodeGenModule &CGM,
+                          dxilutil::ExportMap &exportMap,
+                          StringMap<EntryFunctionInfo> &entryFunctionMap) {
+  const bool isLibrary = HLM.GetShaderModel()->IsLib();
+  Module &M = *HLM.GetModule();
+
+  // Step 1: patch constant functions of hull shader entries are exported.
+  if (isLibrary && !exportMap.empty()) {
+    for (auto &entry : entryFunctionMap) {
+      Function *entryFunc = entry.second.Func;
+      if (!HLM.HasDxilFunctionProps(entryFunc))
+        continue;
+      const DxilFunctionProps &entryProps =
+          HLM.GetDxilFunctionProps(entryFunc);
+      if (entryProps.IsHS())
+        exportMap.RegisterExportedFunction(
+            entryProps.ShaderProps.HS.patchConstantFunc);
+    }
+  }
+
+  // Step 2: feed candidate functions to the export map and report problems.
+  if (isLibrary && !exportMap.empty()) {
+    exportMap.BeginProcessing();
+    for (Function &candidate : M.functions()) {
+      const bool skip = candidate.isDeclaration() || candidate.isIntrinsic() ||
+                        GetHLOpcodeGroup(&candidate) != HLOpcodeGroup::NotHL;
+      if (skip)
+        continue;
+      exportMap.ProcessFunction(&candidate, true);
+    }
+    // TODO: add subobject export names here.
+    if (!exportMap.EndProcessing()) {
+      clang::DiagnosticsEngine &Diags = CGM.getDiags();
+      for (auto &collidingName : exportMap.GetNameCollisions()) {
+        const unsigned DiagID = Diags.getCustomDiagID(
+            clang::DiagnosticsEngine::Error,
+            "Export name collides with another export: %0");
+        std::string escapedName;
+        llvm::raw_string_ostream escapedStream(escapedName);
+        dxilutil::PrintEscapedString(collidingName, escapedStream);
+        Diags.Report(DiagID) << escapedStream.str();
+      }
+      for (auto &unusedName : exportMap.GetUnusedExports()) {
+        const unsigned DiagID =
+            Diags.getCustomDiagID(clang::DiagnosticsEngine::Error,
+                                  "Could not find target for export: %0");
+        std::string escapedName;
+        llvm::raw_string_ostream escapedStream(escapedName);
+        dxilutil::PrintEscapedString(unusedName, escapedStream);
+        Diags.Report(DiagID) << escapedStream.str();
+      }
+    }
+  }
+
+  // Step 3: apply the renames recorded by the export map.
+  for (auto &renameEntry : exportMap.GetFunctionRenames()) {
+    Function *F = renameEntry.first;
+    auto &newNames = renameEntry.second;
+    if (newNames.empty())
+      continue;
+
+    // Rename the original, if necessary, then clone the rest
+    if (newNames.find(F->getName()) == newNames.end())
+      F->setName(*newNames.begin());
+
+    for (auto &newName : newNames) {
+      // The original already carries this name; nothing to clone.
+      if (F->getName() == newName)
+        continue;
+      Function *clonedFunc = CloneFunction(F, newName, &M, HLM.GetTypeSystem(),
+                                           HLM.GetTypeSystem());
+      // add DxilFunctionProps if entry
+      if (!HLM.HasDxilFunctionProps(F))
+        continue;
+      DxilFunctionProps &origProps = HLM.GetDxilFunctionProps(F);
+      auto clonedProps = llvm::make_unique<DxilFunctionProps>(origProps);
+      HLM.AddDxilFunctionProps(clonedFunc, clonedProps);
+    }
+  }
+}
+
+void CheckResourceParameters(HLModule &HLM,
+                             clang::CodeGen::CodeGenModule &CGM) {
+  Module &M = *HLM.GetModule();
+  for (Function &f : M.functions()) {
+    // Skip llvm intrinsics, non-external linkage, entry/patch constant func,
+    // and HL intrinsics
+    if (!f.isIntrinsic() &&
+        f.getLinkage() == GlobalValue::LinkageTypes::ExternalLinkage &&
+        !HLM.HasDxilFunctionProps(&f) && !HLM.IsPatchConstantShader(&f) &&
+        GetHLOpcodeGroup(&f) == HLOpcodeGroup::NotHL) {
+      // Verify no resources in param/return types
+      if (dxilutil::ContainsHLSLObjectType(f.getReturnType())) {
+        ReportDisallowedTypeInExportParam(CGM, f.getName());
+        continue;
+      }
+      for (auto &Arg : f.args()) {
+        if (dxilutil::ContainsHLSLObjectType(Arg.getType())) {
+          ReportDisallowedTypeInExportParam(CGM, f.getName());
+          break;
+        }
+      }
+    }
+  }
+}
+
+} // namespace
+
+namespace CGHLSLMSHelper {
+
+/// Finalizes function linkage and inlining attributes for the module.
+///
+/// Steps, in order:
+///  1. Non-library targets: the entry function, patch constant functions and
+///     declarations get external linkage, everything else internal linkage.
+///     A declaration that is neither an intrinsic nor an HL operation is
+///     reported as an error and the function returns immediately.
+///     For all targets, used function definitions without NoInline are marked
+///     AlwaysInline.
+///  2. Export renames/clones are applied (LowerExportFunctions).
+///  3. With ExportShadersOnly, external non-shader user functions become
+///     internal.
+///  4. Patch constant functions of external hull shaders are forced external.
+///  5. For library targets other than the offline-linking minor version,
+///     resource types in exported function signatures are diagnosed.
+void UpdateLinkage(HLModule &HLM, clang::CodeGen::CodeGenModule &CGM,
+                   dxilutil::ExportMap &exportMap,
+                   StringMap<EntryFunctionInfo> &entryFunctionMap,
+                   StringMap<PatchConstantInfo> &patchConstantFunctionMap) {
+  using Linkage = GlobalValue::LinkageTypes;
+
+  const bool isLibrary = HLM.GetShaderModel()->IsLib();
+  Module &M = *HLM.GetModule();
+
+  // Step 1: pin entry point / patch constant functions / declarations, mark
+  // everything else internal, and request inlining of used definitions.
+  for (Function &func : M.functions()) {
+    if (!isLibrary) {
+      const bool keepExternal =
+          &func == HLM.GetEntryFunction() ||
+          IsPatchConstantFunction(&func, patchConstantFunctionMap) ||
+          func.isDeclaration();
+      if (!keepExternal) {
+        func.setLinkage(Linkage::InternalLinkage);
+      } else {
+        const bool isUserDeclaration =
+            func.isDeclaration() && !func.isIntrinsic() &&
+            GetHLOpcodeGroup(&func) == HLOpcodeGroup::NotHL;
+        if (isUserDeclaration) {
+          clang::DiagnosticsEngine &Diags = CGM.getDiags();
+          const unsigned DiagID = Diags.getCustomDiagID(
+              clang::DiagnosticsEngine::Error,
+              "External function used in non-library profile: %0");
+          std::string escapedName;
+          llvm::raw_string_ostream escapedStream(escapedName);
+          dxilutil::PrintEscapedString(func.getName(), escapedStream);
+          Diags.Report(DiagID) << escapedStream.str();
+          // Stop processing altogether after the first such error.
+          return;
+        }
+        func.setLinkage(Linkage::ExternalLinkage);
+      }
+    }
+    // Skip no inline functions.
+    if (func.hasFnAttribute(llvm::Attribute::NoInline))
+      continue;
+    // Always inline for used functions.
+    const bool isUsedDefinition = !func.user_empty() && !func.isDeclaration();
+    if (isUsedDefinition)
+      func.addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+
+  // Step 2: apply export renames and clones.
+  LowerExportFunctions(HLM, CGM, exportMap, entryFunctionMap);
+
+  // Step 3: optionally hide everything that is not a shader.
+  if (CGM.getCodeGenOpts().ExportShadersOnly) {
+    for (Function &func : M.functions()) {
+      // Skip declarations, intrinsics, shaders, and non-external linkage
+      const bool skip =
+          func.isDeclaration() || func.isIntrinsic() ||
+          GetHLOpcodeGroup(&func) != HLOpcodeGroup::NotHL ||
+          HLM.HasDxilFunctionProps(&func) ||
+          HLM.IsPatchConstantShader(&func) ||
+          func.getLinkage() != Linkage::ExternalLinkage;
+      if (skip)
+        continue;
+      // Mark non-shader user functions as InternalLinkage
+      func.setLinkage(Linkage::InternalLinkage);
+    }
+  }
+
+  // Step 4: iterate hull shaders and make sure their corresponding patch
+  // constant functions are marked ExternalLinkage.
+  for (Function &func : M.functions()) {
+    const bool skip = func.isDeclaration() || func.isIntrinsic() ||
+                      GetHLOpcodeGroup(&func) != HLOpcodeGroup::NotHL ||
+                      func.getLinkage() != Linkage::ExternalLinkage ||
+                      !HLM.HasDxilFunctionProps(&func);
+    if (skip)
+      continue;
+    DxilFunctionProps &shaderProps = HLM.GetDxilFunctionProps(&func);
+    if (!shaderProps.IsHS())
+      continue;
+    Function *patchConstantFunc = shaderProps.ShaderProps.HS.patchConstantFunc;
+    if (patchConstantFunc->getLinkage() != Linkage::ExternalLinkage)
+      patchConstantFunc->setLinkage(Linkage::ExternalLinkage);
+  }
+
+  // Step 5: disallow resource arguments in (non-entry) function exports
+  // unless offline linking target.
+  const bool checkExports =
+      isLibrary &&
+      HLM.GetShaderModel()->GetMinor() != ShaderModel::kOfflineMinor;
+  if (checkExports)
+    CheckResourceParameters(HLM, CGM);
+}
+/// Finalizes shader entry points.
+///
+/// Library targets have no single entry: every non-ray-tracing function in
+/// \p entryFunctionMap is cloned as a shader entry, and entries that have a
+/// recorded patch constant function attribute get their patch constant
+/// function set from it.
+///
+/// Other targets: \p Entry is installed as the entry function (returning early
+/// if that failed), write-enabled static globals are created in DX9 compat
+/// mode for HLSL versions up to 2016, and hull shaders get their patch
+/// constant function set.
+void FinishEntries(
+    HLModule &HLM, const EntryFunctionInfo &Entry,
+    clang::CodeGen::CodeGenModule &CGM,
+    StringMap<EntryFunctionInfo> &entryFunctionMap,
+    std::unordered_map<Function *, const clang::HLSLPatchConstantFuncAttr *>
+        &HSEntryPatchConstantFuncAttr,
+    StringMap<PatchConstantInfo> &patchConstantFunctionMap,
+    std::unordered_map<Function *, std::unique_ptr<DxilFunctionProps>>
+        &patchConstantFunctionPropsMap) {
+
+  if (HLM.GetShaderModel()->IsLib()) {
+    // Library don't have entry.
+    for (auto &entry : entryFunctionMap) {
+      Function *entryFunc = entry.second.Func;
+      // skip clone if RT entry
+      if (HLM.GetDxilFunctionProps(entryFunc).IsRay())
+        continue;
+
+      // TODO: change flattened function names to dx.entry.<name>:
+      // std::string entryName = (Twine(dxilutil::EntryPrefix) +
+      // it.getKey()).str();
+      CloneShaderEntry(entryFunc, entry.getKey(), HLM);
+
+      auto attrPos = HSEntryPatchConstantFuncAttr.find(entryFunc);
+      if (attrPos == HSEntryPatchConstantFuncAttr.end())
+        continue;
+      SetPatchConstantFunctionWithAttr(
+          entry.second, attrPos->second, patchConstantFunctionMap,
+          patchConstantFunctionPropsMap, HLM, CGM);
+    }
+    return;
+  }
+
+  SetEntryFunction(HLM, Entry.Func, CGM);
+
+  // If at this point we haven't determined the entry function it's an error.
+  if (HLM.GetEntryFunction() == nullptr) {
+    assert(CGM.getDiags().hasErrorOccurred() &&
+           "else SetEntryFunction should have reported this condition");
+    return;
+  }
+
+  // In back-compat mode (with /Gec flag) create a static global for each
+  // const global to allow writing to it.
+  // TODO: Verify the behavior of static globals in hull shader
+  const bool needsWritableGlobals = CGM.getLangOpts().EnableDX9CompatMode &&
+                                    CGM.getLangOpts().HLSLVersion <= 2016;
+  if (needsWritableGlobals)
+    CreateWriteEnabledStaticGlobals(HLM.GetModule(), HLM.GetEntryFunction());
+
+  if (HLM.GetShaderModel()->IsHS())
+    SetPatchConstantFunction(Entry, HSEntryPatchConstantFuncAttr,
+                             patchConstantFunctionMap,
+                             patchConstantFunctionPropsMap, HLM, CGM);
+}
+} // namespace CGHLSLMSHelper
+
+namespace CGHLSLMSHelper {
+/// Finalizes intrinsic functions: lowers get-resource-from-heap calls, then
+/// adds the opcode as a parameter to the intrinsic functions. The order of the
+/// two steps matters (see the comments below).
+void FinishIntrinsics(
+    HLModule &HLM, std::vector<std::pair<Function *, unsigned>> &intrinsicMap,
+    DenseMap<Value *, DxilResourceProperties> &valToResPropertiesMap) {
+  // getResourceFromHeap must be lowered before AddOpcodeParamForIntrinsics so
+  // that it is skipped by the automatic lowering.
+  LowerGetResourceFromHeap(HLM, intrinsicMap);
+
+  // Translate opcode into parameter for intrinsic functions.
+  // This runs before CloneShaderEntry and TranslateRayQueryConstructor so that
+  // valToResPropertiesMap does not need updating for cloned instructions.
+  AddOpcodeParamForIntrinsics(HLM, intrinsicMap, valToResPropertiesMap);
+}
+}

+ 131 - 0
tools/clang/lib/CodeGen/CGHLSLMSHelper.h

@@ -0,0 +1,131 @@
+
+
+#pragma once
+
+#include "clang/Basic/SourceLocation.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+
+#include "dxc/DXIL/DxilCBuffer.h"
+
+#include <cstdint>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace clang {
+class HLSLPatchConstantFuncAttr;
+namespace CodeGen {
+class CodeGenModule;
+}
+}
+
+namespace llvm {
+class Function;
+class Module;
+class Value;
+class DebugLoc;
+class Constant;
+class GlobalVariable;
+class CallInst;
+}
+
+namespace hlsl {
+class HLModule;
+struct DxilResourceProperties;
+struct DxilFunctionProps;
+class DxilFieldAnnotation;
+enum class IntrinsicOp;
+namespace dxilutil {
+class ExportMap;
+}
+}
+
+namespace CGHLSLMSHelper {
+
+/// Pairs a function with a clang source location. Both members default to
+/// their empty state (invalid location, null function).
+struct EntryFunctionInfo {
+  clang::SourceLocation SL{};
+  llvm::Function *Func{nullptr};
+};
+
+/// Record kept for a patch constant function (used as the value type of the
+/// patch constant function maps below): a source location, the function, and
+/// an overload counter that starts at zero.
+struct PatchConstantInfo {
+  clang::SourceLocation SL{};
+  llvm::Function *Func{nullptr};
+  std::uint32_t NumOverloads{0};
+};
+
+/// Use this class to represent HLSL cbuffer in high-level DXIL.
+///
+/// Extends hlsl::DxilCBuffer with an owned, ordered list of the constants
+/// declared inside the buffer.
+class HLCBuffer : public hlsl::DxilCBuffer {
+public:
+  HLCBuffer() = default;
+  virtual ~HLCBuffer() = default;
+
+  /// Appends \p pItem to the buffer and takes ownership of it (\p pItem is
+  /// moved from). The item's ID is set to its index in the constant list.
+  void AddConst(std::unique_ptr<DxilResourceBase> &pItem) {
+    pItem->SetID(m_constants.size());
+    m_constants.push_back(std::move(pItem));
+  }
+
+  /// Mutable access to the constants, in insertion order.
+  std::vector<std::unique_ptr<DxilResourceBase>> &GetConstants() {
+    return m_constants;
+  }
+
+private:
+  // Constants inside the const buffer.
+  std::vector<std::unique_ptr<DxilResourceBase>> m_constants;
+};
+
+// Align cbuffer offset in legacy mode (16 bytes per row).
+unsigned AlignBufferOffsetInLegacy(unsigned offset, unsigned size,
+                                   unsigned scalarSizeInBytes,
+                                   bool bNeedNewRow);
+
+// Finalizes shader entry points: installs the entry function for non-library
+// targets, or clones the shader entries for library targets, and wires up
+// hull shader patch constant functions.
+void FinishEntries(
+    hlsl::HLModule &HLM, const EntryFunctionInfo &Entry,
+    clang::CodeGen::CodeGenModule &CGM,
+    llvm::StringMap<EntryFunctionInfo> &entryFunctionMap,
+    std::unordered_map<llvm::Function *,
+                       const clang::HLSLPatchConstantFuncAttr *>
+        &HSEntryPatchConstantFuncAttr,
+    llvm::StringMap<PatchConstantInfo> &patchConstantFunctionMap,
+    std::unordered_map<llvm::Function *,
+                       std::unique_ptr<hlsl::DxilFunctionProps>>
+        &patchConstantFunctionPropsMap);
+
+// Lowers get-resource-from-heap calls, then adds the opcode as a parameter to
+// the intrinsic functions.
+void FinishIntrinsics(
+    hlsl::HLModule &HLM,
+    std::vector<std::pair<llvm::Function *, unsigned>> &intrinsicMap,
+    llvm::DenseMap<llvm::Value *, hlsl::DxilResourceProperties>
+        &valToResPropertiesMap);
+
+// NOTE(review): implementation not visible from this header; presumably
+// replaces const static globals using their recorded init lists / ctors --
+// confirm against the definition.
+void ReplaceConstStaticGlobals(
+    std::unordered_map<llvm::GlobalVariable *, std::vector<llvm::Constant *>>
+        &staticConstGlobalInitListMap,
+    std::unordered_map<llvm::GlobalVariable *, llvm::Function *>
+        &staticConstGlobalCtorMap);
+
+// Replaces each non-null vertex shader clip plane of the listed functions
+// with a new "SV_ClipPlane<i>" global that is initialized in the entry block.
+void FinishClipPlane(
+    hlsl::HLModule &HLM, std::vector<llvm::Function *> &clipPlaneFuncList,
+    std::unordered_map<llvm::Value *, llvm::DebugLoc> &debugInfoMap,
+    clang::CodeGen::CodeGenModule &CGM);
+
+// Passes the SRV/UAV/Sampler register bindings recorded for cbuffer constants
+// to HLModule::AddRegBinding.
+void AddRegBindingsForResourceInConstantBuffer(
+    hlsl::HLModule &HLM,
+    llvm::DenseMap<
+        llvm::Constant *,
+        llvm::SmallVector<std::pair<hlsl::DXIL::ResourceClass, unsigned>, 1>>
+        &constantRegBindingMap);
+
+// Creates the global variable and type annotation for each cbuffer.
+void FinishCBuffer(
+    hlsl::HLModule &HLM, llvm::Type *CBufferType,
+    std::unordered_map<llvm::Constant *, hlsl::DxilFieldAnnotation>
+        &AnnotationMap);
+
+// NOTE(review): implementation not visible from this header; presumably
+// processes the constructor list stored in the global named globalName and
+// emits at InsertPt -- confirm against the definition.
+void ProcessCtorFunctions(llvm::Module &M, llvm::StringRef globalName,
+                          llvm::Instruction *InsertPt);
+
+void TranslateRayQueryConstructor(hlsl::HLModule &HLM);
+
+// Sets final function linkage and inlining attributes, applies the export
+// map, and diagnoses resource types in exported library function signatures.
+void UpdateLinkage(
+    hlsl::HLModule &HLM, clang::CodeGen::CodeGenModule &CGM,
+    hlsl::dxilutil::ExportMap &exportMap,
+    llvm::StringMap<EntryFunctionInfo> &entryFunctionMap,
+    llvm::StringMap<PatchConstantInfo> &patchConstantFunctionMap);
+
+llvm::Value *TryEvalIntrinsic(llvm::CallInst *CI, hlsl::IntrinsicOp intriOp);
+
+void SimpleTransformForHLDXIR(llvm::Module *pM);
+
+// Writes extension semantic defines into the module and applies a root
+// signature supplied through a #define, if one is found.
+void ExtensionCodeGen(hlsl::HLModule &HLM,
+                      clang::CodeGen::CodeGenModule &CGM);
+} // namespace CGHLSLMSHelper

+ 49 - 0
tools/clang/lib/CodeGen/CGHLSLRootSignature.cpp

@@ -0,0 +1,49 @@
+//===----- CGHLSLRootSignature.cpp - Compile root signature---------------===//
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// CGHLSLRootSignature.cpp                                                   //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+//  This provides clang::CompileRootSignature.                               //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "dxc/DXIL/DxilConstants.h"
+#include "dxc/DxilRootSignature/DxilRootSignature.h"
+#include "dxc/Support/WinIncludes.h"    // stream support
+#include "dxc/dxcapi.h"                 // stream support
+#include "clang/Parse/ParseHLSL.h" // root sig would be in Parser if part of lang
+#include "dxc/dxcapi.h"
+
+using namespace llvm;
+
+/// Parses and serializes an HLSL root signature string.
+///
+/// \param rootSigStr      Root signature source text.
+/// \param Diags           Diagnostics engine used for parse and serialization
+///                        errors.
+/// \param SLoc            Source location attached to reported diagnostics.
+/// \param rootSigVer      Root signature version to parse for.
+/// \param flags           Compilation flags forwarded to the parser.
+/// \param pRootSigHandle  Receives the parsed descriptor and the serialized
+///                        blob on success; left untouched when parsing or
+///                        serialization fails.
+///
+/// On serialization failure the error blob is reported through \p Diags and
+/// the parsed descriptor is deleted; on success ownership of the descriptor
+/// is handed to \p pRootSigHandle via Assign.
+void clang::CompileRootSignature(
+    StringRef rootSigStr, DiagnosticsEngine &Diags, SourceLocation SLoc,
+    hlsl::DxilRootSignatureVersion rootSigVer,
+    hlsl::DxilRootSignatureCompilationFlags flags,
+    hlsl::RootSignatureHandle *pRootSigHandle) {
+  hlsl::DxilVersionedRootSignatureDesc *D = nullptr;
+
+  // The parser reports its own diagnostics; nothing more to do on failure.
+  if (!ParseHLSLRootSignature(rootSigStr.data(), rootSigStr.size(), rootSigVer,
+                              flags, &D, SLoc, Diags))
+    return;
+
+  CComPtr<IDxcBlob> pSignature;
+  CComPtr<IDxcBlobEncoding> pErrors;
+  hlsl::SerializeRootSignature(D, &pSignature, &pErrors, false);
+  if (pSignature == nullptr) {
+    assert(pErrors != nullptr && "else serialize failed with no msg");
+    ReportHLSLRootSigError(Diags, SLoc,
+                           static_cast<char *>(pErrors->GetBufferPointer()),
+                           pErrors->GetBufferSize());
+    // Serialization failed, so the descriptor is not handed off; free it here.
+    hlsl::DeleteRootSignature(D);
+    return;
+  }
+
+  pRootSigHandle->Assign(D, pSignature);
+}

+ 2 - 0
tools/clang/lib/CodeGen/CMakeLists.txt

@@ -55,6 +55,8 @@ add_clang_library(clangCodeGen
   CGExprScalar.cpp
   CGHLSLRuntime.cpp
   CGHLSLMS.cpp
+  CGHLSLMSFinishCodeGen.cpp
+  CGHLSLRootSignature.cpp
   CGLoopInfo.cpp
   CGObjC.cpp
   # CGObjCGNU.cpp # HLSL Change - no definitions used

+ 1 - 1
tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/global/global-var-write-test04.hlsl

@@ -19,7 +19,7 @@
 // CHECK-NOT: {{(.*g_a.*)(.*static.copy.*)}} = internal global [5 x i32] zeroinitializer
 // CHECK-NOT: {{(.*g_a2d.*)(.*static.copy.*)}} = internal global [3 x [2 x i32]] zeroinitializer
 // CHECK: define <4 x float> @main
-// CHECK: ret %dx.types.Handle undef
+
 
 float g_s1;
 float g_s2; // write enabled