2
0
Эх сурвалжийг харах

Skip copy-in/copy-out for constant global variables. (#2836)

* Skip copy-in/copy-out for constant global variables.

* Enable copy for noinline.
TODO: analysis for global variable alias with parameter.

* Use SetVector and skip resource when copy.

* Disable mayAliasWithGlobal because the optimization already covers this case: it does not do the replacement when there is an alias.
When a const global replaces a normal value that has stores, mark the global as non-constant.
Xiang Li 5 жил өмнө
parent
commit
49310e2b2c

+ 4 - 0
include/dxc/HLSL/DxilGenerationPass.h

@@ -115,6 +115,10 @@ void initializeDxilValidateWaveSensitivityPass(llvm::PassRegistry&);
 FunctionPass *createCleanupDxBreakPass();
 void initializeCleanupDxBreakPass(llvm::PassRegistry&);
 
+
+ModulePass *createHLLegalizeParameter();
+void initializeHLLegalizeParameterPass(llvm::PassRegistry &);
+
 bool AreDxilResourcesDense(llvm::Module *M, hlsl::DxilResourceBase **ppNonDense);
 
 }

+ 94 - 0
include/dxc/HLSL/HLUtil.h

@@ -0,0 +1,94 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// HLUtil.h                                                                  //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// HL helper functions.                                                      //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include "llvm/ADT/SetVector.h"
+
+namespace llvm {
+class Function;
+class Value;
+class MemCpyInst;
+} // namespace llvm
+
+namespace hlsl {
+class DxilTypeSystem;
+
+namespace hlutil {
+
+struct PointerStatus {
+  /// Keep track of what stores to the pointer look like.
+  enum class StoredType {
+    /// There is no store to this pointer.  It can thus be marked constant.
+    NotStored,
+
+    /// This ptr is a global, and is stored to, but the only thing stored is the
+    /// constant it was initialized with. This is only tracked for scalar
+    /// globals.
+    InitializerStored,
+
+    /// This ptr is stored to, but only its initializer and one other value
+    /// is ever stored to it.  If this global isStoredOnce, we track the value
+    /// stored to it in StoredOnceValue below.  This is only tracked for scalar
+    /// globals.
+    StoredOnce,
+
+    /// This ptr is only assigned by a single memcpy.
+    MemcopyDestOnce,
+
+    /// This ptr is stored to by multiple values or something else that we
+    /// cannot track.
+    Stored
+  } storedType;
+  /// Keep track of what loads from the pointer look like.
+  enum class LoadedType {
+    /// There is no load from this pointer.
+    NotLoaded,
+
+    /// This ptr is only read as the source of a single memcpy.
+    MemcopySrcOnce,
+
+    /// This ptr is loaded from by multiple instructions or something else that
+    /// we cannot track.
+    Loaded
+  } loadedType;
+  /// If only one value (besides the initializer constant) is ever stored to
+  /// this global, keep track of what value it is.
+  llvm::Value *StoredOnceValue;
+  /// Memcpys which use this ptr (kept in insertion order by SetVector).
+  llvm::SetVector<llvm::MemCpyInst *> memcpySet;
+  /// Memcpy which uses this ptr as dest.
+  llvm::MemCpyInst *StoringMemcpy;
+  /// Memcpy which uses this ptr as src.
+  llvm::MemCpyInst *LoadingMemcpy;
+  /// These start out null/false.  When the first accessing function is noticed,
+  /// it is recorded. When a second different accessing function is noticed,
+  /// HasMultipleAccessingFunctions is set to true.
+  const llvm::Function *AccessingFunction;
+  bool HasMultipleAccessingFunctions;
+  /// Size of the ptr, compared with memcpy lengths; 0 skips that comparison.
+  unsigned Size;
+  llvm::Value *Ptr; // Pointer whose users analyze() walks.
+  // Only load/store presence matters: analysis may stop once both are seen.
+  bool bLoadStoreOnly;
+
+  void analyze(DxilTypeSystem &typeSys, bool bStructElt); // Walk the users of Ptr and fill in the fields above.
+
+  PointerStatus(llvm::Value *ptr, unsigned size, bool bLdStOnly); // Starts as NotStored/NotLoaded with null pointers.
+  void MarkAsStored(); // Set storedType to Stored and clear StoredOnceValue.
+  void MarkAsLoaded(); // Set loadedType to Loaded.
+  bool HasStored(); // True unless storedType is NotStored or InitializerStored.
+  bool HasLoaded(); // True unless loadedType is NotLoaded.
+};
+
+} // namespace hlutil
+
+} // namespace hlsl

+ 2 - 0
lib/HLSL/CMakeLists.txt

@@ -35,6 +35,7 @@ add_llvm_library(LLVMHLSL
   DxcOptimizer.cpp
   HLDeadFunctionElimination.cpp
   HLExpandStoreIntrinsics.cpp
+  HLLegalizeParameter.cpp
   HLLowerUDT.cpp
   HLMatrixBitcastLowerPass.cpp
   HLMatrixLowerPass.cpp
@@ -48,6 +49,7 @@ add_llvm_library(LLVMHLSL
   HLPreprocess.cpp
   HLResource.cpp
   HLSignatureLower.cpp
+  HLUtil.cpp
   PauseResumePasses.cpp
   WaveSensitivityAnalysis.cpp
 

+ 1 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -130,6 +130,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeHLEmitMetadataPass(Registry);
     initializeHLEnsureMetadataPass(Registry);
     initializeHLExpandStoreIntrinsicsPass(Registry);
+    initializeHLLegalizeParameterPass(Registry);
     initializeHLMatrixLowerPassPass(Registry);
     initializeHLPreprocessPass(Registry);
     initializeHoistConstantArrayPass(Registry);

+ 312 - 0
lib/HLSL/HLLegalizeParameter.cpp

@@ -0,0 +1,312 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// HLLegalizeParameter.cpp                                                   //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Legalize in parameters that are written and out parameters that are read. //
+// Must be called before the inline pass.                                    //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/HLSL/HLModule.h"
+#include "dxc/DXIL/DxilOperations.h"
+#include "dxc/DXIL/DxilUtil.h"
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/HLUtil.h"
+#include "dxc/DXIL/DxilTypeSystem.h"
+
+#include "llvm/IR/IntrinsicInst.h"
+
+#include "dxc/Support/Global.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+
+#include <vector>
+
+using namespace llvm;
+using namespace hlsl;
+
+// For each parameter that needs legalizing, create an alloca to replace all of its uses, and copy between the alloca and the parameter.
+
+namespace {
+
+class HLLegalizeParameter : public ModulePass { // Module pass; runOnModule does the work.
+public:
+  static char ID;
+  explicit HLLegalizeParameter() : ModulePass(ID) {}
+  bool runOnModule(Module &M) override;
+
+private:
+  void patchWriteOnInParam(Function &F, Argument &Arg, const DataLayout &DL); // Redirect uses of Arg to a temp alloca initialized from Arg.
+  void patchReadOnOutParam(Function &F, Argument &Arg, const DataLayout &DL); // Redirect uses of Arg to a temp alloca copied back to Arg before each return.
+};
+
+AllocaInst *createAllocaForPatch(Function &F, Type *Ty) { // Create an alloca of type Ty in the entry block of F.
+  IRBuilder<> Builder(F.getEntryBlock().getFirstInsertionPt()); // Insert at the entry block's first insertion point.
+  return Builder.CreateAlloca(Ty);
+}
+
+void copyIn(AllocaInst *temp, Value *arg, CallInst *CI, unsigned size) { // Emit a memcpy of size bytes from arg to temp ahead of CI.
+  if (size == 0) // Nothing to copy.
+    return;
+  // Copy arg to temp before CI.
+  IRBuilder<> Builder(CI);
+  Builder.CreateMemCpy(temp, arg, size, 1); // Last argument (1) is presumably the alignment -- TODO confirm for this LLVM version.
+}
+
+void copyOut(AllocaInst *temp, Value *arg, CallInst *CI, unsigned size) { // Emit a memcpy of size bytes from temp to arg following CI.
+  if (size == 0) // Nothing to copy.
+    return;
+  // Copy temp to arg after CI.
+  IRBuilder<> Builder(CI->getNextNode()); // Insert before the instruction that follows the call.
+  Builder.CreateMemCpy(arg, temp, size, 1); // Last argument (1) is presumably the alignment -- TODO confirm for this LLVM version.
+}
+
+bool isPointerNeedToLower(Value *V, Type *HandleTy) { // True when V is rooted at an HL subscript on a resource handle or a cbuffer.
+  // CBuffer, Buffer, Texture....
+  // Anything related to dxil op.
+  // hl.subscript.
+  // Go to the root of the GEP chain.
+  while (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+    V = GEP->getPointerOperand();
+  }
+  CallInst *CI = dyn_cast<CallInst>(V);
+  if (!CI) // Root is not a call, so it cannot be an HL subscript.
+    return false;
+  HLOpcodeGroup group = GetHLOpcodeGroup(CI->getCalledFunction()); // NOTE(review): getCalledFunction() can be null for indirect calls -- confirm GetHLOpcodeGroup tolerates that.
+  if (group != HLOpcodeGroup::HLSubscript)
+    return false;
+  Value *Ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); // Object operand that the subscript indexes into.
+
+  // Ptr from resource handle.
+  if (Ptr->getType() == HandleTy)
+    return true;
+  unsigned Opcode = GetHLOpcode(CI);
+  // Ptr from cbuffer.
+  if (Opcode == (unsigned)HLSubscriptOpcode::CBufferSubscript)
+    return true;
+
+  return isPointerNeedToLower(Ptr, HandleTy); // Otherwise keep looking through the subscripted object.
+}
+
+bool mayAliasWithGlobal(Value *V, CallInst *CallSite, std::vector<GlobalVariable *> &staticGVs) {
+  // The unsafe case that needs copy-in/copy-out is a global variable aliasing
+  // a parameter: when the global variable is updated in the function, the
+  // parameter is updated silently.
+
+  // Currently a copy is added for all non-const static globals in
+  // CGMSHLSLRuntime::EmitHLSLOutParamConversionInit.
+  // So here just return false and do nothing.
+  // For a case like
+  // struct T {
+  //  float4 a[10];
+  //};
+  // static T g;
+  // void foo(inout T t) {
+  //  // modify g
+  //}
+  // void bar() {
+  //  T t = g;
+  //  // Not copied because t is local.
+  //  // But optimizations will change t to g later.
+  //  foo(t);
+  //}
+  // Optimizations which remove the copy should not replace foo(t) with foo(g)
+  // when g could be modified.
+  // TODO: remove the copy for globals in
+  // CGMSHLSLRuntime::EmitHLSLOutParamConversionInit; do analysis to check for
+  // aliasing and only generate a copy when there is an alias.
+  return false;
+}
+
+struct CopyData { // One pending copy-in/copy-out patch for a single call argument.
+  CallInst *CallSite; // Call whose argument gets replaced by a temp alloca.
+  Value *Arg; // Original pointer argument passed at the call site.
+  bool bCopyIn; // Copy Arg into the temp before the call.
+  bool bCopyOut; // Copy the temp back to Arg after the call.
+};
+
+void ParameterCopyInCopyOut(hlsl::HLModule &HLM) { // Collect call arguments that need a temp copy, then patch each call site.
+  Module &M = *HLM.GetModule();
+  Type *HandleTy = HLM.GetOP()->GetHandleType();
+  const DataLayout &DL = M.getDataLayout();
+
+  std::vector<GlobalVariable *> staticGVs; // Non-constant static globals, handed to mayAliasWithGlobal.
+  for (GlobalVariable &GV : M.globals()) {
+    if (dxilutil::IsStaticGlobal(&GV) && !GV.isConstant()) {
+      staticGVs.emplace_back(&GV);
+    }
+  }
+
+  SmallVector<CopyData, 4> WorkList; // Patches are gathered first and applied after the scan.
+  for (Function &F : M) {
+    if (F.user_empty()) // No users means no call sites to inspect.
+      continue;
+    DxilFunctionAnnotation *Annot = HLM.GetFunctionAnnotation(&F);
+    // Skip functions that don't have an annotation, including llvm intrinsics
+    // and HLOp functions.
+    if (!Annot)
+      continue;
+
+    bool bNoInline = F.hasFnAttribute(llvm::Attribute::NoInline) || F.isDeclaration(); // Declarations are treated like noinline functions.
+
+    for (User *U : F.users()) {
+      CallInst *CI = dyn_cast<CallInst>(U);
+      if (!CI) // NOTE(review): a call that merely passes F as an argument is not filtered out here -- confirm that cannot occur.
+        continue;
+      for (unsigned i = 0; i < CI->getNumArgOperands(); i++) {
+        Value *arg = CI->getArgOperand(i);
+
+        if (!arg->getType()->isPointerTy()) // Only pointer arguments are candidates.
+          continue;
+
+        DxilParameterAnnotation &ParamAnnot = Annot->GetParameterAnnotation(i);
+        bool bCopyIn = false;
+        bool bCopyOut = false;
+        switch (ParamAnnot.GetParamInputQual()) { // Map the parameter qualifier to the copy directions.
+        default:
+          break;
+        case DxilParamInputQual::In: {
+          bCopyIn = true;
+        } break;
+        case DxilParamInputQual::Out: {
+          bCopyOut = true;
+        } break;
+        case DxilParamInputQual::Inout: {
+          bCopyIn = true;
+          bCopyOut = true;
+        } break;
+        }
+
+        if (!bCopyIn && !bCopyOut) // Other qualifiers need no copy.
+          continue;
+
+        // When the pointer comes from a cbuffer/buffer, a copy is needed to
+        // avoid lowering it inside the called function.
+        bool bNeedCopy = mayAliasWithGlobal(arg, CI, staticGVs);
+        if (bNoInline)
+          bNeedCopy |= isPointerNeedToLower(arg, HandleTy);
+
+        if (!bNeedCopy)
+          continue;
+
+        CopyData data = {CI, arg, bCopyIn, bCopyOut};
+        WorkList.emplace_back(data);
+      }
+    }
+  }
+
+  for (CopyData &data : WorkList) { // Apply the recorded patches.
+    CallInst *CI = data.CallSite;
+    Value *arg = data.Arg;
+    Type *Ty = arg->getType()->getPointerElementType();
+    Type *EltTy = dxilutil::GetArrayEltTy(Ty);
+    // Skip object types and resource types.
+    if (dxilutil::IsHLSLObjectType(EltTy) ||
+        dxilutil::IsHLSLResourceType(EltTy))
+      continue;
+    unsigned size = DL.getTypeAllocSize(Ty);
+    AllocaInst *temp = createAllocaForPatch(*CI->getParent()->getParent(), Ty); // Temp lives in the caller of CI.
+    if (data.bCopyIn)
+      copyIn(temp, arg, CI, size);
+    if (data.bCopyOut)
+      copyOut(temp, arg, CI, size);
+    CI->replaceUsesOfWith(arg, temp); // NOTE(review): replaces every operand of CI equal to arg, so a duplicated argument gets a second, unused temp from its own WorkList entry.
+  }
+}
+
+} // namespace
+
+bool HLLegalizeParameter::runOnModule(Module &M) { // Patch written 'in' params and read 'out' params, then add call-site copies.
+  HLModule &HLM = M.GetOrCreateHLModule();
+  auto &typeSys = HLM.GetTypeSystem();
+  const DataLayout &DL = M.getDataLayout();
+
+  for (Function &F : M) {
+    if (F.isDeclaration()) // No body to patch.
+      continue;
+    DxilFunctionAnnotation *Annot = HLM.GetFunctionAnnotation(&F);
+    if (!Annot) // Parameter qualifiers are unknown without an annotation.
+      continue;
+
+    for (Argument &Arg : F.args()) {
+      if (!Arg.getType()->isPointerTy()) // Only pointer arguments are handled.
+        continue;
+      Type *EltTy = dxilutil::GetArrayEltTy(Arg.getType()); // NOTE(review): passed the pointer type itself; presumably GetArrayEltTy strips the pointer -- confirm.
+      if (dxilutil::IsHLSLObjectType(EltTy) ||
+          dxilutil::IsHLSLResourceType(EltTy))
+        continue;
+
+      DxilParameterAnnotation &ParamAnnot =
+          Annot->GetParameterAnnotation(Arg.getArgNo());
+      switch (ParamAnnot.GetParamInputQual()) {
+      default: // Other qualifiers, including Inout, are left untouched.
+        break;
+      case DxilParamInputQual::In: {
+        hlutil::PointerStatus PS(&Arg, 0, /*bLdStOnly*/ true);
+        PS.analyze(typeSys, /*bStructElt*/ false);
+        if (PS.HasStored()) { // An 'in' parameter is written inside the function.
+          patchWriteOnInParam(F, Arg, DL);
+        }
+      } break;
+      case DxilParamInputQual::Out: {
+        hlutil::PointerStatus PS(&Arg, 0, /*bLdStOnly*/ true);
+        PS.analyze(typeSys, /*bStructElt*/false);
+        if (PS.HasLoaded()) { // An 'out' parameter is read inside the function.
+          patchReadOnOutParam(F, Arg, DL);
+        }
+      }
+      }
+    }
+  }
+
+  // Copy-in/copy-out for pointer arguments when needed.
+  ParameterCopyInCopyOut(HLM);
+
+  return true; // Always reports the module as modified.
+}
+
+void HLLegalizeParameter::patchWriteOnInParam(Function &F, Argument &Arg,
+                                              const DataLayout &DL) { // Give a written 'in' parameter a private copy.
+  Type *Ty = Arg.getType()->getPointerElementType();
+  AllocaInst *temp = createAllocaForPatch(F, Ty);
+  Arg.replaceAllUsesWith(temp); // Done before the memcpy is created, so the memcpy below still reads from Arg.
+  IRBuilder<> Builder(temp->getNextNode()); // Insert right after the new alloca.
+  unsigned size = DL.getTypeAllocSize(Ty);
+  // Copy arg to temp at the beginning of the function.
+  Builder.CreateMemCpy(temp, &Arg, size, 1);
+}
+
+void HLLegalizeParameter::patchReadOnOutParam(Function &F, Argument &Arg,
+                                              const DataLayout &DL) { // Give a read 'out' parameter a private copy written back on return.
+  Type *Ty = Arg.getType()->getPointerElementType();
+  AllocaInst *temp = createAllocaForPatch(F, Ty);
+  Arg.replaceAllUsesWith(temp); // Done before the memcpys are created, so they still write to Arg.
+
+  unsigned size = DL.getTypeAllocSize(Ty);
+  for (auto &BB : F.getBasicBlockList()) {
+    // Copy temp to arg before every return.
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
+      IRBuilder<> RetBuilder(RI);
+      RetBuilder.CreateMemCpy(&Arg, temp, size, 1);
+    }
+  }
+}
+
+char HLLegalizeParameter::ID = 0; // Pass identification member referenced by the constructor.
+ModulePass *llvm::createHLLegalizeParameter() { // Factory; returns a newly allocated pass.
+  return new HLLegalizeParameter();
+}
+
+INITIALIZE_PASS(HLLegalizeParameter, "hl-legalize-parameter",
+                "Legalize parameter", false, false)

+ 196 - 0
lib/HLSL/HLUtil.cpp

@@ -0,0 +1,196 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// HLUtil.cpp                                                                //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// HL helper functions.                                                      //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/HLSL/HLUtil.h"
+#include "dxc/HLSL/HLOperations.h"
+#include "dxc/DXIL/DxilTypeSystem.h"
+
+#include "dxc/Support/Global.h"
+
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+
+using namespace llvm;
+using namespace hlsl;
+using namespace hlsl::hlutil;
+
+namespace {
+void analyzePointer(const Value *V, PointerStatus &PS, DxilTypeSystem &typeSys,
+                    bool bStructElt, bool bLdStOnly) { // Recursively classify the users of V into the load/store status of PS.
+  // Early return when only load/store presence matters and both were seen.
+  if (bLdStOnly) {
+    if (PS.HasLoaded() && PS.HasStored())
+       return;
+  }
+  for (const User *U : V->users()) {
+    if (const Instruction *I = dyn_cast<Instruction>(U)) { // Track which function(s) access the pointer.
+      const Function *F = I->getParent()->getParent();
+      if (!PS.AccessingFunction) {
+        PS.AccessingFunction = F;
+      } else {
+        if (F != PS.AccessingFunction)
+          PS.HasMultipleAccessingFunctions = true;
+      }
+    }
+
+    if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(U)) { // Look through bitcasts.
+      analyzePointer(BC, PS, typeSys, bStructElt, bLdStOnly);
+    } else if (const MemCpyInst *MC = dyn_cast<MemCpyInst>(U)) {
+      // Do not collect memcpy on struct GEP use.
+      // These memcpy will be flattened in next level.
+      if (!bStructElt) {
+        MemCpyInst *MI = const_cast<MemCpyInst *>(MC);
+        PS.memcpySet.insert(MI);
+        bool bFullCopy = false; // Stays false when the length is not a constant.
+        if (ConstantInt *Length = dyn_cast<ConstantInt>(MC->getLength())) {
+          bFullCopy = PS.Size == Length->getLimitedValue() || PS.Size == 0 ||
+                      Length->getLimitedValue() == 0; // handle unbounded arrays
+        }
+        if (MC->getRawDest() == V) {
+          if (bFullCopy &&
+              PS.storedType == PointerStatus::StoredType::NotStored) { // First store seen is a full memcpy.
+            PS.storedType = PointerStatus::StoredType::MemcopyDestOnce;
+            PS.StoringMemcpy = MI;
+          } else {
+            PS.MarkAsStored();
+            PS.StoringMemcpy = nullptr;
+          }
+        } else if (MC->getRawSource() == V) {
+          if (bFullCopy &&
+              PS.loadedType == PointerStatus::LoadedType::NotLoaded) { // First load seen is a full memcpy.
+            PS.loadedType = PointerStatus::LoadedType::MemcopySrcOnce;
+            PS.LoadingMemcpy = MI;
+          } else {
+            PS.MarkAsLoaded();
+            PS.LoadingMemcpy = nullptr;
+          }
+        }
+      } else {
+        if (MC->getRawDest() == V) {
+          PS.MarkAsStored();
+        } else {
+          DXASSERT(MC->getRawSource() == V, "must be source here");
+          PS.MarkAsLoaded();
+        }
+      }
+    } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+      gep_type_iterator GEPIt = gep_type_begin(GEP);
+      gep_type_iterator GEPEnd = gep_type_end(GEP);
+      // Skip pointer idx.
+      GEPIt++;
+      // Struct elt will be flattened in next level.
+      bool bStructElt = (GEPIt != GEPEnd) && GEPIt->isStructTy(); // Shadows the parameter for the recursive call only.
+      analyzePointer(GEP, PS, typeSys, bStructElt, bLdStOnly);
+    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { // NOTE(review): does not check that the pointer is the address operand rather than the stored value -- confirm intent.
+      Value *V = SI->getOperand(0); // Stored value; shadows the parameter V inside this branch.
+
+      if (PS.storedType == PointerStatus::StoredType::NotStored) {
+        PS.storedType = PointerStatus::StoredType::StoredOnce;
+        PS.StoredOnceValue = V;
+      } else {
+        PS.MarkAsStored();
+      }
+    } else if (dyn_cast<LoadInst>(U)) {
+      PS.MarkAsLoaded();
+    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
+      Function *F = CI->getCalledFunction();
+      DxilFunctionAnnotation *annotation = typeSys.GetFunctionAnnotation(F);
+      if (!annotation) { // Callee has no annotation: classify it by HL opcode group instead.
+        HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(F);
+        switch (group) {
+        case HLOpcodeGroup::HLMatLoadStore: {
+          HLMatLoadStoreOpcode opcode =
+              static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
+          switch (opcode) {
+          case HLMatLoadStoreOpcode::ColMatLoad:
+          case HLMatLoadStoreOpcode::RowMatLoad:
+            PS.MarkAsLoaded();
+            break;
+          case HLMatLoadStoreOpcode::ColMatStore:
+          case HLMatLoadStoreOpcode::RowMatStore:
+            PS.MarkAsStored();
+            break;
+          default:
+            DXASSERT(0, "invalid opcode");
+            PS.MarkAsStored();
+            PS.MarkAsLoaded();
+          }
+        } break;
+        case HLOpcodeGroup::HLSubscript: {
+          HLSubscriptOpcode opcode =
+              static_cast<HLSubscriptOpcode>(hlsl::GetHLOpcode(CI));
+          switch (opcode) {
+          case HLSubscriptOpcode::VectorSubscript:
+          case HLSubscriptOpcode::ColMatElement:
+          case HLSubscriptOpcode::ColMatSubscript:
+          case HLSubscriptOpcode::RowMatElement:
+          case HLSubscriptOpcode::RowMatSubscript:
+            analyzePointer(CI, PS, typeSys, bStructElt, bLdStOnly); // The subscript result is a pointer; follow its users.
+            break;
+          default:
+            // Rest are resource ptr like buf[i].
+            // Only read of resource handle.
+            PS.MarkAsLoaded();
+            break;
+          }
+        } break;
+        default: {
+          // If not sure whether it is an out param or not, treat it as one.
+          PS.MarkAsStored();
+          PS.MarkAsLoaded();
+        }
+        }
+        continue;
+      }
+
+      unsigned argSize = F->arg_size();
+      for (unsigned i = 0; i < argSize; i++) {
+        Value *arg = CI->getArgOperand(i);
+        if (V == arg) {
+          // Do not replace struct arg.
+          // Mark stored and loaded to disable replace.
+          PS.MarkAsStored();
+          PS.MarkAsLoaded();
+        }
+      }
+    }
+  }
+}
+}
+
+namespace hlsl {
+namespace hlutil {
+
+void PointerStatus::analyze(DxilTypeSystem &typeSys, bool bStructElt) { // Analyze the users of Ptr, recording results in this object.
+  analyzePointer(Ptr, *this, typeSys, bStructElt, bLoadStoreOnly);
+}
+
+PointerStatus::PointerStatus(llvm::Value *ptr, unsigned size, bool bLdStOnly) // Starts as NotStored/NotLoaded with null pointers and no accessing function.
+    : storedType(StoredType::NotStored), loadedType(LoadedType::NotLoaded),
+      StoredOnceValue(nullptr), StoringMemcpy(nullptr), LoadingMemcpy(nullptr),
+      AccessingFunction(nullptr), HasMultipleAccessingFunctions(false),
+      Size(size), Ptr(ptr), bLoadStoreOnly(bLdStOnly) {}
+
+void PointerStatus::MarkAsStored() { // Record an untracked store.
+  storedType = StoredType::Stored;
+  StoredOnceValue = nullptr; // No single stored value is tracked any more.
+}
+void PointerStatus::MarkAsLoaded() { loadedType = LoadedType::Loaded; } // Record an untracked load.
+bool PointerStatus::HasStored() { // True for StoredOnce, MemcopyDestOnce and Stored.
+  return storedType != StoredType::NotStored &&
+         storedType != StoredType::InitializerStored;
+}
+bool PointerStatus::HasLoaded() { return loadedType != LoadedType::NotLoaded; } // True for MemcopySrcOnce and Loaded.
+
+} // namespace hlutil
+} // namespace hlsl

+ 4 - 0
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -331,6 +331,8 @@ void PassManagerBuilder::populateModulePassManager(
       MPM.add(createDxilInsertPreservesPass()); // HLSL Change - insert preserve instructions
 
     if (Inliner) {
+      MPM.add(createHLLegalizeParameter()); // HLSL Change - legalize parameters
+                                            // before inline.
       MPM.add(Inliner);
       Inliner = nullptr;
     }
@@ -375,6 +377,8 @@ void PassManagerBuilder::populateModulePassManager(
   }
 
   // HLSL Change Begins
+
+  MPM.add(createHLLegalizeParameter()); // legalize parameters before inline.
   MPM.add(createAlwaysInlinerPass(/*InsertLifeTime*/false));
   if (Inliner) {
     delete Inliner;

+ 33 - 229
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -59,6 +59,7 @@
 #include "dxc/HLSL/HLMatrixType.h"
 #include "dxc/DXIL/DxilOperations.h"
 #include "dxc/HLSL/HLLowerUDT.h"
+#include "dxc/HLSL/HLUtil.h"
 #include <deque>
 #include <unordered_map>
 #include <unordered_set>
@@ -3374,214 +3375,6 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV,
   return true;
 }
 
-struct PointerStatus {
-  /// Keep track of what stores to the pointer look like.
-  enum StoredType {
-    /// There is no store to this pointer.  It can thus be marked constant.
-    NotStored,
-
-    /// This ptr is a global, and is stored to, but the only thing stored is the
-    /// constant it
-    /// was initialized with. This is only tracked for scalar globals.
-    InitializerStored,
-
-    /// This ptr is stored to, but only its initializer and one other value
-    /// is ever stored to it.  If this global isStoredOnce, we track the value
-    /// stored to it in StoredOnceValue below.  This is only tracked for scalar
-    /// globals.
-    StoredOnce,
-
-    /// This ptr is only assigned by a memcpy.
-    MemcopyDestOnce,
-
-    /// This ptr is stored to by multiple values or something else that we
-    /// cannot track.
-    Stored
-  } storedType;
-  /// Keep track of what loaded from the pointer look like.
-  enum LoadedType {
-    /// There is no load to this pointer.  It can thus be marked constant.
-    NotLoaded,
-
-    /// This ptr is only used by a memcpy.
-    MemcopySrcOnce,
-
-    /// This ptr is loaded to by multiple instructions or something else that we
-    /// cannot track.
-    Loaded
-  } loadedType;
-  /// If only one value (besides the initializer constant) is ever stored to
-  /// this global, keep track of what value it is.
-  Value *StoredOnceValue;
-  /// Memcpy which this ptr is used.
-  std::unordered_set<MemCpyInst *> memcpySet;
-  /// Memcpy which use this ptr as dest.
-  MemCpyInst *StoringMemcpy;
-  /// Memcpy which use this ptr as src.
-  MemCpyInst *LoadingMemcpy;
-  /// These start out null/false.  When the first accessing function is noticed,
-  /// it is recorded. When a second different accessing function is noticed,
-  /// HasMultipleAccessingFunctions is set to true.
-  const Function *AccessingFunction;
-  bool HasMultipleAccessingFunctions;
-  /// Size of the ptr.
-  unsigned Size;
-
-  /// Look at all uses of the global and fill in the GlobalStatus structure.  If
-  /// the global has its address taken, return true to indicate we can't do
-  /// anything with it.
-  static void analyzePointer(const Value *V, PointerStatus &PS,
-                             DxilTypeSystem &typeSys, bool bStructElt);
-
-  PointerStatus(unsigned size)
-      : storedType(StoredType::NotStored), loadedType(LoadedType::NotLoaded), StoredOnceValue(nullptr),
-        StoringMemcpy(nullptr), LoadingMemcpy(nullptr),
-        AccessingFunction(nullptr), HasMultipleAccessingFunctions(false),
-        Size(size) {}
-  void MarkAsStored() {
-    storedType = StoredType::Stored;
-    StoredOnceValue = nullptr;
-  }
-  void MarkAsLoaded() { loadedType = LoadedType::Loaded; }
-};
-
-void PointerStatus::analyzePointer(const Value *V, PointerStatus &PS,
-                                   DxilTypeSystem &typeSys, bool bStructElt) {
-  for (const User *U : V->users()) {
-    if (const Instruction *I = dyn_cast<Instruction>(U)) {
-      const Function *F = I->getParent()->getParent();
-      if (!PS.AccessingFunction) {
-        PS.AccessingFunction = F;
-      } else {
-        if (F != PS.AccessingFunction)
-          PS.HasMultipleAccessingFunctions = true;
-      }
-    }
-
-    if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(U)) {
-      analyzePointer(BC, PS, typeSys, bStructElt);
-    } else if (const MemCpyInst *MC = dyn_cast<MemCpyInst>(U)) {
-      // Do not collect memcpy on struct GEP use.
-      // These memcpy will be flattened in next level.
-      if (!bStructElt) {
-        MemCpyInst *MI = const_cast<MemCpyInst *>(MC);
-        PS.memcpySet.insert(MI);
-        bool bFullCopy = false;
-        if (ConstantInt *Length = dyn_cast<ConstantInt>(MC->getLength())) {
-          bFullCopy = PS.Size == Length->getLimitedValue()
-            || PS.Size == 0 || Length->getLimitedValue() == 0;  // handle unbounded arrays
-        }
-        if (MC->getRawDest() == V) {
-          if (bFullCopy &&
-              PS.storedType == StoredType::NotStored) {
-            PS.storedType = StoredType::MemcopyDestOnce;
-            PS.StoringMemcpy = MI;
-          } else {
-            PS.MarkAsStored();
-            PS.StoringMemcpy = nullptr;
-          }
-        } else if (MC->getRawSource() == V) {
-          if (bFullCopy &&
-              PS.loadedType == LoadedType::NotLoaded) {
-            PS.loadedType = LoadedType::MemcopySrcOnce;
-            PS.LoadingMemcpy = MI;
-          } else {
-            PS.MarkAsLoaded();
-            PS.LoadingMemcpy = nullptr;
-          }
-        }
-      } else {
-        if (MC->getRawDest() == V) {
-          PS.MarkAsStored();
-        } else {
-          DXASSERT(MC->getRawSource() == V, "must be source here");
-          PS.MarkAsLoaded();
-        }
-      }
-    } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
-      gep_type_iterator GEPIt = gep_type_begin(GEP);
-      gep_type_iterator GEPEnd = gep_type_end(GEP);
-      // Skip pointer idx.
-      GEPIt++;
-      // Struct elt will be flattened in next level.
-      bool bStructElt = (GEPIt != GEPEnd) && GEPIt->isStructTy();
-      analyzePointer(GEP, PS, typeSys, bStructElt);
-    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
-      Value *V = SI->getOperand(0);
-
-      if (PS.storedType == StoredType::NotStored) {
-        PS.storedType = StoredType::StoredOnce;
-        PS.StoredOnceValue = V;
-      } else {
-        PS.MarkAsStored();
-      }
-    } else if (dyn_cast<LoadInst>(U)) {
-      PS.MarkAsLoaded();
-    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
-      Function *F = CI->getCalledFunction();
-      DxilFunctionAnnotation *annotation = typeSys.GetFunctionAnnotation(F);
-      if (!annotation) {
-        HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(F);
-        switch (group) {
-        case HLOpcodeGroup::HLMatLoadStore: {
-          HLMatLoadStoreOpcode opcode =
-              static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
-          switch (opcode) {
-          case HLMatLoadStoreOpcode::ColMatLoad:
-          case HLMatLoadStoreOpcode::RowMatLoad:
-            PS.MarkAsLoaded();
-            break;
-          case HLMatLoadStoreOpcode::ColMatStore:
-          case HLMatLoadStoreOpcode::RowMatStore:
-            PS.MarkAsStored();
-            break;
-          default:
-            DXASSERT(0, "invalid opcode");
-            PS.MarkAsStored();
-            PS.MarkAsLoaded();
-          }
-        } break;
-        case HLOpcodeGroup::HLSubscript: {
-          HLSubscriptOpcode opcode =
-              static_cast<HLSubscriptOpcode>(hlsl::GetHLOpcode(CI));
-          switch (opcode) {
-          case HLSubscriptOpcode::VectorSubscript:
-          case HLSubscriptOpcode::ColMatElement:
-          case HLSubscriptOpcode::ColMatSubscript:
-          case HLSubscriptOpcode::RowMatElement:
-          case HLSubscriptOpcode::RowMatSubscript:
-            analyzePointer(CI, PS, typeSys, bStructElt);
-            break;
-          default:
-            // Rest are resource ptr like buf[i].
-            // Only read of resource handle.
-            PS.MarkAsLoaded();
-            break;
-          }
-        } break;
-        default: {
-          // If not sure its out param or not. Take as out param.
-          PS.MarkAsStored();
-          PS.MarkAsLoaded();
-        }
-        }
-        continue;
-      }
-
-      unsigned argSize = F->arg_size();
-      for (unsigned i = 0; i < argSize; i++) {
-        Value *arg = CI->getArgOperand(i);
-        if (V == arg) {
-          // Do not replace struct arg.
-          // Mark stored and loaded to disable replace.
-          PS.MarkAsStored();
-          PS.MarkAsLoaded();
-        }
-      }
-    }
-  }
-}
-
 static void ReplaceConstantWithInst(Constant *C, Value *V, IRBuilder<> &Builder) {
   for (auto it = C->user_begin(); it != C->user_end(); ) {
     User *U = *(it++);
@@ -3754,6 +3547,15 @@ static void ReplaceMemcpy(Value *V, Value *Src, MemCpyInst *MC,
     }
   }
 
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Src)) {
+    // For a const GV, if it has any store, mark it as non-constant.
+    if (GV->isConstant()) {
+      hlutil::PointerStatus PS(GV, 0, /*bLdStOnly*/ true);
+      PS.analyze(typeSys, /*bStructElt*/ false);
+      if (PS.HasStored())
+        GV->setConstant(false);
+    }
+  }
   Value *RawDest = MC->getOperand(0);
   Value *RawSrc = MC->getOperand(1);
   MC->eraseFromParent();
@@ -3899,16 +3701,17 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
   // if MemcpyOnce, replace with dest with src if dest is not out param.
   // else flat memcpy.
   unsigned size = DL.getTypeAllocSize(Ty->getPointerElementType());
-  PointerStatus PS(size);
+  hlutil::PointerStatus PS(V, size, /*bLdStOnly*/ false);
   const bool bStructElt = false;
   bool bEltMemcpy = true;
-  PointerStatus::analyzePointer(V, PS, typeSys, bStructElt);
+  PS.analyze(typeSys, bStructElt);
 
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
     if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
-      if (PS.storedType == PointerStatus::StoredType::NotStored) {
-        PS.storedType = PointerStatus::StoredType::InitializerStored;
-      } else if (PS.storedType == PointerStatus::StoredType::MemcopyDestOnce) {
+      if (PS.storedType == hlutil::PointerStatus::StoredType::NotStored) {
+        PS.storedType = hlutil::PointerStatus::StoredType::InitializerStored;
+      } else if (PS.storedType ==
+                 hlutil::PointerStatus::StoredType::MemcopyDestOnce) {
         // For single mem store, if the store does not dominate all users.
         // Mark it as Stored.
         // In cases like:
@@ -3920,27 +3723,28 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
         if (isa<ConstantAggregateZero>(GV->getInitializer())) {
           Instruction * Memcpy = PS.StoringMemcpy;
           if (!ReplaceUseOfZeroInitBeforeDef(Memcpy, GV)) {
-            PS.storedType = PointerStatus::StoredType::Stored;
+            PS.storedType = hlutil::PointerStatus::StoredType::Stored;
           }
         }
       } else {
-        PS.storedType = PointerStatus::StoredType::Stored;
+        PS.storedType = hlutil::PointerStatus::StoredType::Stored;
       }
     }
-  } else if (PS.storedType == PointerStatus::StoredType::MemcopyDestOnce) {
+  } else if (PS.storedType ==
+             hlutil::PointerStatus::StoredType::MemcopyDestOnce) {
     // As above, if the memcpy doesn't dominate all its users,
     // full replacement isn't possible without complicated PHI insertion
     // This will likely replace with ld/st which will be replaced in mem2reg
     Instruction *Memcpy = PS.StoringMemcpy;
     if (!DominateAllUsers(Memcpy, V)) {
-      PS.storedType = PointerStatus::StoredType::Stored;
+      PS.storedType = hlutil::PointerStatus::StoredType::Stored;
       // Replacing a memcpy with a memcpy with the same signature will just bring us back here
       bEltMemcpy = false;
     }
   }
 
   if (bAllowReplace && !PS.HasMultipleAccessingFunctions) {
-    if (PS.storedType == PointerStatus::StoredType::MemcopyDestOnce &&
+    if (PS.storedType == hlutil::PointerStatus::StoredType::MemcopyDestOnce &&
         // Skip argument for input argument has input value, it is not dest once anymore.
         !isa<Argument>(V)) {
       // Replace with src of memcpy.
@@ -3975,15 +3779,16 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
           // Resource ptr should not be replaced.
           // Need to make sure src not updated after current memcpy.
           // Check Src only have 1 store now.
-          PointerStatus SrcPS(size);
-          PointerStatus::analyzePointer(Src, SrcPS, typeSys, bStructElt);
-          if (SrcPS.storedType != PointerStatus::StoredType::Stored) {
+          hlutil::PointerStatus SrcPS(Src, size, /*bLdStOnly*/ false);
+          SrcPS.analyze(typeSys, bStructElt);
+          if (SrcPS.storedType != hlutil::PointerStatus::StoredType::Stored) {
             ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL);
             return true;
           }
         }
       }
-    } else if (PS.loadedType == PointerStatus::LoadedType::MemcopySrcOnce) {
+    } else if (PS.loadedType ==
+               hlutil::PointerStatus::LoadedType::MemcopySrcOnce) {
       // Replace dst of memcpy.
       MemCpyInst *MC = PS.LoadingMemcpy;
       if (MC->getSourceAddressSpace() == MC->getDestAddressSpace()) {
@@ -3998,9 +3803,9 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
             !isa<BitCastOperator>(Dest)) {
           // Need to make sure Dest not updated after current memcpy.
           // Check Dest only have 1 store now.
-          PointerStatus DestPS(size);
-          PointerStatus::analyzePointer(Dest, DestPS, typeSys, bStructElt);
-          if (DestPS.storedType != PointerStatus::StoredType::Stored) {
+          hlutil::PointerStatus DestPS(Dest, size, /*bLdStOnly*/ false);
+          DestPS.analyze(typeSys, bStructElt);
+          if (DestPS.storedType != hlutil::PointerStatus::StoredType::Stored) {
             ReplaceMemcpy(Dest, V, MC, annotation, typeSys, DL);
             // V still need to be flatten.
             // Lower memcpy come from Dest.
@@ -6401,11 +6206,10 @@ void PatchDebugInfo(DebugInfoFinder &DbgFinder, Function *F, GlobalVariable *GV,
 bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV, const DataLayout &DL) {
   DxilTypeSystem &typeSys = m_pHLModule->GetTypeSystem();
   unsigned size = DL.getTypeAllocSize(GV->getType()->getElementType());
-  PointerStatus PS(size);
+  hlutil::PointerStatus PS(GV, size, /*bLdStOnly*/ false);
   GV->removeDeadConstantUsers();
-  PS.analyzePointer(GV, PS, typeSys, /*bStructElt*/ false);
-  bool NotStored = (PS.storedType == PointerStatus::StoredType::NotStored) ||
-                   (PS.storedType == PointerStatus::StoredType::InitializerStored);
+  PS.analyze(typeSys, /*bStructElt*/ false);
+  bool NotStored = !PS.HasStored();
   // Make sure GV only used in one function.
   // Skip GV which don't have store.
   if (PS.HasMultipleAccessingFunctions || NotStored)

+ 20 - 4
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -1578,6 +1578,11 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
       // Construct annotation for this pointer.
       ConstructFieldAttributedAnnotation(paramAnnotation, ThisTy,
                                          bDefaultRowMajor);
+      if (MethodDecl->isConst()) {
+        paramAnnotation.SetParamInputQual(DxilParamInputQual::In);
+      } else {
+        paramAnnotation.SetParamInputQual(DxilParamInputQual::Inout);
+      }
     }
   }
 
@@ -5393,7 +5398,6 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     bool isAggregateType = !isObject &&
       (ParamTy->isArrayType() || ParamTy->isRecordType()) &&
       !hlsl::IsHLSLVecMatType(ParamTy);
-    bool bInOut = Param->isModifierIn() && Param->isModifierOut();
 
     bool EmitRValueAgg = false;
     bool RValOnRef = false;
@@ -5471,9 +5475,21 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
       argLV = CGF.EmitLValue(Arg);
       if (argLV.isSimple())
         argAddr = argLV.getAddress();
-      // Skip copy-in copy-out for local variables.
-      if (bInOut && argAddr &&
-          (isa<AllocaInst>(argAddr) || isa<Argument>(argAddr))) {
+      // When an argument needs lowering (like a buffer/cbuffer load), it must
+      // be copied so the lowering does not happen on the argument when the
+      // callee is a noinline or extern function. HLLegalizeParameter does it
+      // after extern functions are known but before inlining.
+      bool bConstGlobal = false;
+      if (GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(argAddr)) {
+        bConstGlobal = m_ConstVarAnnotationMap.count(GV) | GV->isConstant();
+      }
+      // Skip copy-in copy-out when safe.
+      // The unsafe case is a global variable aliasing a parameter: when the
+      // global variable is updated in the function, the parameter is updated
+      // silently. For a non-global variable or a constant global variable, it
+      // should be safe.
+      if (argAddr && (isa<AllocaInst>(argAddr) || isa<Argument>(argAddr) ||
+                      bConstGlobal)) {
         llvm::Type *ToTy = CGF.ConvertType(ParamTy.getNonReferenceType());
         if (argAddr->getType()->getPointerElementType() == ToTy &&
             // Check clang Type for case like int cast to unsigned.

+ 0 - 5
tools/clang/test/HLSLFileCheck/dxil/debug/misc/intrinsic4_dbg.hlsl

@@ -12,13 +12,8 @@
 // CHECK: i1 false
 // CHECK: texture2DMSGetSamplePosition
 // CHECK: getDimensions
-// CHECK: llvm.dbg.value(metadata i32 %
-// CHECK: llvm.dbg.value(metadata i32 %
 // CHECK: getDimensions
-// CHECK: llvm.dbg.value(metadata i32 %
-// CHECK: llvm.dbg.value(metadata i32 %
 // CHECK: getDimensions
-// CHECK: llvm.dbg.value(metadata i32 %
 // CHECK: getDimensions
 
 // Exclude quoted source file (see readme)

+ 62 - 0
tools/clang/test/HLSLFileCheck/hlsl/functions/arguments/global_param_alias.hlsl

@@ -0,0 +1,62 @@
+// RUN: %dxc -E main -T ps_6_0  %s | FileCheck %s
+
+// Make sure static global a does not alias with local t.
+
+// store io before call foo. Value should be 0.
+// CHECK:  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i8 1)
+// CHECK:  call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 0, i32 4, float 0.000000e+00, float undef, float undef, float undef, i8 1)
+// CHECK:  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 0, i32 8, i32 0, i32 undef, i32 undef, i32 undef, i8 1)
+
+// store io after ++ in foo and after call foo. Value should be 1.
+// CHECK:  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i8 1)
+// CHECK:  call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 4, float 1.000000e+00, float undef, float undef, float undef, i8 1)
+// CHECK:  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 8, i32 1, i32 undef, i32 undef, i32 undef, i8 1)
+// CHECK:  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i8 1)
+// CHECK:  call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 4, float 1.000000e+00, float undef, float undef, float undef, i8 1)
+// CHECK:  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 8, i32 1, i32 undef, i32 undef, i32 undef, i8 1)
+
+// store a after bar. Value should be -1.
+// CHECK:  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 -1, i32 0, i32 -1, i32 undef, i32 undef, i32 undef, i8 1)
+// CHECK:  call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 -1, i32 4, float -1.000000e+00, float undef, float undef, float undef, i8 1)
+// CHECK:  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 -1, i32 8, i32 -1, i32 undef, i32 undef, i32 undef, i8 1)
+
+// Make sure return 3.
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 3.000000e+00)
+
+struct ST {
+   int a;
+   float b;
+   uint c;
+};
+
+static ST a;
+
+
+const static ST b = {0, 0, 0};
+
+RWStructuredBuffer<ST> u;
+
+void foo(inout ST io) {
+  io.a++;
+  io.b++;
+  io.c++;
+  u[io.a] = io;
+}
+
+void bar(inout ST io) {
+  a.a--;
+  a.b--;
+  a.c--;
+  u[io.a] = io;
+  foo(io);
+  u[io.a] = io;
+}
+
+float main() : SV_Target {
+  a = b;
+  ST t = a;
+  bar(t);
+
+  u[a.a] = a;
+  return t.a + t.b + t.c;
+}

+ 23 - 0
tools/clang/test/HLSLFileCheck/hlsl/functions/arguments/not_copy_cbuffer_argument.hlsl

@@ -0,0 +1,23 @@
+// RUN: %dxc -E main -Tps_6_0 -fcgl %s | FileCheck %s
+
+
+// Make sure no memcpy generated.
+// CHECK:@main
+// CHECK-NOT:memcpy
+
+struct Data
+{
+ float4 f[64];
+};
+
+cbuffer A {
+  Data a;
+};
+
+float4 foo(Data d, int i) {
+  return d.f[i];
+}
+
+float4 main(int i:I) :SV_Target {
+  return foo(a, i);
+}

+ 4 - 4
tools/clang/test/HLSLFileCheck/hlsl/resource_binding/res_in_cb1.hlsl

@@ -4,10 +4,10 @@
 //CHECK: tx0.s                             sampler      NA          NA      S0             s0     1
 //CHECK: tx1.s                             sampler      NA          NA      S1             s1     1
 //CHECK: s                                 sampler      NA          NA      S2             s3     1
-//CHECK: tx0.t2                            texture     f32          2d      T0             t1     1
-//CHECK: tx0.t                             texture     f32          2d      T1             t0     1
-//CHECK: tx1.t2                            texture     f32          2d      T2             t6     1
-//CHECK: tx1.t                             texture     f32          2d      T3             t5     1
+//CHECK: tx0.t                             texture     f32          2d      T0             t0     1
+//CHECK: tx0.t2                            texture     f32          2d      T1             t1     1
+//CHECK: tx1.t                             texture     f32          2d      T2             t5     1
+//CHECK: tx1.t2                            texture     f32          2d      T3             t6     1
 //CHECK: x                                 texture     f32          2d      T4             t3     1
 
 struct LegacyTex

+ 1 - 0
utils/hct/hctdb.py

@@ -1965,6 +1965,7 @@ class db_dxil(object):
 
         add_pass('hlsl-hlemit', 'HLEmitMetadata', 'HLSL High-Level Metadata Emit.', [])
         add_pass("hl-expand-store-intrinsics", "HLExpandStoreIntrinsics", "Expand HLSL store intrinsics", [])
+        add_pass("hl-legalize-parameter", "HLLegalizeParameter", "Legalize parameter", [])
         add_pass('scalarrepl-param-hlsl', 'SROA_Parameter_HLSL', 'Scalar Replacement of Aggregates HLSL (parameters)', [])
         add_pass('scalarreplhlsl', 'SROA_DT_HLSL', 'Scalar Replacement of Aggregates HLSL (DT)', [])
         add_pass('scalarreplhlsl-ssa', 'SROA_SSAUp_HLSL', 'Scalar Replacement of Aggregates HLSL (SSAUp)', [])