Procházet zdrojové kódy

Merge remote-tracking branch 'ms/master' into dxc-opts3

# Conflicts:
#	lib/DxilContainer/DxilContainerAssembler.cpp
Tex Riddell před 6 roky
rodič
revize
753d02595c
25 změnil soubory, kde provedl 624 přidání a 36 odebrání
  1. 2 0
      include/dxc/DXIL/DxilModule.h
  2. 5 4
      include/dxc/DxilContainer/DxilContainer.h
  3. 1 1
      include/dxc/Support/HLSLOptions.td
  4. 1 0
      include/llvm/InitializePasses.h
  5. 7 0
      include/llvm/Transforms/Scalar.h
  6. 47 0
      lib/DXIL/DxilModule.cpp
  7. 22 12
      lib/DxilContainer/DxilContainerAssembler.cpp
  8. 4 12
      lib/DxilDia/DxilDiaTableSymbols.cpp
  9. 1 0
      lib/HLSL/DxcOptimizer.cpp
  10. 4 0
      lib/Transforms/IPO/PassManagerBuilder.cpp
  11. 1 0
      lib/Transforms/Scalar/CMakeLists.txt
  12. 180 0
      lib/Transforms/Scalar/DxilFixConstArrayInitializer.cpp
  13. 3 3
      tools/clang/lib/CodeGen/CGExprCXX.cpp
  14. 3 3
      tools/clang/test/CodeGenHLSL/batch/misc/RValSubscript.hlsl
  15. 32 0
      tools/clang/test/CodeGenHLSL/batch/misc/gv_memcpy_before_alloca.hlsl
  16. 26 0
      tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/function_call_initializer.hlsl
  17. 27 0
      tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/function_call_initializer_uint.hlsl
  18. 26 0
      tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/function_call_initializer_vec.hlsl
  19. 20 0
      tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_duplicate_store.hlsl
  20. 23 0
      tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_duplicate_store2.hlsl
  21. 21 0
      tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_duplicate_store3.hlsl
  22. 25 0
      tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_load_store.hlsl
  23. 58 0
      tools/clang/test/CodeGenHLSL/quick-test/strip_reflect.hlsl
  24. 81 0
      tools/clang/test/CodeGenHLSL/quick-test/struct_buf_strip_reflect.hlsl
  25. 4 1
      tools/clang/tools/dxcompiler/dxcompilerobj.cpp

+ 2 - 0
include/dxc/DXIL/DxilModule.h

@@ -176,6 +176,7 @@ public:
   void ReEmitDxilResources();
   /// Deserialize DXIL metadata form into in-memory form.
   void LoadDxilMetadata();
+
   /// Check if a Named meta data node is known by dxil module.
   static bool IsKnownNamedMetaData(llvm::NamedMDNode &Node);
 
@@ -186,6 +187,7 @@ public:
   void ResetOP(hlsl::OP *hlslOP);
   void ResetEntryPropsMap(DxilEntryPropsMap &&PropMap);
 
+  void StripReflection();
   void StripDebugRelatedCode();
   llvm::DebugInfoFinder &GetOrCreateDebugInfoFinder();
 

+ 5 - 4
include/dxc/DxilContainer/DxilContainer.h

@@ -391,10 +391,11 @@ inline bool GetDxilShaderDebugName(const DxilPartHeader *pDebugNamePart,
 }
 
 enum class SerializeDxilFlags : uint32_t {
-  None = 0,                     // No flags defined.
-  IncludeDebugInfoPart = 1,     // Include the debug info part in the container.
-  IncludeDebugNamePart = 2,     // Include the debug name part in the container.
-  DebugNameDependOnSource = 4   // Make the debug name depend on source (and not just final module).
+  None = 0,                         // No flags defined.
+  IncludeDebugInfoPart = 1,         // Include the debug info part in the container.
+  IncludeDebugNamePart = 2,         // Include the debug name part in the container.
+  DebugNameDependOnSource = 4,      // Make the debug name depend on source (and not just final module).
+  StripReflectionFromDxilPart = 8,  // Strip Reflection info from DXIL part.
 };
 inline SerializeDxilFlags& operator |=(SerializeDxilFlags& l, const SerializeDxilFlags& r) {
   l = static_cast<SerializeDxilFlags>(static_cast<int>(l) | static_cast<int>(r));

+ 1 - 1
include/dxc/Support/HLSLOptions.td

@@ -347,7 +347,7 @@ def P : Separate<["-", "/"], "P">, Flags<[DriverOption]>, Group<hlslutil_Group>,
 
 def dumpbin : Flag<["-", "/"], "dumpbin">, Flags<[DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Load a binary file rather than compiling">;
-def Qstrip_reflect : Flag<["-", "/"], "Qstrip_reflect">, Flags<[DriverOption]>, Group<hlslutil_Group>,
+def Qstrip_reflect : Flag<["-", "/"], "Qstrip_reflect">, Flags<[CoreOption]>, Group<hlslutil_Group>,
   HelpText<"Strip reflection data from shader bytecode  (must be used with /Fo <file>)">;
 def Qstrip_debug : Flag<["-", "/"], "Qstrip_debug">, Flags<[CoreOption]>, Group<hlslutil_Group>,
   HelpText<"Strip debug information from 4_0+ shader bytecode  (must be used with /Fo <file>)">;

+ 1 - 0
include/llvm/InitializePasses.h

@@ -260,6 +260,7 @@ void initializeResourceToHandlePass(PassRegistry&);
 void initializeSROA_SSAUp_HLSLPass(PassRegistry&);
 void initializeHoistConstantArrayPass(PassRegistry&);
 void initializeDxilLoopUnrollPass(PassRegistry&);
+void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
 // HLSL Change Ends
 void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);
 void initializeScalarEvolutionPass(PassRegistry&);

+ 7 - 0
include/llvm/Transforms/Scalar.h

@@ -124,6 +124,13 @@ void initializeSROA_DT_HLSLPass(PassRegistry&);
 ModulePass *createSROA_Parameter_HLSL();
 void initializeSROA_Parameter_HLSLPass(PassRegistry&);
 
+//===----------------------------------------------------------------------===//
+//
+// Cleans up constant stores that didn't get a chance to be turned into initializers
+//
+Pass *createDxilFixConstArrayInitializerPass();
+void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
+
 Pass *createDxilLoopUnrollPass(unsigned MaxIterationAttempt);
 void initializeDxilLoopUnrollPass(PassRegistry&);
 //===----------------------------------------------------------------------===//

+ 47 - 0
lib/DXIL/DxilModule.cpp

@@ -1403,6 +1403,53 @@ void DxilModule::ReEmitDxilResources() {
   EmitDxilMetadata();
 }
 
+/// Clears the global name recorded on every resource held in \p vec.
+/// Only the name is dropped: the global symbol itself cannot be removed
+/// because it is still used by validation.
+template <typename TResource>
+static void
+StripResourcesReflection(std::vector<std::unique_ptr<TResource>> &vec) {
+  for (std::unique_ptr<TResource> &Res : vec)
+    Res->SetGlobalName("");
+}
+
+/// Removes reflection-only information from the module: basic block and
+/// instruction names, struct annotations, resource global names and global
+/// variables without uses. The DXIL resource metadata is re-emitted at the end.
+void DxilModule::StripReflection() {
+  // Drop the names of every basic block and instruction.
+  for (Function &Func : m_pModule->functions()) {
+    for (BasicBlock &Block : Func) {
+      if (Block.hasName())
+        Block.setName("");
+      for (Instruction &Inst : Block) {
+        if (Inst.hasName())
+          Inst.setName("");
+      }
+    }
+  }
+
+  // Struct annotations are removed; function annotations are used later,
+  // so only the struct annotation map is cleared.
+  m_pTypeSystem->GetStructAnnotationMap().clear();
+
+  // Clear the global name stored on each kind of resource.
+  StripResourcesReflection(m_CBuffers);
+  StripResourcesReflection(m_UAVs);
+  StripResourcesReflection(m_SRVs);
+  StripResourcesReflection(m_Samplers);
+
+  // Gather the globals that have no uses first and erase them afterwards, so
+  // the module's global list is not modified while it is being iterated.
+  SmallVector<GlobalVariable *,2> DeadGlobals;
+  for (GlobalVariable &Global : m_pModule->globals()) {
+    if (!Global.use_empty())
+      continue;
+    DeadGlobals.emplace_back(&Global);
+  }
+  for (GlobalVariable *Global : DeadGlobals)
+    Global->eraseFromParent();
+
+  // Re-emit the metadata so it matches the stripped in-memory state.
+  ReEmitDxilResources();
+}
+
 void DxilModule::LoadDxilResources(const llvm::MDOperand &MDO) {
   if (MDO.get() == nullptr)
     return;

+ 22 - 12
lib/DxilContainer/DxilContainerAssembler.cpp

@@ -1529,7 +1529,7 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
   pModule->GetDxilVersion(major, minor);
   RootSignatureWriter rootSigWriter(pModule->GetSerializedRootSignature());
 
-  bool bModuleDirty = false;
+  bool bMetadataStripped = false;
   if (pModule->GetShaderModel()->IsLib()) {
     DXASSERT(pModule->GetSerializedRootSignature().empty(),
              "otherwise, library has root signature outside subobject definitions");
@@ -1538,7 +1538,7 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
     writer.AddPart(
         DFCC_RuntimeData, pRDATWriter->size(),
         [&](AbstractMemoryStream *pStream) { pRDATWriter->write(pStream); });
-    bModuleDirty |= pModule->StripSubobjectsFromMetadata();
+    bMetadataStripped |= pModule->StripSubobjectsFromMetadata();
   } else {
     // Write the DxilPipelineStateValidation (PSV0) part.
     pPSVWriter = llvm::make_unique<DxilPSVWriter>(*pModule);
@@ -1550,13 +1550,13 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
       writer.AddPart(
         DFCC_RootSignature, rootSigWriter.size(),
         [&](AbstractMemoryStream *pStream) { rootSigWriter.write(pStream); });
-      bModuleDirty |= pModule->StripRootSignatureFromMetadata();
+      bMetadataStripped |= pModule->StripRootSignatureFromMetadata();
     }
   }
 
-  // If metadata was stripped, re-serialize the module.
+  // If metadata was stripped, re-serialize the input module.
   CComPtr<AbstractMemoryStream> pInputProgramStream = pModuleBitcode;
-  if (bModuleDirty) {
+  if (bMetadataStripped) {
     pInputProgramStream.Release();
     IFT(CreateMemoryStream(DxcGetThreadMallocNoRef(), &pInputProgramStream));
     raw_stream_ostream outStream(pInputProgramStream.p);
@@ -1565,7 +1565,9 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
 
   // If we have debug information present, serialize it to a debug part, then use the stripped version as the canonical program version.
   CComPtr<AbstractMemoryStream> pProgramStream = pInputProgramStream;
-  if (HasDebugInfo(*pModule->GetModule())) {
+  bool bModuleStripped = false;
+  bool bHasDebugInfo = HasDebugInfo(*pModule->GetModule());
+  if (bHasDebugInfo) {
     uint32_t debugInUInt32, debugPaddingBytes;
     GetPaddedProgramPartSize(pInputProgramStream, debugInUInt32, debugPaddingBytes);
     if (Flags & SerializeDxilFlags::IncludeDebugInfoPart) {
@@ -1574,20 +1576,28 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
       });
     }
 
-    pProgramStream.Release();
-
     llvm::StripDebugInfo(*pModule->GetModule());
     pModule->StripDebugRelatedCode();
-
-    IFT(CreateMemoryStream(DxcGetThreadMallocNoRef(), &pProgramStream));
-    raw_stream_ostream outStream(pProgramStream.p);
-    WriteBitcodeToFile(pModule->GetModule(), outStream, true);
+    bModuleStripped = true;
   } else {
     // If no debug info, clear DebugNameDependOnSource
     // (it's default, and this scenario can happen)
     Flags &= ~SerializeDxilFlags::DebugNameDependOnSource;
   }
 
+  if (Flags & SerializeDxilFlags::StripReflectionFromDxilPart) {
+    pModule->StripReflection();
+    bModuleStripped = true;
+  }
+
+  // If debug info or reflection was stripped, re-serialize the module.
+  if (bModuleStripped) {
+    pProgramStream.Release();
+    IFT(CreateMemoryStream(DxcGetThreadMallocNoRef(), &pProgramStream));
+    raw_stream_ostream outStream(pProgramStream.p);
+    WriteBitcodeToFile(pModule->GetModule(), outStream, true);
+  }
+
   // Serialize debug name if requested.
   CComPtr<AbstractMemoryStream> pHashStream;
   std::string DebugNameStr; // Used if constructing name based on hash

+ 4 - 12
lib/DxilDia/DxilDiaTableSymbols.cpp

@@ -280,24 +280,16 @@ STDMETHODIMP dxil_dia::Symbol::findChildrenEx(
     std::swap(tmp, children);
   }
 
-  CComBSTR n(name);
-  if (compareFlags == nsfCaseInsensitive) {
-      IFR(n.ToLower());
-  }
   if (name != nullptr && compareFlags != nsNone) {
     std::vector<CComPtr<Symbol>> tmp;
     tmp.reserve(children.size());
     for (const auto & c : children) {
       CComBSTR cName;
       IFR(c->get_name(&cName));
-      switch (compareFlags) {
-      case nsfCaseInsensitive:
-          IFR(cName.ToLower());
-          // fallthrough
-      case nsfCaseSensitive:
-          if (cName != n) {
-              continue;
-          }
+      // Careful with the string comparison function we use as it can make us pull in new dependencies
+      // CompareStringOrdinal lives in kernel32.dll
+      if (CompareStringOrdinal(cName, cName.Length(), name, -1, (BOOL)(compareFlags == nsfCaseInsensitive)) != CSTR_EQUAL) {
+        continue;
       }
 
       if (c->m_symTag == symtag) {

+ 1 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -100,6 +100,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilLegalizeSampleOffsetPassPass(Registry);
     initializeDxilLoadMetadataPass(Registry);
     initializeDxilLoopUnrollPass(Registry);
+    initializeDxilFixConstArrayInitializerPass(Registry);
     initializeDxilLowerCreateHandleForLibPass(Registry);
     initializeDxilPrecisePropagatePassPass(Registry);
     initializeDxilPreserveAllOutputsPass(Registry);

+ 4 - 0
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -294,6 +294,10 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   MPM.add(createCFGSimplificationPass());
 
   MPM.add(createDeadCodeEliminationPass());
+
+  if (OptLevel > 0) {
+    MPM.add(createDxilFixConstArrayInitializerPass());
+  }
 }
 // HLSL Change Ends
 

+ 1 - 0
lib/Transforms/Scalar/CMakeLists.txt

@@ -45,6 +45,7 @@ add_llvm_library(LLVMScalarOpts
   ScalarReplAggregates.cpp
   ScalarReplAggregatesHLSL.cpp  # HLSL Change
   DxilLoopUnroll.cpp # HLSL Change
+  DxilFixConstArrayInitializer.cpp # HLSL Change
   Scalarizer.cpp
   SeparateConstOffsetFromGEP.cpp
   SimplifyCFGPass.cpp

+ 180 - 0
lib/Transforms/Scalar/DxilFixConstArrayInitializer.cpp

@@ -0,0 +1,180 @@
+//===- DxilFixConstArrayInitializer.cpp - Special Construct Initializer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/Transforms/Scalar.h"
+#include "dxc/DXIL/DxilModule.h"
+#include "dxc/HLSL/HLModule.h"
+
+#include <unordered_map>
+#include <limits>
+
+using namespace llvm;
+
+namespace {
+
+/// Module pass that folds constant stores made in the entry block into
+/// global array initializers (the work is done by TryFixGlobalVariable).
+class DxilFixConstArrayInitializer : public ModulePass {
+public:
+  static char ID;
+
+  DxilFixConstArrayInitializer() : ModulePass(ID) {
+    // Make sure the pass is registered before it is first used.
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializeDxilFixConstArrayInitializerPass(Registry);
+  }
+
+  bool runOnModule(Module &M) override;
+
+  const char *getPassName() const override {
+    return "Dxil Fix Const Array Initializer";
+  }
+};
+
+// Storage for the pass identifier.
+char DxilFixConstArrayInitializer::ID = 0;
+
+} // end anonymous namespace
+
+/// Tries to fold constant stores made to \p GV inside \p EntryBlock into a
+/// constant initializer for \p GV.
+///
+/// Only globals with an undef initializer whose type is an array of at most
+/// 1024 scalar elements are considered. A store is folded when it lives in
+/// \p EntryBlock, writes a constant value through a GEP with all-constant
+/// indices, and comes before the first "unsafe" access to the array in that
+/// block (a load, a store through a non-constant index, or a store of a
+/// non-constant value). The initializer is installed, and the folded stores
+/// erased, only if every array element received a value.
+///
+/// \param GV          Global variable to examine.
+/// \param EntryBlock  Block whose stores are candidates for folding.
+/// \param InstOrder   Position of each instruction of \p EntryBlock; only the
+///                    relative order of the values matters.
+/// \returns true if \p GV got a new initializer, false if nothing was changed.
+static bool TryFixGlobalVariable(GlobalVariable &GV, BasicBlock *EntryBlock, const std::unordered_map<Instruction *, unsigned> &InstOrder) {
+  // Only proceed if the variable has an undef initializer
+  if (!GV.hasInitializer() || !isa<UndefValue>(GV.getInitializer()))
+    return false;
+
+  // Only handle arrays.
+  Type *Ty = GV.getType()->getPointerElementType();
+  if (!Ty->isArrayTy())
+    return false;
+
+  // Don't handle arrays that are too big
+  const uint64_t NumElements = Ty->getArrayNumElements();
+  if (NumElements > 1024)
+    return false;
+
+  Type *ElementTy = Ty->getArrayElementType();
+
+  // Only handle arrays of scalar types
+  if (ElementTy->isAggregateType() || ElementTy->isVectorTy())
+    return false;
+
+  // The instruction index at which point we no longer consider it
+  // safe to fold Stores. It's the earliest store with non-constant index,
+  // earliest store with non-constant value, or a load
+  unsigned FirstUnsafeIndex = std::numeric_limits<unsigned>::max();
+
+  SmallVector<StoreInst *, 8> PossibleFoldableStores;
+
+  // First do a pass to find the boundary for where we could fold stores. Get a
+  // list of stores that may be folded.
+  // NOTE(review): users of GV that are not GEPs (e.g. bitcasts or whole-array
+  // accesses) are ignored by this loop - confirm that this is intended.
+  for (User *U : GV.users()) {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+      bool AllConstIndices = GEP->hasAllConstantIndices();
+      unsigned NumIndices = GEP->getNumIndices();
+
+      if (NumIndices != 2)
+        return false;
+
+      for (User *GEPUser : GEP->users()) {
+        if (StoreInst *Store = dyn_cast<StoreInst>(GEPUser)) {
+          // The GEP must be the address being written. If it is the value
+          // being stored, the element address escapes, so give up.
+          if (Store->getPointerOperand() != GEP)
+            return false;
+          if (Store->getParent() != EntryBlock)
+            continue;
+          unsigned StoreIndex = InstOrder.at(Store);
+          if (!AllConstIndices || !isa<Constant>(Store->getValueOperand())) {
+            FirstUnsafeIndex = std::min(StoreIndex, FirstUnsafeIndex);
+            continue;
+          }
+          PossibleFoldableStores.push_back(Store);
+        }
+        else if (LoadInst *Load = dyn_cast<LoadInst>(GEPUser)) {
+          if (Load->getParent() != EntryBlock)
+            continue;
+          FirstUnsafeIndex = std::min(FirstUnsafeIndex, InstOrder.at(Load));
+        }
+        // If we have something weird like chained GEPS, or bitcasts, give up.
+        else {
+          return false;
+        }
+      }
+    }
+  }
+
+  // Per-element state: the constant chosen so far (null when the element has
+  // not been written) and the position of the store that provided it.
+  SmallVector<Constant *, 16> InitValue;
+  SmallVector<unsigned, 16>   LatestStores;
+  SmallVector<StoreInst *, 8> StoresToRemove;
+
+  InitValue.resize(NumElements);
+  LatestStores.resize(NumElements);
+
+  for (StoreInst *Store : PossibleFoldableStores) {
+    unsigned StoreIndex = InstOrder.at(Store);
+    // Skip stores located at or after the first unsafe access; they have to
+    // stay in the code.
+    if (StoreIndex >= FirstUnsafeIndex)
+      continue;
+
+    GEPOperator *GEP = cast<GEPOperator>(Store->getPointerOperand());
+
+    // The first GEP index steps over whole arrays; anything other than zero
+    // does not address an element of this global.
+    if (!cast<ConstantInt>(GEP->getOperand(1))->isZero())
+      return false;
+
+    // The element index is used to subscript the vectors below, so a constant
+    // index past the end of the array must not be folded.
+    uint64_t Index = cast<ConstantInt>(GEP->getOperand(2))->getLimitedValue();
+    if (Index >= NumElements)
+      return false;
+
+    // The user list is not in program order, so keep the value written by the
+    // latest store to each element.
+    if (LatestStores[Index] <= StoreIndex) {
+      InitValue[Index] = cast<Constant>(Store->getValueOperand());
+      LatestStores[Index] = StoreIndex;
+    }
+    StoresToRemove.push_back(Store);
+  }
+
+  // Give up if we have missing indices
+  for (Constant *C : InitValue)
+    if (!C)
+      return false;
+
+  GV.setInitializer(ConstantArray::get(cast<ArrayType>(Ty), InitValue));
+
+  // The folded stores are now redundant with the initializer.
+  for (StoreInst *Store : StoresToRemove)
+    Store->eraseFromParent();
+
+  return true;
+}
+
+/// Runs TryFixGlobalVariable on every global variable of \p M, using the
+/// entry block of the entry function taken from the DxilModule, or from the
+/// HLModule when no DxilModule is present.
+///
+/// \returns true if at least one global variable was rewritten.
+bool DxilFixConstArrayInitializer::runOnModule(Module &M) {
+  BasicBlock *EntryBlock = nullptr;
+
+  if (M.HasDxilModule()) {
+    hlsl::DxilModule &DM = M.GetDxilModule();
+    if (DM.GetEntryFunction()) {
+      EntryBlock = &DM.GetEntryFunction()->getEntryBlock();
+    }
+  }
+  else if (M.HasHLModule()) {
+    hlsl::HLModule &HM = M.GetHLModule();
+    if (HM.GetEntryFunction())
+      EntryBlock = &HM.GetEntryFunction()->getEntryBlock();
+  }
+
+  if (!EntryBlock)
+    return false;
+
+  // If some block might branch to the entry for some reason (like if it's a loop header),
+  // give up now. Have to make sure this block is not preceded by anything.
+  if (pred_begin(EntryBlock) != pred_end(EntryBlock))
+    return false;
+
+  // Find the instruction order for everything in the entry block.
+  // An explicit counter is used instead of `InstOrder[&I] = InstOrder.size()`:
+  // before C++17 it is unspecified whether the insertion or the size() call
+  // is evaluated first in that expression.
+  std::unordered_map<Instruction *, unsigned> InstOrder;
+  unsigned NextOrder = 0;
+  for (Instruction &I : *EntryBlock) {
+    InstOrder[&I] = NextOrder++;
+  }
+
+  // Accumulate the result: the pass has to report a change when ANY global
+  // was rewritten, not only when the last one visited was.
+  bool Changed = false;
+  for (GlobalVariable &GV : M.globals()) {
+    Changed |= TryFixGlobalVariable(GV, EntryBlock, InstOrder);
+  }
+
+  return Changed;
+}
+
+
+/// Allocates a new instance of the DxilFixConstArrayInitializer pass and
+/// returns it.
+Pass *llvm::createDxilFixConstArrayInitializerPass() {
+  Pass *NewPass = new DxilFixConstArrayInitializer();
+  return NewPass;
+}
+
+// Registers the pass under the command-line name "dxil-fix-array-init".
+INITIALIZE_PASS(DxilFixConstArrayInitializer, "dxil-fix-array-init", "Dxil Fix Array Initializer", false, false)

+ 3 - 3
tools/clang/lib/CodeGen/CGExprCXX.cpp

@@ -198,7 +198,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
         This = EmitLValue(Base).getAddress();
       } else {
         llvm::Value *Val = EmitScalarExpr(Base);
-        This = Builder.CreateAlloca(Val->getType());
+        This = CreateTempAlloca(Val->getType());
         CGM.getHLSLRuntime().EmitHLSLMatrixStore(*this, Val, This, Base->getType());
       }
 
@@ -222,7 +222,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
           This = LV.getAddress();
           if (isa<ExtMatrixElementExpr>(Base)) {
             llvm::Value *Val = Builder.CreateLoad(This);
-            This = Builder.CreateAlloca(Val->getType());
+            This = CreateTempAlloca(Val->getType());
             Builder.CreateStore(Val, This);
           }
         } else {
@@ -245,7 +245,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
         }
       } else {
         llvm::Value *Val = EmitScalarExpr(Base);
-        This = Builder.CreateAlloca(Val->getType());
+        This = CreateTempAlloca(Val->getType());
         Builder.CreateStore(Val, This);
       }
       bool isBool = false;

+ 3 - 3
tools/clang/test/CodeGenHLSL/batch/misc/RValSubscript.hlsl

@@ -1,6 +1,7 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
-// CHECK: alloca [16 x i32]
+// CHECK-DAG: alloca [16 x i32]
+// CHECK-DAG: alloca [4 x i1]
 
 // For b4[2]
 // CHECK: cbufferLoadLegacy
@@ -30,7 +31,6 @@
 // CHECK: fcmp fast olt
 // CHECK: fcmp fast olt
 // CHECK: fcmp fast olt
-// CHECK: alloca [4 x i1]
 
 // For (xt == 0)[i][i]
 // CHECK: fcmp fast oeq
@@ -63,4 +63,4 @@ float4 main(uint4 a : A) : SV_TARGET
   x += (x4 < i)[i];
   x += (xt == 6)[i][i];
   return x;
-}
+}

+ 32 - 0
tools/clang/test/CodeGenHLSL/batch/misc/gv_memcpy_before_alloca.hlsl

@@ -0,0 +1,32 @@
+// RUN: %dxc /Od /T vs_6_0 /E main %s | FileCheck %s
+
+// Regression check for a case with /Od where there is a static variable struct
+// write before an alloca. As part of the algorithm to replace all uses of the
+// GV before initialization with 0's, the basic block is split where the first
+// memcpy occurs. This may cause alloca's to get stuck in a non-entry block and
+// be missed by lowering transformations, such as in this example, an alloca of
+// <16 x float> sticks around.
+
+// CHECK: void @main
+
+float4x4 make(float4 a, float4 b, float4 c, float4 d)
+{
+ float4x4 mat;
+ mat._11_21_31_41 = a;
+ mat._12_22_32_42 = b;
+ mat._13_23_33_43 = c;
+ mat._14_24_34_44 = d;
+ return mat;
+}
+struct A {
+ float2 foo;
+};
+
+static A glob_a;
+
+void main()
+{
+ A a;
+ glob_a = a;
+ float3 b = float3(make(0, 1, 2, float4(0,0,0,1))[3].xyz);
+}

+ 26 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/function_call_initializer.hlsl

@@ -0,0 +1,26 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: = internal unnamed_addr constant [10 x float] [
+// CHECK-NOT: store float
+
+float f(float a) {
+  return a * 2;
+}
+
+static float GLOB[10] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+  f(4),
+  f(5),
+  f(6),
+  f(7),
+  f(8),
+  f(9),
+};
+
+[RootSignature("")]
+float main(float a : A) : SV_Target {
+  return GLOB[a];
+}

+ 27 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/function_call_initializer_uint.hlsl

@@ -0,0 +1,27 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: = internal unnamed_addr constant [10 x i32] [
+// CHECK-NOT: store i32
+
+uint f(uint a) {
+  return a * 2;
+}
+
+static uint GLOB[10] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+  f(4),
+  f(5),
+  f(6),
+  f(7),
+  f(8),
+  f(9),
+};
+
+[RootSignature("")]
+float main(uint a : A) : SV_Target {
+  return GLOB[a];
+}
+

+ 26 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/function_call_initializer_vec.hlsl

@@ -0,0 +1,26 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: = internal unnamed_addr constant [10 x float] [
+// CHECK: = internal unnamed_addr constant [10 x float] [
+// CHECK-NOT: store float
+
+float2 f(float a) {
+  return float2(a, a * 2);
+}
+
+static float2 GLOB[10] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+  f(4),
+  f(5),
+  f(6),
+  f(7),
+  f(8),
+  f(9),
+};
+
+[RootSignature("")]
+float main(float a : A) : SV_Target {
+  return GLOB[a].x + GLOB[a].y;
+}

+ 20 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_duplicate_store.hlsl

@@ -0,0 +1,20 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: = internal unnamed_addr constant [4 x i32] [i32 0, i32 2, i32 4, i32 20]
+// CHECK-NOT: store i32
+
+uint f(uint a) {
+  return a * 2;
+}
+
+static uint GLOB[4] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+};
+
+[RootSignature("")]
+float main(uint a : A) : SV_Target {
+  GLOB[3] = f(10);
+  return GLOB[a];
+}

+ 23 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_duplicate_store2.hlsl

@@ -0,0 +1,23 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: = internal unnamed_addr global [4 x i32] [i32 0, i32 2, i32 4, i32 6]
+// CHECK: store i32
+// CHECK: store i32
+// CHECK-NOT: store i32
+
+uint f(uint a) {
+  return a * 2;
+}
+
+static uint GLOB[4] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+};
+
+[RootSignature("")]
+float main(uint a : A, uint b : B) : SV_Target {
+  GLOB[b] = 20;
+  GLOB[3] = 10;
+  return GLOB[a];
+}

+ 21 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_duplicate_store3.hlsl

@@ -0,0 +1,21 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: = internal unnamed_addr constant [4 x i32] [i32 0, i32 2, i32 4, i32 20]
+
+// CHECK-NOT: store i32
+
+uint f(uint a) {
+  return a * 2;
+}
+
+static uint GLOB[4] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+};
+
+[RootSignature("")]
+float main(uint a : A, uint b : B) : SV_Target {
+  GLOB[3] = 20;
+  return GLOB[a];
+}

+ 25 - 0
tools/clang/test/CodeGenHLSL/batch/passes/dxil_array_initializer/initializer_load_store.hlsl

@@ -0,0 +1,25 @@
+// RUN: %dxc -O3 -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: = internal unnamed_addr global [4 x i32] [i32 0, i32 2, i32 4, i32 6]
+// CHECK: load i32
+// CHECK: store i32
+// CHECK: load i32
+// CHECK-NOT: store i32
+// CHECK-NOT: load i32
+
+uint f(uint a) {
+  return a * 2;
+}
+
+static uint GLOB[4] = {
+  f(0),
+  f(1),
+  f(2),
+  f(3),
+};
+
+[RootSignature("")]
+float main(uint a : A) : SV_Target {
+  uint result = GLOB[a];
+  GLOB[3] = f(10);
+  return result + GLOB[a];
+}

+ 58 - 0
tools/clang/test/CodeGenHLSL/quick-test/strip_reflect.hlsl

@@ -0,0 +1,58 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Qstrip_reflect | FileCheck %s
+
+// Make sure only function annotation, no struct annotation.
+// CHECK:!dx.typeAnnotations = !{[[FuncAnnot:[^,]+]]}
+// CHECK:[[FuncAnnot]] = !{i32 1, void ()* @main,
+
+//--------------------------------------------------------------------------------------
+// File: BasicHLSL11_PS.hlsl
+//
+// The pixel shader file for the BasicHLSL11 sample.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//--------------------------------------------------------------------------------------
+
+//--------------------------------------------------------------------------------------
+// Globals
+//--------------------------------------------------------------------------------------
+cbuffer cbPerObject : register( b0 )
+{
+    float4    g_vObjectColor    : packoffset( c0 );
+};
+
+cbuffer cbPerFrame : register( b1 )
+{
+    float3    g_vLightDir    : packoffset( c0 );
+    float    g_fAmbient    : packoffset( c0.w );
+};
+
+//--------------------------------------------------------------------------------------
+// Textures and Samplers
+//--------------------------------------------------------------------------------------
+Texture2D    g_txDiffuse : register( t0 );
+SamplerState    g_samLinear : register( s0 );
+
+//--------------------------------------------------------------------------------------
+// Input / Output structures
+//--------------------------------------------------------------------------------------
+struct PS_INPUT
+{
+  sample          float3 vNormal    : NORMAL;
+  noperspective   float2 vTexcoord  : TEXCOORD0;
+};
+
+//--------------------------------------------------------------------------------------
+// Pixel Shader
+//--------------------------------------------------------------------------------------
+float4 main( PS_INPUT Input) : SV_TARGET
+{
+    float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord );
+    if (g_vObjectColor.x > 0.3)
+      return vDiffuse;
+
+    float fLighting = saturate( dot( g_vLightDir, Input.vNormal ) );
+    fLighting = max( fLighting, g_fAmbient );
+    
+    return vDiffuse * fLighting;
+}
+

+ 81 - 0
tools/clang/test/CodeGenHLSL/quick-test/struct_buf_strip_reflect.hlsl

@@ -0,0 +1,81 @@
+// RUN: %dxc -E main -T ps_6_2 -enable-16bit-types -HV 2018 -Qstrip_reflect %s  | FileCheck %s
+
+
+struct MyStruct1
+{
+    half3   m_1;
+    int4    m_2;
+    half3   m_3;
+    half4   m_4;
+    double  m_5;
+    half    m_6;
+    half    m_7;
+    half    m_8;
+    int     m_9;
+    int16_t m_10;
+    uint16_t4 m_11;
+};
+
+struct MyStruct2
+{
+    double    m_1;
+    half3     m_2;
+    int       m_3;
+    int16_t   m_4;
+    float     m_5;
+    uint16_t3 m_6;
+    double    m_7;
+};
+
+RWStructuredBuffer<MyStruct1> g_sb1: register(u0);
+RWStructuredBuffer<MyStruct2> g_sb2: register(u1);
+
+float4 main() : SV_Target {
+    // CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %[[Handle:[^,]+]], i32 0, i32 0, half 0xH3C00, half 0xH3C00, half 0xH3C00, half undef, i8 7, i32 2)
+    // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %[[Handle]], i32 0, i32 8, i32 2, i32 2, i32 2, i32 2, i8 15, i32 4)
+    // CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %[[Handle]], i32 0, i32 24, half 0xH4200, half 0xH4200, half 0xH4200, half undef, i8 7, i32 2)
+    // CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %[[Handle]], i32 0, i32 30, half 0xH4400, half 0xH4400, half 0xH4400, half 0xH4400, i8 15, i32 2)
+    // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %[[Handle]], i32 0, i32 40, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 8)
+    // CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %[[Handle]], i32 0, i32 48, half 0xH4600, half undef, half undef, half undef, i8 1, i32 2)
+    // CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %[[Handle]], i32 0, i32 50, half 0xH4700, half undef, half undef, half undef, i8 1, i32 2)
+    // CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %[[Handle]], i32 0, i32 52, half 0xH4800, half undef, half undef, half undef, i8 1, i32 2)
+    // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %[[Handle]], i32 0, i32 56, i32 9, i32 undef, i32 undef, i32 undef, i8 1, i32 4)
+    // CHECK: call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %[[Handle]], i32 0, i32 60, i16 10, i16 undef, i16 undef, i16 undef, i8 1, i32 2)
+    // CHECK: call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %[[Handle]], i32 0, i32 62, i16 11, i16 11, i16 11, i16 11, i8 15, i32 2)
+    MyStruct1 myStruct;
+    myStruct.m_1 = 1;
+    myStruct.m_2 = 2;
+    myStruct.m_3 = 3;
+    myStruct.m_4 = 4;
+    myStruct.m_5 = 5;
+    myStruct.m_6 = 6;
+    myStruct.m_7 = 7;
+    myStruct.m_8 = 8;
+    myStruct.m_9 = 9;
+    myStruct.m_10 = 10;
+    myStruct.m_11 = 11;
+    g_sb1[0] = myStruct;
+
+    // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %[[Handle2:[^,]+]], i32 0, i32 0, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 8)
+    // CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %[[Handle2]], i32 0, i32 8, half 0xH4000, half 0xH4000, half 0xH4000, half undef, i8 7, i32 2)
+    // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %[[Handle2]], i32 0, i32 16, i32 3, i32 undef, i32 undef, i32 undef, i8 1, i32 4)
+    // CHECK: call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %[[Handle2]], i32 0, i32 20, i16 4, i16 undef, i16 undef, i16 undef, i8 1, i32 2)
+    // CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %[[Handle2]], i32 0, i32 24, float 5.000000e+00, float undef, float undef, float undef, i8 1, i32 4)
+    // CHECK: call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %[[Handle2]], i32 0, i32 28, i16 6, i16 6, i16 6, i16 undef, i8 7, i32 2)
+    // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %[[Handle2]], i32 0, i32 40, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 8)
+    MyStruct2 myStruct2;
+    myStruct2.m_1 = 1;
+    myStruct2.m_2 = 2;
+    myStruct2.m_3 = 3;
+    myStruct2.m_4 = 4;
+    myStruct2.m_5 = 5;
+    myStruct2.m_6 = 6;
+    myStruct2.m_7 = 7;
+    g_sb2[0] = myStruct2;
+
+    return 1;
+}
+
+// Make sure only function annotation, no struct annotation.
+// CHECK:!dx.typeAnnotations = !{[[FuncAnnot:[^,]+]]}
+// CHECK:[[FuncAnnot]] = !{i32 1, void ()* @main,

+ 4 - 1
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -342,7 +342,7 @@ public:
     HRESULT hr = S_OK;
     CComPtr<IDxcBlobEncoding> utf8Source;
     CComPtr<AbstractMemoryStream> pOutputStream;
-    CHeapPtr<wchar_t> DebugBlobName;
+    CComHeapPtr<wchar_t> DebugBlobName;
 
     DxcEtw_DXCompilerCompile_Start();
     pSourceName = (pSourceName && *pSourceName) ? pSourceName : L"hlsl.hlsl"; // declared optional, so pick a default
@@ -588,6 +588,9 @@ public:
         if (opts.DebugNameForSource) {
           SerializeFlags |= SerializeDxilFlags::DebugNameDependOnSource;
         }
+        if (opts.StripReflection) {
+          SerializeFlags |= SerializeDxilFlags::StripReflectionFromDxilPart;
+        }
 
         // Don't do work to put in a container if an error has occurred
         // Do not create a container when there is only a a high-level representation in the module.