Browse Source

Integrate dxcapi v2 and other changes from internal (#2575)

* Integrate changes from internal.

- dxcapi v2
- new dxc options
- DxilValueCache
- PDB and NoOpt improvements
- noop / llvm::donothing() support

* Update dxrfallbacklayer for dxcapi internal changes

* Reorder diag block based on whether pDiag is set first.

* llvm::donothing() requires dxil 1.6 / SM 6.6 for now, lib as well.

* Fixes for spir-v, non-VC compiler and non-Windows builds

- DEFINE_CROSS_PLATFORM_UUIDOF for new interfaces
- add SAL annotations
- turn output argument validation for -P into warning
- handle warnings without concatenating them to main output
- update spirv preprocessing and compilation paths
- return E_NOTIMPL from IDxcUtils::CreateReflection
- cleanup: DxcContainerBuilder back to utf8, DxcTestUtils: remove comment

* Fix some warnings from clang/gcc.

* Fix unicode conversion problems on linux, where sizeof(wchar_t) == 4

Note this is an intermediate fix.
On linux, what we are calling utf16 is actually a wide string
that's probably utf32.  This change fixes issues introduced by
the new interface changes so things are consistent and pass tests.

A future fix should correct the encodings so they are correctly labeled
on platforms where wchar_t doesn't mean UTF16.

* Return false for IsBufferNullTerminated when CP_ACP.

One test for Disassembler was crashing because it created a pinned blob
with a size of 1 << 31 + 1 without actual memory backing it.
IsBufferNullTerminated would then attempt to check whether this buffer was
null terminated, causing an access violation (AV).

This change also removes CP_UTF8 from this test where it was creating
binary blobs, not UTF8 text blobs.
Tex Riddell 5 years ago
parent
commit
f4965b71dd
100 changed files with 4545 additions and 775 deletions
  1. 16 0
      include/dxc/DXIL/DxilMetadataHelper.h
  2. 1 0
      include/dxc/DXIL/DxilPDB.h
  3. 2 1
      include/dxc/DxilContainer/DxilContainer.h
  4. 2 1
      include/dxc/DxilContainer/DxilContainerAssembler.h
  5. 2 2
      include/dxc/DxilContainer/DxilPipelineStateValidation.h
  6. 2 0
      include/dxc/DxilContainer/DxilRuntimeReflection.h
  7. 2 2
      include/dxc/DxilRootSignature/DxilRootSignature.h
  8. 1 1
      include/dxc/HLSL/DxilConvergentName.h
  9. 77 0
      include/dxc/HLSL/DxilValueCache.h
  10. 3 3
      include/dxc/Support/DxcLangExtensionsHelper.h
  11. 25 8
      include/dxc/Support/FileIOHelper.h
  12. 5 1
      include/dxc/Support/HLSLOptions.h
  13. 29 8
      include/dxc/Support/HLSLOptions.td
  14. 7 0
      include/dxc/Support/Unicode.h
  15. 2 0
      include/dxc/Support/WinAdapter.h
  16. 463 50
      include/dxc/Support/dxcapi.impl.h
  17. 2 2
      include/dxc/Support/dxcapi.use.h
  18. 1 1
      include/dxc/Support/dxcfilesystem.h
  19. 321 71
      include/dxc/dxcapi.h
  20. 9 1
      include/dxc/dxcdxrfallbackcompiler.h
  21. 5 0
      include/llvm/Analysis/InstructionSimplify.h
  22. 51 0
      include/llvm/IR/IRBuilder.h
  23. 3 0
      include/llvm/InitializePasses.h
  24. 10 0
      include/llvm/Transforms/Scalar.h
  25. 335 6
      lib/Analysis/InstructionSimplify.cpp
  26. 36 0
      lib/DXIL/DxilMetadataHelper.cpp
  27. 6 2
      lib/DXIL/DxilModule.cpp
  28. 2 1
      lib/DXIL/DxilOperations.cpp
  29. 58 33
      lib/DXIL/DxilPDB.cpp
  30. 1 1
      lib/DXIL/DxilShaderFlags.cpp
  31. 5 3
      lib/DXIL/DxilShaderModel.cpp
  32. 5 1
      lib/DXIL/DxilUtil.cpp
  33. 523 271
      lib/DxcSupport/FileIOHelper.cpp
  34. 65 28
      lib/DxcSupport/HLSLOptions.cpp
  35. 26 10
      lib/DxcSupport/Unicode.cpp
  36. 102 20
      lib/DxcSupport/dxcapi.use.cpp
  37. 5 4
      lib/DxcSupport/dxcmem.cpp
  38. 17 4
      lib/DxilContainer/DxilContainerAssembler.cpp
  39. 5 0
      lib/DxilDia/DxilDiaSymbolManager.cpp
  40. 1 1
      lib/DxilDia/DxilDiaSymbolManager.h
  41. 1 1
      lib/DxrFallback/StateFunctionTransform.cpp
  42. 2 0
      lib/HLSL/CMakeLists.txt
  43. 1 1
      lib/HLSL/ComputeViewIdStateBuilder.cpp
  44. 5 0
      lib/HLSL/DxcOptimizer.cpp
  45. 174 4
      lib/HLSL/DxilCondenseResources.cpp
  46. 27 5
      lib/HLSL/DxilContainerReflection.cpp
  47. 164 0
      lib/HLSL/DxilNoops.cpp
  48. 7 14
      lib/HLSL/DxilPatchShaderRecordBindings.cpp
  49. 25 5
      lib/HLSL/DxilPreparePasses.cpp
  50. 451 0
      lib/HLSL/DxilValueCache.cpp
  51. 1 1
      lib/HLSL/HLExpandStoreIntrinsics.cpp
  52. 4 3
      lib/HLSL/HLModule.cpp
  53. 1 21
      lib/HLSL/HLOperationLower.cpp
  54. 2 2
      lib/HLSL/HLSignatureLower.cpp
  55. 28 17
      lib/Transforms/IPO/PassManagerBuilder.cpp
  56. 1 0
      lib/Transforms/Scalar/CMakeLists.txt
  57. 239 0
      lib/Transforms/Scalar/DxilEliminateVector.cpp
  58. 3 3
      lib/Transforms/Scalar/DxilEraseDeadRegion.cpp
  59. 4 23
      lib/Transforms/Scalar/DxilLoopUnroll.cpp
  60. 2 9
      lib/Transforms/Scalar/LowerTypePasses.cpp
  61. 35 0
      lib/Transforms/Scalar/SROA.cpp
  62. 194 50
      lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
  63. 45 3
      lib/Transforms/Scalar/Scalarizer.cpp
  64. 1 1
      tools/clang/lib/CodeGen/CGHLSLMS.cpp
  65. 1 0
      tools/clang/lib/Frontend/CMakeLists.txt
  66. 39 0
      tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_nested_noopt.hlsl
  67. 28 0
      tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_noopt.hlsl
  68. 41 0
      tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_opt.hlsl
  69. 2 1
      tools/clang/test/CodeGenSPIRV/spirv.debug.cl-option.hlsl
  70. 18 0
      tools/clang/test/HLSLFileCheck/dxil/debug/dyn_vec.hlsl
  71. 11 0
      tools/clang/test/HLSLFileCheck/dxil/debug/fcgl.hlsl
  72. 23 0
      tools/clang/test/HLSLFileCheck/dxil/debug/global_dyn_vec.hlsl
  73. 22 0
      tools/clang/test/HLSLFileCheck/dxil/debug/global_vec.hlsl
  74. 26 0
      tools/clang/test/HLSLFileCheck/dxil/debug/gv_od.hlsl
  75. 9 4
      tools/clang/test/HLSLFileCheck/dxil/debug/locals/matrix_no_opt.hlsl
  76. 2 2
      tools/clang/test/HLSLFileCheck/dxil/debug/locals/temporary_dbg_declare.hlsl
  77. 28 0
      tools/clang/test/HLSLFileCheck/dxil/debug/mat3x2_dbg.hlsl
  78. 25 0
      tools/clang/test/HLSLFileCheck/dxil/debug/mat_dbg.hlsl
  79. 5 8
      tools/clang/test/HLSLFileCheck/dxil/debug/misc/intrinsic4_dbg.hlsl
  80. 1 1
      tools/clang/test/HLSLFileCheck/dxil/debug/misc/share_mem_dbg.hlsl
  81. 33 0
      tools/clang/test/HLSLFileCheck/dxil/debug/no_fold.hlsl
  82. 38 0
      tools/clang/test/HLSLFileCheck/dxil/debug/no_fold_vec.hlsl
  83. 34 0
      tools/clang/test/HLSLFileCheck/dxil/debug/no_fold_vec_array.hlsl
  84. 149 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_call.hlsl
  85. 38 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold.hlsl
  86. 43 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold_vec.hlsl
  87. 44 0
      tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/calculations.hlsl
  88. 32 0
      tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/cfg.hlsl
  89. 40 0
      tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/cfg2.hlsl
  90. 34 0
      tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/lexicalBlock.hlsl
  91. 22 0
      tools/clang/test/HLSLFileCheck/dxil/debug/vec_dbg.hlsl
  92. 1 1
      tools/clang/test/HLSLFileCheck/hlsl/control_flow/if_else/if2.hlsl
  93. 6 4
      tools/clang/test/HLSLFileCheck/hlsl/objects/Texture/sample_kwd.hlsl
  94. 3 3
      tools/clang/test/HLSLFileCheck/hlsl/types/boolean/bool_scalar_swizzle.hlsl
  95. 5 18
      tools/clang/test/HLSLFileCheck/hlsl/types/boolean/local_load_store.hlsl
  96. 27 0
      tools/clang/test/HLSLFileCheck/hlsl/types/boolean/local_load_store_scalar.hlsl
  97. 2 2
      tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/center/center_kwd.hlsl
  98. 3 2
      tools/clang/test/HLSLFileCheck/samples/d3d11/SubD11_SubDToBezierHS.hlsl
  99. 2 2
      tools/clang/tools/dxa/dxa.cpp
  100. 58 27
      tools/clang/tools/dxclib/dxc.cpp

+ 16 - 0
include/dxc/DXIL/DxilMetadataHelper.h

@@ -21,6 +21,7 @@ class LLVMContext;
 class Module;
 class Module;
 class Function;
 class Function;
 class Instruction;
 class Instruction;
+class DbgDeclareInst;
 class Value;
 class Value;
 class MDOperand;
 class MDOperand;
 class Metadata;
 class Metadata;
@@ -54,6 +55,14 @@ struct DxilFunctionProps;
 class DxilSubobjects;
 class DxilSubobjects;
 class DxilSubobject;
 class DxilSubobject;
 
 
+// Additional debug information for SROA'ed array variables,
+// where adjacent elements in DXIL might not have been adjacent
+// in the original user variable.
+struct DxilDIArrayDim {
+  unsigned StrideInBits;
+  unsigned NumElements;
+};
+
 /// Use this class to manipulate DXIL-spcific metadata.
 /// Use this class to manipulate DXIL-spcific metadata.
 // In our code, only DxilModule and HLModule should use this class.
 // In our code, only DxilModule and HLModule should use this class.
 class DxilMDHelper {
 class DxilMDHelper {
@@ -217,6 +226,9 @@ public:
   // NonUniform attribute.
   // NonUniform attribute.
   static const char kDxilNonUniformAttributeMDName[];
   static const char kDxilNonUniformAttributeMDName[];
 
 
+  // Variable debug layout metadata.
+  static const char kDxilVariableDebugLayoutMDName[];
+
   // Validator version.
   // Validator version.
   static const char kDxilValidatorVersionMDName[];
   static const char kDxilValidatorVersionMDName[];
   // Validator version uses the same constants for fields as kDxilVersion*
   // Validator version uses the same constants for fields as kDxilVersion*
@@ -484,6 +496,10 @@ public:
   static void MarkPrecise(llvm::Instruction *inst);
   static void MarkPrecise(llvm::Instruction *inst);
   static bool IsMarkedNonUniform(const llvm::Instruction *inst);
   static bool IsMarkedNonUniform(const llvm::Instruction *inst);
   static void MarkNonUniform(llvm::Instruction *inst);
   static void MarkNonUniform(llvm::Instruction *inst);
+  static bool GetVariableDebugLayout(llvm::DbgDeclareInst *inst,
+    unsigned &StartOffsetInBits, std::vector<DxilDIArrayDim> &ArrayDims);
+  static void SetVariableDebugLayout(llvm::DbgDeclareInst *inst,
+    unsigned StartOffsetInBits, const std::vector<DxilDIArrayDim> &ArrayDims);
 
 
 private:
 private:
   llvm::LLVMContext &m_Ctx;
   llvm::LLVMContext &m_Ctx;

+ 1 - 0
include/dxc/DXIL/DxilPDB.h

@@ -19,6 +19,7 @@ struct IMalloc;
 namespace hlsl {
 namespace hlsl {
 namespace pdb {
 namespace pdb {
 
 
+  HRESULT LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **ppHash, IDxcBlob **ppContainer);
   HRESULT LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **pOutContainer);
   HRESULT LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **pOutContainer);
   HRESULT WriteDxilPDB(IMalloc *pMalloc, IDxcBlob *pContainer, llvm::ArrayRef<BYTE> HashData, IDxcBlob **ppOutBlob);
   HRESULT WriteDxilPDB(IMalloc *pMalloc, IDxcBlob *pContainer, llvm::ArrayRef<BYTE> HashData, IDxcBlob **ppOutBlob);
 }
 }

+ 2 - 1
include/dxc/DxilContainer/DxilContainer.h

@@ -49,7 +49,7 @@ enum class DxilShaderHashFlags : uint32_t {
 typedef struct DxilShaderHash {
 typedef struct DxilShaderHash {
   uint32_t Flags; // DxilShaderHashFlags
   uint32_t Flags; // DxilShaderHashFlags
   uint8_t Digest[DxilContainerHashSize];
   uint8_t Digest[DxilContainerHashSize];
-} DxcShaderHash;
+} DxilShaderHash;
 
 
 struct DxilContainerVersion {
 struct DxilContainerVersion {
   uint16_t Major;
   uint16_t Major;
@@ -410,6 +410,7 @@ enum class SerializeDxilFlags : uint32_t {
   DebugNameDependOnSource     = 1 << 2, // Make the debug name depend on source (and not just final module).
   DebugNameDependOnSource     = 1 << 2, // Make the debug name depend on source (and not just final module).
   StripReflectionFromDxilPart = 1 << 3, // Strip Reflection info from DXIL part.
   StripReflectionFromDxilPart = 1 << 3, // Strip Reflection info from DXIL part.
   IncludeReflectionPart       = 1 << 4, // Include reflection in STAT part.
   IncludeReflectionPart       = 1 << 4, // Include reflection in STAT part.
+  StripRootSignature          = 1 << 5, // Strip Root Signature from main shader container.
 };
 };
 inline SerializeDxilFlags& operator |=(SerializeDxilFlags& l, const SerializeDxilFlags& r) {
 inline SerializeDxilFlags& operator |=(SerializeDxilFlags& l, const SerializeDxilFlags& r) {
   l = static_cast<SerializeDxilFlags>(static_cast<int>(l) | static_cast<int>(r));
   l = static_cast<SerializeDxilFlags>(static_cast<int>(l) | static_cast<int>(r));

+ 2 - 1
include/dxc/DxilContainer/DxilContainerAssembler.h

@@ -52,7 +52,8 @@ void SerializeDxilContainerForModule(hlsl::DxilModule *pModule,
                                      llvm::StringRef DebugName,
                                      llvm::StringRef DebugName,
                                      SerializeDxilFlags Flags,
                                      SerializeDxilFlags Flags,
                                      DxilShaderHash *pShaderHashOut = nullptr,
                                      DxilShaderHash *pShaderHashOut = nullptr,
-                                     AbstractMemoryStream *pReflectionStreamOut = nullptr);
+                                     AbstractMemoryStream *pReflectionStreamOut = nullptr,
+                                     AbstractMemoryStream *pRootSigStreamOut = nullptr);
 void SerializeDxilContainerForRootSignature(hlsl::RootSignatureHandle *pRootSigHandle,
 void SerializeDxilContainerForRootSignature(hlsl::RootSignatureHandle *pRootSigHandle,
                                      AbstractMemoryStream *pStream);
                                      AbstractMemoryStream *pStream);
 
 

+ 2 - 2
include/dxc/DxilContainer/DxilPipelineStateValidation.h

@@ -443,7 +443,7 @@ public:
   // returns true if no errors occurred.
   // returns true if no errors occurred.
   bool InitFromPSV0(const void* pBits, uint32_t size) {
   bool InitFromPSV0(const void* pBits, uint32_t size) {
     if(!(pBits != nullptr)) return false;
     if(!(pBits != nullptr)) return false;
-    uint8_t* pCurBits = (uint8_t*)pBits;
+    uint8_t* pCurBits = (uint8_t*)const_cast<void*>(pBits);
     uint32_t minsize = sizeof(PSVRuntimeInfo0) + sizeof(uint32_t) * 2;
     uint32_t minsize = sizeof(PSVRuntimeInfo0) + sizeof(uint32_t) * 2;
     if(!(size >= minsize)) return false;
     if(!(size >= minsize)) return false;
     m_uPSVRuntimeInfoSize = *((const uint32_t*)pCurBits);
     m_uPSVRuntimeInfoSize = *((const uint32_t*)pCurBits);
@@ -534,7 +534,7 @@ public:
 
 
       // Input to Output dependencies
       // Input to Output dependencies
       for (unsigned i = 0; i < 4; i++) {
       for (unsigned i = 0; i < 4; i++) {
-        if (m_pPSVRuntimeInfo1->SigOutputVectors[i] > 0 && m_pPSVRuntimeInfo1->SigInputVectors > 0) {
+        if (!IsMS() && m_pPSVRuntimeInfo1->SigOutputVectors[i] > 0 && m_pPSVRuntimeInfo1->SigInputVectors > 0) {
           minsize += PSVComputeInputOutputTableSize(m_pPSVRuntimeInfo1->SigInputVectors, m_pPSVRuntimeInfo1->SigOutputVectors[i]);
           minsize += PSVComputeInputOutputTableSize(m_pPSVRuntimeInfo1->SigInputVectors, m_pPSVRuntimeInfo1->SigOutputVectors[i]);
           if (!(size >= minsize)) return false;
           if (!(size >= minsize)) return false;
           m_pInputToOutputTable = (uint32_t*)pCurBits;
           m_pInputToOutputTable = (uint32_t*)pCurBits;

+ 2 - 0
include/dxc/DxilContainer/DxilRuntimeReflection.h

@@ -137,6 +137,7 @@ public:
   const char *Get(uint32_t offset) const {
   const char *Get(uint32_t offset) const {
     _Analysis_assume_(offset < m_size && m_table &&
     _Analysis_assume_(offset < m_size && m_table &&
                       m_table[m_size - 1] == '\0');
                       m_table[m_size - 1] == '\0');
+    (void)m_size; // avoid unused private warning if use above is ignored.
     return m_table + offset;
     return m_table + offset;
   }
   }
 };
 };
@@ -185,6 +186,7 @@ public:
       : m_table(table), m_size(size) {}
       : m_table(table), m_size(size) {}
   const void *Get(uint32_t offset) const {
   const void *Get(uint32_t offset) const {
     _Analysis_assume_(offset < m_size && m_table);
     _Analysis_assume_(offset < m_size && m_table);
+    (void)m_size; // avoid unused private warning if use above is ignored.
     return (const void*)(((const char*)m_table) + offset);
     return (const void*)(((const char*)m_table) + offset);
   }
   }
 };
 };

+ 2 - 2
include/dxc/DxilRootSignature/DxilRootSignature.h

@@ -240,7 +240,7 @@ struct DxilDescriptorRange {
 };
 };
 struct DxilRootDescriptorTable {
 struct DxilRootDescriptorTable {
   uint32_t NumDescriptorRanges;
   uint32_t NumDescriptorRanges;
-  _Field_size_full_(NumDescriptorRanges)  const DxilDescriptorRange *pDescriptorRanges;
+  _Field_size_full_(NumDescriptorRanges)  DxilDescriptorRange *pDescriptorRanges;
 };
 };
 struct DxilRootConstants {
 struct DxilRootConstants {
   uint32_t ShaderRegister;
   uint32_t ShaderRegister;
@@ -275,7 +275,7 @@ struct DxilDescriptorRange1 {
 };
 };
 struct DxilRootDescriptorTable1 {
 struct DxilRootDescriptorTable1 {
   uint32_t NumDescriptorRanges;
   uint32_t NumDescriptorRanges;
-  _Field_size_full_(NumDescriptorRanges)  const DxilDescriptorRange1 *pDescriptorRanges;
+  _Field_size_full_(NumDescriptorRanges)  DxilDescriptorRange1 *pDescriptorRanges;
 };
 };
 struct DxilRootParameter1 {
 struct DxilRootParameter1 {
   DxilRootParameterType ParameterType;
   DxilRootParameterType ParameterType;

+ 1 - 1
include/dxc/HLSL/DxilConvergentName.h

@@ -11,5 +11,5 @@
 #pragma once
 #pragma once
 
 
 namespace hlsl {
 namespace hlsl {
-  static char *kConvergentFunctionPrefix = "dxil.convergent.marker.";
+  static const char *kConvergentFunctionPrefix = "dxil.convergent.marker.";
 }
 }

+ 77 - 0
include/dxc/HLSL/DxilValueCache.h

@@ -0,0 +1,77 @@
+//===--------- DxilValueCache.cpp - Dxil Constant Value Cache ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+namespace llvm {
+
+class Module;
+class DominatorTree;
+
+struct DxilValueCache : public ModulePass {
+  static char ID;
+
+  // Special Weak Value to Weak Value map.
+  struct WeakValueMap {
+    struct ValueVH : public CallbackVH {
+      ValueVH(Value *V) : CallbackVH(V) {}
+      void allUsesReplacedWith(Value *) override { setValPtr(nullptr); }
+    };
+    struct ValueEntry {
+      WeakVH Value;
+      ValueVH Self;
+      ValueEntry() : Value(nullptr), Self(nullptr) {}
+      inline void Set(llvm::Value *Key, llvm::Value *V) { Self = Key; Value = V; }
+      inline bool IsStale() const { return Self == nullptr; }
+    };
+    ValueMap<const Value *, ValueEntry> Map;
+    Value *Get(Value *V);
+    void Set(Value *Key, Value *V);
+    bool Seen(Value *v);
+    void SetSentinel(Value *V);
+    void dump() const;
+  private:
+    Value *GetSentinel(LLVMContext &Ctx);
+    std::unique_ptr<Value> Sentinel;
+  };
+
+private:
+
+  WeakValueMap ValueMap;
+
+  void MarkAlwaysReachable(BasicBlock *BB);
+  void MarkNeverReachable(BasicBlock *BB);
+  bool IsAlwaysReachable_(BasicBlock *BB);
+  bool IsNeverReachable_(BasicBlock *BB);
+  Value *OptionallyGetValue(Value *V);
+  Value *ProcessValue(Value *V, DominatorTree *DT);
+
+  Value *ProcessAndSimplify_PHI(Instruction *I, DominatorTree *DT);
+  Value *ProcessAndSimpilfy_Br(Instruction *I, DominatorTree *DT);
+  Value *SimplifyAndCacheResult(Instruction *I, DominatorTree *DT);
+
+public:
+
+  const char *getPassName() const override;
+  DxilValueCache();
+
+  bool runOnModule(Module &M) override { return false; } // Doesn't do anything by itself.
+  void dump() const;
+  Value *GetValue(Value *V, DominatorTree *DT=nullptr);
+  bool IsAlwaysReachable(BasicBlock *BB, DominatorTree *DT=nullptr);
+  bool IsNeverReachable(BasicBlock *BB, DominatorTree *DT=nullptr);
+};
+
+void initializeDxilValueCachePass(class llvm::PassRegistry &);
+ModulePass *createDxilValueCachePass();
+
+}
+
+

+ 3 - 3
include/dxc/Support/DxcLangExtensionsHelper.h

@@ -174,9 +174,9 @@ public:
 
 
     // Define a  little function to convert encoded blob into a string.
     // Define a  little function to convert encoded blob into a string.
     auto GetErrorAsString = [&name](const CComPtr<IDxcBlobEncoding> &pBlobString) -> std::string {
     auto GetErrorAsString = [&name](const CComPtr<IDxcBlobEncoding> &pBlobString) -> std::string {
-      CComPtr<IDxcBlobEncoding> pUTF8BlobStr;
-      if (SUCCEEDED(hlsl::DxcGetBlobAsUtf8(pBlobString, &pUTF8BlobStr)))
-        return std::string(static_cast<char*>(pUTF8BlobStr->GetBufferPointer()), pUTF8BlobStr->GetBufferSize());
+      CComPtr<IDxcBlobUtf8> pUTF8BlobStr;
+      if (SUCCEEDED(hlsl::DxcGetBlobAsUtf8(pBlobString, DxcGetThreadMallocNoRef(), &pUTF8BlobStr)))
+        return std::string(pUTF8BlobStr->GetStringPointer(), pUTF8BlobStr->GetStringLength());
       else
       else
         return std::string("invalid semantic define " + name);
         return std::string("invalid semantic define " + name);
     };
     };

+ 25 - 8
include/dxc/Support/FileIOHelper.h

@@ -20,6 +20,8 @@
 // Forward declarations.
 // Forward declarations.
 struct IDxcBlob;
 struct IDxcBlob;
 struct IDxcBlobEncoding;
 struct IDxcBlobEncoding;
+struct IDxcBlobUtf8;
+struct IDxcBlobUtf16;
 
 
 namespace hlsl {
 namespace hlsl {
 
 
@@ -131,6 +133,26 @@ void WriteBinaryFile(_In_z_ LPCWSTR pFileName,
 UINT32 DxcCodePageFromBytes(_In_count_(byteLen) const char *bytes,
 UINT32 DxcCodePageFromBytes(_In_count_(byteLen) const char *bytes,
                             size_t byteLen) throw();
                             size_t byteLen) throw();
 
 
+// More general create blob functions, used by other functions
+// Null pMalloc means use current thread malloc.
+// bPinned will point to existing memory without managing it;
+// bCopy will copy to heap; bPinned and bCopy are mutually exclusive.
+// If encodingKnown, UTF-8 or UTF-16, and null-termination possible,
+// an IDxcBlobUtf8 or IDxcBlobUtf16 will be constructed.
+// If text, it's best if size includes null terminator when not copying,
+// otherwise IDxcBlobUtf8 or IDxcBlobUtf16 will not be constructed.
+HRESULT DxcCreateBlob(
+    LPCVOID pPtr, SIZE_T size, bool bPinned, bool bCopy,
+    bool encodingKnown, UINT32 codePage,
+    IMalloc *pMalloc, IDxcBlobEncoding **ppBlobEncoding) throw();
+// Create from blob references original blob.
+// Pass nonzero for offset or length for sub-blob reference.
+HRESULT DxcCreateBlobEncodingFromBlob(
+    IDxcBlob *pFromBlob, UINT32 offset, UINT32 length,
+    bool encodingKnown, UINT32 codePage,
+    IMalloc *pMalloc, IDxcBlobEncoding **ppBlobEncoding) throw();
+
+// Load files
 HRESULT
 HRESULT
 DxcCreateBlobFromFile(_In_opt_ IMalloc *pMalloc, LPCWSTR pFileName,
 DxcCreateBlobFromFile(_In_opt_ IMalloc *pMalloc, LPCWSTR pFileName,
                       _In_opt_ UINT32 *pCodePage,
                       _In_opt_ UINT32 *pCodePage,
@@ -191,16 +213,11 @@ DxcCreateBlobWithEncodingOnMallocCopy(
   _In_ IMalloc *pIMalloc, _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
   _In_ IMalloc *pIMalloc, _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
   _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) throw();
   _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) throw();
 
 
-HRESULT DxcGetBlobAsUtf8(_In_ IDxcBlob *pBlob,
-                         _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) throw();
-HRESULT
-DxcGetBlobAsUtf8NullTerm(
-    _In_ IDxcBlob *pBlob,
-    _COM_Outptr_ IDxcBlobEncoding **ppBlobEncoding) throw();
-
+HRESULT DxcGetBlobAsUtf8(_In_ IDxcBlob *pBlob, _In_ IMalloc *pMalloc,
+                         _COM_Outptr_ IDxcBlobUtf8 **pBlobEncoding) throw();
 HRESULT
 HRESULT
 DxcGetBlobAsUtf16(_In_ IDxcBlob *pBlob, _In_ IMalloc *pMalloc,
 DxcGetBlobAsUtf16(_In_ IDxcBlob *pBlob, _In_ IMalloc *pMalloc,
-                  _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) throw();
+                  _COM_Outptr_ IDxcBlobUtf16 **pBlobEncoding) throw();
 
 
 bool IsBlobNullOrEmpty(_In_opt_ IDxcBlob *pBlob) throw();
 bool IsBlobNullOrEmpty(_In_opt_ IDxcBlob *pBlob) throw();
 
 

+ 5 - 1
include/dxc/Support/HLSLOptions.h

@@ -102,6 +102,9 @@ public:
   llvm::StringRef OutputHeader; // OPT_Fh
   llvm::StringRef OutputHeader; // OPT_Fh
   llvm::StringRef OutputObject; // OPT_Fo
   llvm::StringRef OutputObject; // OPT_Fo
   llvm::StringRef OutputWarningsFile; // OPT_Fe
   llvm::StringRef OutputWarningsFile; // OPT_Fe
+  llvm::StringRef OutputReflectionFile; // OPT_Fre
+  llvm::StringRef OutputRootSigFile; // OPT_Frs
+  llvm::StringRef OutputShaderHashFile; // OPT_Fsh
   llvm::StringRef Preprocess; // OPT_P
   llvm::StringRef Preprocess; // OPT_P
   llvm::StringRef TargetProfile; // OPT_target_profile
   llvm::StringRef TargetProfile; // OPT_target_profile
   llvm::StringRef VariableName; // OPT_Vn
   llvm::StringRef VariableName; // OPT_Vn
@@ -112,6 +115,7 @@ public:
   llvm::StringRef FloatDenormalMode; // OPT_denorm
   llvm::StringRef FloatDenormalMode; // OPT_denorm
   std::vector<std::string> Exports; // OPT_exports
   std::vector<std::string> Exports; // OPT_exports
   llvm::StringRef DefaultLinkage; // OPT_default_linkage
   llvm::StringRef DefaultLinkage; // OPT_default_linkage
+  unsigned DefaultTextCodePage = DXC_CP_UTF8; // OPT_encoding
 
 
   bool AllResourcesBound = false; // OPT_all_resources_bound
   bool AllResourcesBound = false; // OPT_all_resources_bound
   bool AstDump = false; // OPT_ast_dump
   bool AstDump = false; // OPT_ast_dump
@@ -144,7 +148,7 @@ public:
   bool UseHexLiterals = false; // OPT_Lx
   bool UseHexLiterals = false; // OPT_Lx
   bool UseInstructionByteOffsets = false; // OPT_No
   bool UseInstructionByteOffsets = false; // OPT_No
   bool UseInstructionNumbers = false; // OPT_Ni
   bool UseInstructionNumbers = false; // OPT_Ni
-  bool NotUseLegacyCBufLoad = false;  // OPT_not_use_legacy_cbuf_load
+  bool NotUseLegacyCBufLoad = false;  // OPT_no_legacy_cbuf_layout
   bool PackPrefixStable = false;  // OPT_pack_prefix_stable
   bool PackPrefixStable = false;  // OPT_pack_prefix_stable
   bool PackOptimized = false;  // OPT_pack_optimized
   bool PackOptimized = false;  // OPT_pack_optimized
   bool DisplayIncludeProcess = false; // OPT__vi
   bool DisplayIncludeProcess = false; // OPT__vi

+ 29 - 8
include/dxc/Support/HLSLOptions.td

@@ -219,11 +219,17 @@ def flegacy_macro_expansion : Flag<["-", "/"], "flegacy-macro-expansion">, Group
     HelpText<"Expand the operands before performing token-pasting operation (fxc behavior)">;
     HelpText<"Expand the operands before performing token-pasting operation (fxc behavior)">;
 def flegacy_resource_reservation : Flag<["-", "/"], "flegacy-resource-reservation">, Group<hlslcomp_Group>, Flags<[CoreOption, DriverOption]>,
 def flegacy_resource_reservation : Flag<["-", "/"], "flegacy-resource-reservation">, Group<hlslcomp_Group>, Flags<[CoreOption, DriverOption]>,
     HelpText<"Reserve unused explicit register assignments for compatibility with shader model 5.0 and below">;
     HelpText<"Reserve unused explicit register assignments for compatibility with shader model 5.0 and below">;
-def not_use_legacy_cbuf_load : Flag<["-", "/"], "not_use_legacy_cbuf_load">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+def no_legacy_cbuf_layout : Flag<["-", "/"], "no-legacy-cbuf-layout">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
   HelpText<"Do not use legacy cbuffer load">;
   HelpText<"Do not use legacy cbuffer load">;
-def pack_prefix_stable : Flag<["-", "/"], "pack_prefix_stable">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+def not_use_legacy_cbuf_load_ : Flag<["-", "/"], "not_use_legacy_cbuf_load">, Group<hlslcomp_Group>, Flags<[CoreOption, HelpHidden]>,
+  HelpText<"Do not use legacy cbuffer load">;
+def pack_prefix_stable : Flag<["-", "/"], "pack-prefix-stable">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+  HelpText<"(default) Pack signatures preserving prefix-stable property - appended elements will not disturb placement of prior elements">;
+def pack_prefix_stable_ : Flag<["-", "/"], "pack_prefix_stable">, Group<hlslcomp_Group>, Flags<[CoreOption, HelpHidden]>,
   HelpText<"(default) Pack signatures preserving prefix-stable property - appended elements will not disturb placement of prior elements">;
   HelpText<"(default) Pack signatures preserving prefix-stable property - appended elements will not disturb placement of prior elements">;
-def pack_optimized : Flag<["-", "/"], "pack_optimized">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+def pack_optimized : Flag<["-", "/"], "pack-optimized">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+  HelpText<"Optimize signature packing assuming identical signature provided for each connecting stage">;
+def pack_optimized_ : Flag<["-", "/"], "pack_optimized">, Group<hlslcomp_Group>, Flags<[CoreOption, HelpHidden]>,
   HelpText<"Optimize signature packing assuming identical signature provided for each connecting stage">;
   HelpText<"Optimize signature packing assuming identical signature provided for each connecting stage">;
 def hlsl_version : Separate<["-", "/"], "HV">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
 def hlsl_version : Separate<["-", "/"], "HV">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
   HelpText<"HLSL version (2016, 2017, 2018). Default is 2018">;
   HelpText<"HLSL version (2016, 2017, 2018). Default is 2018">;
@@ -242,9 +248,15 @@ def export_shaders_only : Flag<["-", "/"], "export-shaders-only">, Group<hlslcom
   HelpText<"Only export shaders when compiling a library">;
   HelpText<"Only export shaders when compiling a library">;
 def default_linkage : Separate<["-", "/"], "default-linkage">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
 def default_linkage : Separate<["-", "/"], "default-linkage">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
   HelpText<"Set default linkage for non-shader functions when compiling or linking to a library target (internal, external)">;
   HelpText<"Set default linkage for non-shader functions when compiling or linking to a library target (internal, external)">;
+def encoding : Separate<["-", "/"], "encoding">, Group<hlslcomp_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Set default encoding for text outputs (utf8|utf16) default=utf8">;
 def validator_version : Separate<["-", "/"], "validator-version">, Group<hlslcomp_Group>, Flags<[CoreOption, HelpHidden]>,
 def validator_version : Separate<["-", "/"], "validator-version">, Group<hlslcomp_Group>, Flags<[CoreOption, HelpHidden]>,
   HelpText<"Override validator version for module.  Format: <major.minor> ; Default: DXIL.dll version or current internal version.">;
   HelpText<"Override validator version for module.  Format: <major.minor> ; Default: DXIL.dll version or current internal version.">;
 
 
+// Used with API only
+def skip_serialization : Flag<["-", "/"], "skip-serialization">, Group<hlslcore_Group>, Flags<[CoreOption, HelpHidden]>,
+  HelpText<"Return a module interface instead of serialized output">;
+
 // SPIRV Change Starts
 // SPIRV Change Starts
 def spirv : Flag<["-"], "spirv">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
 def spirv : Flag<["-"], "spirv">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
   HelpText<"Generate SPIR-V code">;
   HelpText<"Generate SPIR-V code">;
@@ -316,9 +328,9 @@ def Zpr : Flag<["-", "/"], "Zpr">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
 def Zpc : Flag<["-", "/"], "Zpc">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
 def Zpc : Flag<["-", "/"], "Zpc">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
   HelpText<"Pack matrices in column-major order">;
   HelpText<"Pack matrices in column-major order">;
 def Zss : Flag<["-", "/"], "Zss">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
 def Zss : Flag<["-", "/"], "Zss">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
-  HelpText<"Build debug name considering source information">;
+  HelpText<"Compute Shader Hash considering source information">;
 def Zsb : Flag<["-", "/"], "Zsb">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
 def Zsb : Flag<["-", "/"], "Zsb">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
-  HelpText<"Build debug name considering only output binary">;
+  HelpText<"Compute Shader Hash considering only output binary">;
 
 
 // deprecated /Gpp def Gpp : Flag<["-", "/"], "Gpp">, HelpText<"Force partial precision">;
 // deprecated /Gpp def Gpp : Flag<["-", "/"], "Gpp">, HelpText<"Force partial precision">;
 def Gfa : Flag<["-", "/"], "Gfa">, HelpText<"Avoid flow control constructs">, Flags<[CoreOption]>, Group<hlslcomp_Group>;
 def Gfa : Flag<["-", "/"], "Gfa">, HelpText<"Avoid flow control constructs">, Flags<[CoreOption]>, Group<hlslcomp_Group>;
@@ -339,6 +351,10 @@ def Fe : JoinedOrSeparate<["-", "/"], "Fe">, MetaVarName<"<file>">, HelpText<"Ou
 def Fd : JoinedOrSeparate<["-", "/"], "Fd">, MetaVarName<"<file>">,
 def Fd : JoinedOrSeparate<["-", "/"], "Fd">, MetaVarName<"<file>">,
   HelpText<"Write debug information to the given file, or automatically named file in directory when ending in '\\'">,
   HelpText<"Write debug information to the given file, or automatically named file in directory when ending in '\\'">,
   Flags<[CoreOption, DriverOption]>, Group<hlslcomp_Group>;
   Flags<[CoreOption, DriverOption]>, Group<hlslcomp_Group>;
+def Fre : Separate<["-", "/"], "Fre">, MetaVarName<"<file>">, HelpText<"Output reflection to the given file">, Flags<[CoreOption, DriverOption]>, Group<hlslcomp_Group>;
+def Frs : Separate<["-", "/"], "Frs">, MetaVarName<"<file>">, HelpText<"Output root signature to the given file">, Flags<[CoreOption, DriverOption]>, Group<hlslcomp_Group>;
+def Fsh : Separate<["-", "/"], "Fsh">, MetaVarName<"<file>">, HelpText<"Output shader hash to the given file">, Flags<[CoreOption, DriverOption]>, Group<hlslcomp_Group>;
+
 def Vn : JoinedOrSeparate<["-", "/"], "Vn">, MetaVarName<"<name>">, HelpText<"Use <name> as variable name in header file">, Flags<[DriverOption]>, Group<hlslcomp_Group>;
 def Vn : JoinedOrSeparate<["-", "/"], "Vn">, MetaVarName<"<name>">, HelpText<"Use <name> as variable name in header file">, Flags<[DriverOption]>, Group<hlslcomp_Group>;
 def Cc : Flag<["-", "/"], "Cc">, HelpText<"Output color coded assembly listings">, Group<hlslcomp_Group>, Flags<[DriverOption]>;
 def Cc : Flag<["-", "/"], "Cc">, HelpText<"Output color coded assembly listings">, Group<hlslcomp_Group>, Flags<[DriverOption]>;
 def Ni : Flag<["-", "/"], "Ni">, HelpText<"Output instruction numbers in assembly listings">, Group<hlslcomp_Group>, Flags<[DriverOption]>;
 def Ni : Flag<["-", "/"], "Ni">, HelpText<"Output instruction numbers in assembly listings">, Group<hlslcomp_Group>, Flags<[DriverOption]>;
@@ -366,7 +382,8 @@ def Qstrip_rootsignature : Flag<["-", "/"], "Qstrip_rootsignature">, Flags<[Core
 def setrootsignature     : JoinedOrSeparate<["-", "/"], "setrootsignature">,     MetaVarName<"<file>">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>, HelpText<"Attach root signature to shader bytecode">;
 def setrootsignature     : JoinedOrSeparate<["-", "/"], "setrootsignature">,     MetaVarName<"<file>">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>, HelpText<"Attach root signature to shader bytecode">;
 def extractrootsignature : Flag<["-", "/"], "extractrootsignature">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Extract root signature from shader bytecode (must be used with /Fo <file>)">;
 def extractrootsignature : Flag<["-", "/"], "extractrootsignature">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Extract root signature from shader bytecode (must be used with /Fo <file>)">;
 def verifyrootsignature  : JoinedOrSeparate<["-", "/"], "verifyrootsignature">,  MetaVarName<"<file>">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Verify shader bytecode with root signature">;
 def verifyrootsignature  : JoinedOrSeparate<["-", "/"], "verifyrootsignature">,  MetaVarName<"<file>">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Verify shader bytecode with root signature">;
-def force_rootsig_ver    : JoinedOrSeparate<["-", "/"], "force_rootsig_ver">,    Flags<[CoreOption]>, MetaVarName<"<profile>">, Group<hlslcomp_Group>, HelpText<"force root signature version (rootsig_1_1 if omitted)">;
+def force_rootsig_ver    : JoinedOrSeparate<["-", "/"], "force-rootsig-ver">,    Flags<[CoreOption]>, MetaVarName<"<profile>">, Group<hlslcomp_Group>, HelpText<"force root signature version (rootsig_1_1 if omitted)">;
+def force_rootsig_ver_    : JoinedOrSeparate<["-", "/"], "force_rootsig_ver">,    Flags<[CoreOption, HelpHidden]>, MetaVarName<"<profile>">, Group<hlslcomp_Group>, HelpText<"force root signature version (rootsig_1_1 if omitted)">;
 
 
 // Temporary TEST options, until reflection inside DXIL part can always be stripped
 // Temporary TEST options, until reflection inside DXIL part can always be stripped
 def Qkeep_reflect_in_dxil : Flag<["-", "/"], "Qkeep_reflect_in_dxil">,
 def Qkeep_reflect_in_dxil : Flag<["-", "/"], "Qkeep_reflect_in_dxil">,
@@ -386,9 +403,13 @@ def matchUAVs : JoinedOrSeparate<["-", "/"], "matchUAVs">, MetaVarName<"<file>">
 def enable_unbounded_descriptor_tables : Flag<["-", "/"], "enable_unbounded_descriptor_tables">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
 def enable_unbounded_descriptor_tables : Flag<["-", "/"], "enable_unbounded_descriptor_tables">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
   HelpText<"Enables unbounded descriptor tables">;
   HelpText<"Enables unbounded descriptor tables">;
 */
 */
-def res_may_alias : Flag<["-", "/"], "res_may_alias">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
+def res_may_alias : Flag<["-", "/"], "res-may-alias">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
+  HelpText<"Assume that UAVs/SRVs may alias">;
+def res_may_alias_ : Flag<["-", "/"], "res_may_alias">, Flags<[CoreOption, HelpHidden]>, Group<hlslcomp_Group>,
   HelpText<"Assume that UAVs/SRVs may alias">;
   HelpText<"Assume that UAVs/SRVs may alias">;
-def all_resources_bound : Flag<["-", "/"], "all_resources_bound">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
+// Dash-separated spelling of the option; shown in help output.
+def all_resources_bound : Flag<["-", "/"], "all-resources-bound">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
+  HelpText<"Enables aggressive flattening">;
+def all_resources_bound_ : Flag<["-", "/"], "all_resources_bound">, Flags<[CoreOption, HelpHidden]>, Group<hlslcomp_Group>,
   HelpText<"Enables agressive flattening">;
   HelpText<"Enables agressive flattening">;
 
 
 def setprivate : JoinedOrSeparate<["-", "/"], "setprivate">, Flags<[DriverOption]>, MetaVarName<"<file>">, Group<hlslutil_Group>,
 def setprivate : JoinedOrSeparate<["-", "/"], "setprivate">, Flags<[DriverOption]>, MetaVarName<"<file>">, Group<hlslutil_Group>,

+ 7 - 0
include/dxc/Support/Unicode.h

@@ -45,9 +45,15 @@ typedef char acp_char;
 // A ccp_char is a character encoded in the console code page.
 // A ccp_char is a character encoded in the console code page.
 typedef char ccp_char;
 typedef char ccp_char;
 
 
+_Success_(return != false)
+bool UTF8ToConsoleString(_In_opt_count_(textLen) const char* text, _In_ size_t textLen, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
+
 _Success_(return != false)
 _Success_(return != false)
 bool UTF8ToConsoleString(_In_z_ const char* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
 bool UTF8ToConsoleString(_In_z_ const char* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
 
 
+_Success_(return != false)
+bool UTF16ToConsoleString(_In_opt_count_(textLen) const wchar_t* text, _In_ size_t textLen, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
+
 _Success_(return != false)
 _Success_(return != false)
 bool UTF16ToConsoleString(_In_z_ const wchar_t* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
 bool UTF16ToConsoleString(_In_z_ const wchar_t* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
 
 
@@ -60,6 +66,7 @@ bool UTF8ToUTF16String(_In_opt_count_(cbUTF8) const char *pUTF8, size_t cbUTF8,
 std::wstring UTF8ToUTF16StringOrThrow(_In_z_ const char *pUTF8);
 std::wstring UTF8ToUTF16StringOrThrow(_In_z_ const char *pUTF8);
 
 
 _Success_(return != false)
 _Success_(return != false)
+bool UTF16ToUTF8String(_In_z_ const wchar_t *pUTF16, size_t cUTF16, _Inout_ std::string *pUTF8);
 bool UTF16ToUTF8String(_In_z_ const wchar_t *pUTF16, _Inout_ std::string *pUTF8);
 bool UTF16ToUTF8String(_In_z_ const wchar_t *pUTF16, _Inout_ std::string *pUTF8);
 
 
 std::string UTF16ToUTF8StringOrThrow(_In_z_ const wchar_t *pUTF16);
 std::string UTF16ToUTF8StringOrThrow(_In_z_ const wchar_t *pUTF16);

+ 2 - 0
include/dxc/Support/WinAdapter.h

@@ -268,6 +268,7 @@
 #define _In_count_(size)
 #define _In_count_(size)
 #define _In_range_(lb, ub)
 #define _In_range_(lb, ub)
 #define _In_bytecount_(size)
 #define _In_bytecount_(size)
+#define _In_opt_bytecount_(size)
 #define _In_NLS_string_(size)
 #define _In_NLS_string_(size)
 #define __in_bcount(size)
 #define __in_bcount(size)
 
 
@@ -333,6 +334,7 @@
 #define _COM_Outptr_
 #define _COM_Outptr_
 #define _COM_Outptr_opt_
 #define _COM_Outptr_opt_
 #define _COM_Outptr_result_maybenull_
 #define _COM_Outptr_result_maybenull_
+#define _COM_Outptr_opt_result_maybenull_
 
 
 #define _Null_
 #define _Null_
 #define _Notnull_
 #define _Notnull_

+ 463 - 50
include/dxc/Support/dxcapi.impl.h

@@ -15,6 +15,7 @@
 #include "dxc/dxcapi.h"
 #include "dxc/dxcapi.h"
 #include "dxc/Support/microcom.h"
 #include "dxc/Support/microcom.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/ArrayRef.h"
 
 
 // Simple adaptor for IStream. Can probably do better.
 // Simple adaptor for IStream. Can probably do better.
 class raw_stream_ostream : public llvm::raw_ostream {
 class raw_stream_ostream : public llvm::raw_ostream {
@@ -32,70 +33,269 @@ public:
   }
   }
 };
 };
 
 
-class DxcOperationResult : public IDxcOperationResult {
-private:
-  DXC_MICROCOM_TM_REF_FIELDS()
+namespace {
+// Copies `size` bytes of UTF-8 text into a new blob and, when codePage is
+// DXC_CP_UTF16, replaces that blob with its UTF-16 conversion. Any other
+// codePage value leaves the text as UTF-8.
+// On success *ppBlobEncoding receives the blob (caller owns the reference).
+// Failures of either step are reported through the returned HRESULT, which is
+// what callers testing the result with FAILED() expect.
+HRESULT TranslateUtf8StringForOutput(
+    _In_opt_count_(size) LPCSTR pStr, SIZE_T size, UINT32 codePage, IDxcBlobEncoding **ppBlobEncoding) {
+  CComPtr<IDxcBlobEncoding> pBlobEncoding;
+  IFR(hlsl::DxcCreateBlobWithEncodingOnHeapCopy(pStr, size, DXC_CP_UTF8, &pBlobEncoding));
+  if (codePage == DXC_CP_UTF16) {
+    CComPtr<IDxcBlobUtf16> pBlobUtf16;
+    IFR(hlsl::DxcGetBlobAsUtf16(pBlobEncoding, nullptr, &pBlobUtf16));
+    pBlobEncoding = pBlobUtf16;
+  }
+  *ppBlobEncoding = pBlobEncoding.Detach();
+  return S_OK;
+}
 
 
-  void Init(_In_opt_ IDxcBlob *pResultBlob,
-            _In_opt_ IDxcBlobEncoding *pErrorBlob, HRESULT status) {
-    m_status = status;
-    m_result = pResultBlob;
-    m_errors = pErrorBlob;
+HRESULT TranslateUtf16StringForOutput(
+    _In_opt_count_(size) LPCWSTR pStr, SIZE_T size, UINT32 codePage, IDxcBlobEncoding **ppBlobEncoding) {
+  CComPtr<IDxcBlobEncoding> pBlobEncoding;
+  IFR(hlsl::DxcCreateBlobWithEncodingOnHeapCopy(pStr, size, DXC_CP_UTF16, &pBlobEncoding));
+  if (codePage == DXC_CP_UTF8) {
+    CComPtr<IDxcBlobUtf8> pBlobUtf8;
+    IFT(hlsl::DxcGetBlobAsUtf8(pBlobEncoding, nullptr, &pBlobUtf8))
+      pBlobEncoding = pBlobUtf8;
   }
   }
+  *ppBlobEncoding = pBlobEncoding.Detach();
+  return S_OK;
+}
 
 
-public:
-  DXC_MICROCOM_TM_ADDREF_RELEASE_IMPL()
-  DXC_MICROCOM_TM_CTOR(DxcOperationResult)
+// Re-encodes a text blob to codePage. The blob must expose IDxcBlobEncoding;
+// an unknown encoding, or one other than UTF-8 / UTF-16, yields E_INVALIDARG.
+HRESULT TranslateStringBlobForOutput(IDxcBlob *pBlob, UINT32 codePage, IDxcBlobEncoding **ppBlobEncoding) {
+  CComPtr<IDxcBlobEncoding> pSourceText;
+  IFR(pBlob->QueryInterface(&pSourceText));
+  BOOL encodingKnown;
+  UINT32 sourceCodePage;
+  IFR(pSourceText->GetEncoding(&encodingKnown, &sourceCodePage));
+  IFRBOOL(encodingKnown, E_INVALIDARG);
+  // Dispatch on the blob's own encoding; each helper converts only if needed.
+  if (sourceCodePage == DXC_CP_UTF8)
+    return TranslateUtf8StringForOutput((LPCSTR)pBlob->GetBufferPointer(), pBlob->GetBufferSize(), codePage, ppBlobEncoding);
+  if (sourceCodePage == DXC_CP_UTF16)
+    return TranslateUtf16StringForOutput((LPCWSTR)pBlob->GetBufferPointer(), pBlob->GetBufferSize(), codePage, ppBlobEncoding);
+  return E_INVALIDARG;
+}
+}
 
 
-  HRESULT m_status;
-  CComPtr<IDxcBlob> m_result;
-  CComPtr<IDxcBlobEncoding> m_errors;
+typedef enum DxcOutputType {
+  DxcOutputType_None    = 0,
+  DxcOutputType_Blob    = 1,
+  DxcOutputType_Text    = 2,
 
 
-  HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, void **ppvObject) override {
-    return DoBasicQueryInterface<IDxcOperationResult>(this, iid, ppvObject);
+  DxcOutputTypeForceDword = 0xFFFFFFFF
+} DxcOutputType;
+
+inline DxcOutputType DxcGetOutputType(DXC_OUT_KIND kind) {
+  switch (kind) {
+  case DXC_OUT_OBJECT:
+  case DXC_OUT_PDB:
+  case DXC_OUT_SHADER_HASH:
+  case DXC_OUT_REFLECTION:
+  case DXC_OUT_ROOT_SIGNATURE:
+    return DxcOutputType_Blob;
+  case DXC_OUT_ERRORS:
+  case DXC_OUT_DISASSEMBLY:
+  case DXC_OUT_HLSL:
+  case DXC_OUT_TEXT:
+    return DxcOutputType_Text;
   }
   }
+  return DxcOutputType_None;
+}
 
 
-  static HRESULT CreateFromResultErrorStatus(_In_opt_ IDxcBlob *pResultBlob,
-                                             _In_opt_ IDxcBlobEncoding *pErrorBlob,
-                                             HRESULT status,
-                                             _COM_Outptr_ IDxcOperationResult **ppResult) {
-    *ppResult = nullptr;
-    CComPtr<DxcOperationResult> result = DxcOperationResult::Alloc(DxcGetThreadMallocNoRef());
-    IFROOM(result.p);
-    result->Init(pResultBlob, pErrorBlob, status);
-    *ppResult = result.Detach();
+// Update when new results are allowed
+static const unsigned kNumDxcOutputTypes = DXC_OUT_ROOT_SIGNATURE;
+static const SIZE_T kAutoSize = (SIZE_T)-1;
+static const LPCWSTR DxcOutNoName = nullptr;
+
+struct DxcOutputObject {
+  CComPtr<IUnknown> object;
+  CComPtr<IDxcBlobUtf16> name;
+  DXC_OUT_KIND kind = DXC_OUT_NONE;
+
+  /////////////////////////
+  // Convenient set methods
+  /////////////////////////
+
+  HRESULT SetObject(IUnknown *pUnknown, UINT32 codePage = DXC_CP_UTF8) {
+    DXASSERT_NOMSG(!object);
+    if (!pUnknown)
+      return S_OK;
+    if (codePage && DxcGetOutputType(kind) == DxcOutputType_Text) {
+      CComPtr<IDxcBlob> pBlob;
+      IFR(pUnknown->QueryInterface(&pBlob));
+      CComPtr<IDxcBlobEncoding> pEncoding;
+      // If not blob encoding, assume utf-8 text
+      if (FAILED(TranslateStringBlobForOutput(pBlob, codePage, &pEncoding)))
+        IFR(TranslateUtf8StringForOutput(
+          (LPCSTR)pBlob->GetBufferPointer(), pBlob->GetBufferSize(),
+          codePage, &pEncoding));
+      object = pEncoding;
+    } else {
+      object = pUnknown;
+    }
     return S_OK;
     return S_OK;
   }
   }
+  // Stores a copy of the `size` bytes at pData (made by DxcCreateBlobOnHeapCopy)
+  // as this output's object. A null pointer or zero size leaves the object
+  // unset and returns S_OK. The slot must not already hold an object.
+  HRESULT SetObjectData(_In_opt_bytecount_(size) LPCVOID pData, SIZE_T size) {
+    DXASSERT_NOMSG(!object);
+    if (!pData || !size)
+      return S_OK;
+    // Hold the new blob in a smart pointer: assigning it to `object` takes a
+    // second reference, so a raw pointer here would leak the creation reference.
+    CComPtr<IDxcBlob> pBlob;
+    IFR(hlsl::DxcCreateBlobOnHeapCopy(pData, size, &pBlob));
+    object = pBlob;
+    return S_OK;
+  }
+  // Sets the object from wide text translated to codePage. A null pText is a
+  // no-op returning S_OK; size == kAutoSize means "use wcslen(pText)".
+  HRESULT SetString(_In_ UINT32 codePage, _In_opt_count_(size) LPCWSTR pText, SIZE_T size = kAutoSize) {
+    DXASSERT_NOMSG(!object);
+    if (pText == nullptr)
+      return S_OK;
+    const SIZE_T textSize = (size == kAutoSize) ? wcslen(pText) : size;
+    CComPtr<IDxcBlobEncoding> pTranslated;
+    IFR(TranslateUtf16StringForOutput(pText, textSize, codePage, &pTranslated));
+    object = pTranslated;
+    return S_OK;
+  }
+  // Sets the object from UTF-8 text translated to codePage. A null pText is a
+  // no-op returning S_OK; size == kAutoSize means "use strlen(pText)".
+  HRESULT SetString(_In_ UINT32 codePage, _In_opt_count_(size) LPCSTR pText, SIZE_T size = kAutoSize) {
+    DXASSERT_NOMSG(!object);
+    if (pText == nullptr)
+      return S_OK;
+    const SIZE_T textSize = (size == kAutoSize) ? strlen(pText) : size;
+    CComPtr<IDxcBlobEncoding> pTranslated;
+    IFR(TranslateUtf8StringForOutput(pText, textSize, codePage, &pTranslated));
+    object = pTranslated;
+    return S_OK;
+  }
+  HRESULT SetName(_In_opt_z_ IDxcBlobUtf16 *pName) {
+    DXASSERT_NOMSG(!name);
+    name = pName;
+    return S_OK;
+  }
+  // Sets the output name from a null-terminated wide string.
+  // A null pName leaves the name unset and returns S_OK.
+  HRESULT SetName(_In_opt_z_ LPCWSTR pName) {
+    DXASSERT_NOMSG(!name);
+    if (pName == nullptr)
+      return S_OK;
+    // Byte size of the name, including its null terminator.
+    const size_t nameBytes = (wcslen(pName) + 1) * sizeof(wchar_t);
+    CComPtr<IDxcBlobEncoding> pNameBlob;
+    IFR(hlsl::DxcCreateBlobWithEncodingOnHeapCopy(
+          pName, nameBytes, DXC_CP_UTF16, &pNameBlob));
+    return pNameBlob->QueryInterface(&name);
+  }
+  // Sets the output name from a null-terminated UTF-8 string, converted to
+  // UTF-16. A null pName leaves the name unset and returns S_OK.
+  HRESULT SetName(_In_opt_z_ LPCSTR pName) {
+    DXASSERT_NOMSG(!name);
+    if (pName == nullptr)
+      return S_OK;
+    // Length passed on includes the null terminator.
+    const size_t nameLength = strlen(pName) + 1;
+    CComPtr<IDxcBlobEncoding> pNameBlob;
+    IFR(TranslateUtf8StringForOutput(pName, nameLength, DXC_CP_UTF16, &pNameBlob));
+    return pNameBlob->QueryInterface(&name);
+  }
+  // Sets the output name from UTF-8 text held in a StringRef, converted to
+  // UTF-16. An empty Name leaves the name unset and returns S_OK.
+  HRESULT SetName(_In_opt_z_ llvm::StringRef Name) {
+    DXASSERT_NOMSG(!name);
+    // Only an empty name is skipped; the previous inverted test returned early
+    // for every non-empty name, so the name was never stored.
+    if (Name.empty())
+      return S_OK;
+    CComPtr<IDxcBlobEncoding> pBlobEncoding;
+    IFR(TranslateUtf8StringForOutput(Name.data(), Name.size(), DXC_CP_UTF16, &pBlobEncoding));
+    return pBlobEncoding->QueryInterface(&name);
+  }
 
 
-  static HRESULT
-  CreateFromUtf8Strings(_In_opt_z_ LPCSTR pErrorStr,
-      _In_opt_z_ LPCSTR pResultStr, HRESULT status,
-      _COM_Outptr_ IDxcOperationResult **pResult) {
-    *pResult = nullptr;
-    CComPtr<IDxcBlobEncoding> resultBlob;
-    CComPtr<IDxcBlobEncoding> errorBlob;
+  /////////////////////////////
+  // Static object constructors
+  /////////////////////////////
 
 
-    HRESULT hr = S_OK;
+  // Builds a text output of the given kind from pText/size (encoded to
+  // codePage) with an optional name. IFT is applied to each setter's result.
+  template<typename DataTy, typename NameTy>
+  static DxcOutputObject StringOutput(_In_ DXC_OUT_KIND kind,
+                                      _In_ UINT32 codePage,
+                                      _In_opt_count_(size) DataTy pText, _In_ SIZE_T size,
+                                      _In_opt_z_ NameTy pName) {
+    DxcOutputObject result;
+    result.kind = kind;
+    IFT(result.SetString(codePage, pText, size));
+    IFT(result.SetName(pName));
+    return result;
+  }
+  template<typename DataTy, typename NameTy>
+  static DxcOutputObject StringOutput(_In_ DXC_OUT_KIND kind,
+                                      _In_ UINT32 codePage,
+                                      _In_opt_ DataTy pText,
+                                      _In_opt_z_ NameTy pName) {
+    return StringOutput(kind, codePage, pText, kAutoSize, pName);
+  }
+  // Builds an output of the given kind holding a copy of raw bytes, with an
+  // optional name. IFT is applied to each setter's result.
+  template<typename NameTy>
+  static DxcOutputObject DataOutput(_In_ DXC_OUT_KIND kind,
+                                    _In_opt_bytecount_(size) LPCVOID pData, _In_ SIZE_T size,
+                                    _In_opt_z_ NameTy pName) {
+    DxcOutputObject result;
+    result.kind = kind;
+    IFT(result.SetObjectData(pData, size));
+    IFT(result.SetName(pName));
+    return result;
+  }
+  // Builds an output of the given kind from an existing blob, with an optional
+  // name. SetObject is called with its default code page argument.
+  template<typename NameTy>
+  static DxcOutputObject DataOutput(_In_ DXC_OUT_KIND kind,
+                                    _In_opt_ IDxcBlob *pBlob,
+                                    _In_opt_z_ NameTy pName) {
+    DxcOutputObject result;
+    result.kind = kind;
+    IFT(result.SetObject(pBlob));
+    IFT(result.SetName(pName));
+    return result;
+  }
+  static DxcOutputObject DataOutput(_In_ DXC_OUT_KIND kind,
+                                    _In_opt_ IDxcBlob *pBlob) {
+    return DataOutput(kind, pBlob, DxcOutNoName);
+  }
+  // Builds an output of the given kind from an existing blob, passing an
+  // explicit code page to SetObject, with an optional name.
+  template<typename NameTy>
+  static DxcOutputObject DataOutput(_In_ DXC_OUT_KIND kind,
+                                    _In_ UINT32 codePage,
+                                    _In_opt_ IDxcBlob *pBlob,
+                                    _In_opt_z_ NameTy pName) {
+    DxcOutputObject result;
+    result.kind = kind;
+    IFT(result.SetObject(pBlob, codePage));
+    IFT(result.SetName(pName));
+    return result;
+  }
+  static DxcOutputObject DataOutput(_In_ DXC_OUT_KIND kind,
+                                    _In_ UINT32 codePage,
+                                    _In_opt_ IDxcBlob *pBlob) {
+    return DataOutput(kind, codePage, pBlob, DxcOutNoName);
+  }
 
 
-    if (pErrorStr != nullptr) {
-      hr = hlsl::DxcCreateBlobWithEncodingOnHeapCopy(
-        pErrorStr, strlen(pErrorStr), CP_UTF8, &errorBlob);
-      if (FAILED(hr)) {
-        return hr;
-      }
-    }
+  template<typename DataTy>
+  static DxcOutputObject ErrorOutput(UINT32 codePage, DataTy pText, SIZE_T size) {
+    return StringOutput(DXC_OUT_ERRORS, codePage, pText, size, DxcOutNoName);
+  }
+  template<typename DataTy>
+  static DxcOutputObject ErrorOutput(UINT32 codePage, DataTy pText) {
+    return StringOutput(DXC_OUT_ERRORS, codePage, pText, DxcOutNoName);
+  }
+  template<typename NameTy>
+  static DxcOutputObject ObjectOutput(LPCVOID pData, SIZE_T size, NameTy pName) {
+    return DataOutput(DXC_OUT_OBJECT, pData, size, pName);
+  }
+  static DxcOutputObject ObjectOutput(LPCVOID pData, SIZE_T size) {
+    return DataOutput(DXC_OUT_OBJECT, pData, size, DxcOutNoName);
+  }
+};
 
 
-    if (pResultStr != nullptr) {
-      hr = hlsl::DxcCreateBlobWithEncodingOnHeapCopy(
-        pResultStr, strlen(pResultStr), CP_UTF8, &resultBlob);
-      if (FAILED(hr)) {
-        return hr;
-      }
-    }
+class DxcResult : public IDxcResult {
+private:
+  DXC_MICROCOM_TM_REF_FIELDS()
+  HRESULT m_status = S_OK;
+  DxcOutputObject m_outputs[kNumDxcOutputTypes];  // indexed by DXC_OUT_KIND enum - 1
+  DXC_OUT_KIND m_resultType = DXC_OUT_NONE;       // result type for GetResult()
+  UINT32 m_textEncoding = DXC_CP_UTF8;              // encoding for text outputs
+
+public:
+  DXC_MICROCOM_TM_ADDREF_RELEASE_IMPL()
+  DXC_MICROCOM_TM_CTOR(DxcResult)
 
 
-    return CreateFromResultErrorStatus(resultBlob, errorBlob, status, pResult);
+  HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, void **ppvObject) override {
+    return DoBasicQueryInterface<IDxcResult, IDxcOperationResult>(this, iid, ppvObject);
   }
   }
 
 
+  //////////////////////
+  // IDxcOperationResult
+  //////////////////////
+
   HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) override {
   HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) override {
     if (pStatus == nullptr)
     if (pStatus == nullptr)
       return E_INVALIDARG;
       return E_INVALIDARG;
@@ -106,12 +306,225 @@ public:
 
 
   HRESULT STDMETHODCALLTYPE
   HRESULT STDMETHODCALLTYPE
     GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) override {
     GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) override {
-    return m_result.CopyTo(ppResult);
+    *ppResult = nullptr;
+    if (m_resultType == DXC_OUT_NONE)
+      return S_OK;
+    DxcOutputObject *pObject = Output(m_resultType);
+    if (pObject && pObject->object)
+      return pObject->object->QueryInterface(ppResult);
+    return S_OK;
   }
   }
 
 
   HRESULT STDMETHODCALLTYPE
   HRESULT STDMETHODCALLTYPE
     GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **ppErrors) override {
     GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **ppErrors) override {
-    return m_errors.CopyTo(ppErrors);
+    *ppErrors = nullptr;
+    DxcOutputObject *pObject = Output(DXC_OUT_ERRORS);
+    if (pObject && pObject->object)
+      return pObject->object->QueryInterface(ppErrors);
+    return S_OK;
+  }
+
+  /////////////
+  // IDxcResult
+  /////////////
+
+  // Reports whether the slot for dxcOutKind is active (its kind is not
+  // DXC_OUT_NONE). Out-of-range kinds report FALSE.
+  BOOL STDMETHODCALLTYPE HasOutput(_In_ DXC_OUT_KIND dxcOutKind) override {
+    const bool inRange =
+        dxcOutKind > DXC_OUT_NONE && (unsigned)dxcOutKind <= kNumDxcOutputTypes;
+    if (!inRange)
+      return FALSE;
+    const DxcOutputObject &slot = m_outputs[(unsigned)dxcOutKind - 1];
+    return slot.kind != DXC_OUT_NONE;
+  }
+  // Retrieves the object stored for dxcOutKind as interface `iid`, and
+  // optionally its name.
+  //   ppvObject    - required; receives the queried interface.
+  //   ppOutputName - optional; receives the name blob when one is stored,
+  //                  otherwise null.
+  // Returns E_INVALIDARG when ppvObject is null, the kind is out of range, or
+  // the slot holds no output; otherwise the QueryInterface/CopyTo result.
+  // Both out-params are nulled before any failure return past the ppvObject check.
+  HRESULT STDMETHODCALLTYPE GetOutput(_In_ DXC_OUT_KIND dxcOutKind,
+      _In_ REFIID iid, _COM_Outptr_opt_result_maybenull_ void **ppvObject,
+      _COM_Outptr_ IDxcBlobUtf16 **ppOutputName) override {
+    if (ppvObject == nullptr)
+      return E_INVALIDARG;
+    *ppvObject = nullptr;
+    if (ppOutputName)
+      *ppOutputName = nullptr;
+    if (dxcOutKind <= DXC_OUT_NONE || (unsigned)dxcOutKind > kNumDxcOutputTypes)
+      return E_INVALIDARG;
+    DxcOutputObject &object = m_outputs[(unsigned)dxcOutKind - 1];
+    // An active kind without an object would otherwise be dereferenced below.
+    if (object.kind == DXC_OUT_NONE || !object.object)
+      return E_INVALIDARG;
+    IFR(object.object->QueryInterface(iid, ppvObject));
+    if (ppOutputName && object.name) {
+      IFR(object.name.CopyTo(ppOutputName));
+    }
+    return S_OK;
+  }
+
+  // Counts the slots whose kind is not DXC_OUT_NONE.
+  UINT32 GetNumOutputs() override {
+    UINT32 activeCount = 0;
+    for (const DxcOutputObject &slot : m_outputs) {
+      if (slot.kind == DXC_OUT_NONE)
+        continue;
+      ++activeCount;
+    }
+    return activeCount;
+  }
+  // Returns the kind of the Index-th active output, counting only slots whose
+  // kind is not DXC_OUT_NONE, in slot order. Returns DXC_OUT_NONE when Index is
+  // not less than the number of active outputs.
+  DXC_OUT_KIND GetOutputByIndex(UINT32 Index) override {
+    if (!(Index < kNumDxcOutputTypes))
+      return DXC_OUT_NONE;
+    UINT32 numOutputs = 0;
+    for (unsigned i = 0; i < kNumDxcOutputTypes; ++i) {
+      // Skip empty slots before comparing, so an empty slot is never returned
+      // and indices stay in step with GetNumOutputs().
+      if (m_outputs[i].kind == DXC_OUT_NONE)
+        continue;
+      if (Index == numOutputs)
+        return m_outputs[i].kind;
+      numOutputs++;
+    }
+    return DXC_OUT_NONE;
+  }
+  DXC_OUT_KIND PrimaryOutput() override {
+    return m_resultType;
+  }
+
+  /////////////////////
+  // Internal Interface
+  /////////////////////
+
+  // Selects the encoding stored in m_textEncoding. Only DXC_CP_ACP,
+  // DXC_CP_UTF8 and DXC_CP_UTF16 are accepted; anything else is E_INVALIDARG.
+  HRESULT SetEncoding(UINT32 textEncoding) {
+    const bool supported = textEncoding == DXC_CP_ACP ||
+                           textEncoding == DXC_CP_UTF8 ||
+                           textEncoding == DXC_CP_UTF16;
+    if (!supported)
+      return E_INVALIDARG;
+    m_textEncoding = textEncoding;
+    return S_OK;
+  }
+
+  // Returns the slot for `kind`, or nullptr when kind is out of range.
+  DxcOutputObject *Output(DXC_OUT_KIND kind) {
+    const bool inRange =
+        kind > DXC_OUT_NONE && (unsigned)kind <= kNumDxcOutputTypes;
+    return inRange ? &(m_outputs[(unsigned)kind - 1]) : nullptr;
+  }
+
+  // Resets the slot for `kind`: marks it DXC_OUT_NONE and releases its object
+  // and name. Out-of-range kinds return E_INVALIDARG.
+  HRESULT ClearOutput(DXC_OUT_KIND kind) {
+    const bool inRange =
+        kind > DXC_OUT_NONE && (unsigned)kind <= kNumDxcOutputTypes;
+    if (!inRange)
+      return E_INVALIDARG;
+    DxcOutputObject &slot = m_outputs[(unsigned)kind - 1];
+    slot.kind = DXC_OUT_NONE;
+    slot.object.Release();
+    slot.name.Release();
+    return S_OK;
+  }
+
+  // Clears every slot by calling ClearOutput for each kind from
+  // DXC_OUT_NONE + 1 through kNumDxcOutputTypes.
+  void ClearAllOutputs() {
+    unsigned kindValue = DXC_OUT_NONE + 1;
+    while (kindValue <= kNumDxcOutputTypes) {
+      ClearOutput((DXC_OUT_KIND)(kindValue));
+      ++kindValue;
+    }
+  }
+
+  // Records the status and the kind PrimaryOutput() reports. DXC_OUT_NONE is
+  // accepted; a kind above kNumDxcOutputTypes returns E_INVALIDARG and changes
+  // nothing.
+  HRESULT SetStatusAndPrimaryResult(HRESULT status, DXC_OUT_KIND resultType = DXC_OUT_NONE) {
+    const bool validKind = (unsigned)resultType <= kNumDxcOutputTypes;
+    if (validKind) {
+      m_status = status;
+      m_resultType = resultType;
+      return S_OK;
+    }
+    return E_INVALIDARG;
+  }
+
+  // Copies `output` (kind, object and name) into its slot. The slot must be
+  // empty: an out-of-range kind, a missing object, or an already active slot
+  // all return E_INVALIDARG.
+  HRESULT SetOutput(const DxcOutputObject &output) {
+    const bool kindInRange = output.kind > DXC_OUT_NONE &&
+                             (unsigned)output.kind <= kNumDxcOutputTypes;
+    if (!kindInRange || !output.object)
+      return E_INVALIDARG;
+    DxcOutputObject &slot = m_outputs[(unsigned)output.kind - 1];
+    // Refuse to overwrite an existing output.
+    if (slot.kind != DXC_OUT_NONE)
+      return E_INVALIDARG;
+    slot = output;
+    return S_OK;
+  }
+
+  // Set or overwrite the output object for `kind` and mark the slot active.
+  // A null pObject clears the object and marks the slot DXC_OUT_NONE.
+  // Returns E_INVALIDARG for an out-of-range kind, otherwise the result of
+  // storing the object; on failure the slot is left inactive. The slot's name
+  // is not touched.
+  HRESULT SetOutputObject(DXC_OUT_KIND kind, IUnknown *pObject) {
+    if (kind <= DXC_OUT_NONE || (unsigned)kind > kNumDxcOutputTypes)
+      return E_INVALIDARG;
+    DxcOutputObject &output = m_outputs[(unsigned)kind - 1];
+    // SetObject expects an empty slot, so detach the old object first. It is
+    // kept alive until return in case pObject is that same object.
+    CComPtr<IUnknown> pPrevious;
+    pPrevious.Attach(output.object.Detach());
+    if (!pObject) {
+      output.kind = DXC_OUT_NONE;
+      return S_OK;
+    }
+    // kind must be set before SetObject: it decides whether text is re-encoded.
+    output.kind = kind;
+    HRESULT hr = output.SetObject(pObject, m_textEncoding);
+    if (FAILED(hr))
+      output.kind = DXC_OUT_NONE;
+    return hr;
+  }
+  // Set or overwrite the output for `kind` with text encoded to m_textEncoding,
+  // and mark the slot active. A null pString clears the object and marks the
+  // slot DXC_OUT_NONE. size defaults to kAutoSize (length taken from the string).
+  // Returns E_INVALIDARG for an out-of-range kind, otherwise the result of
+  // storing the string; on failure the slot is left inactive.
+  template<typename StringTy>
+  HRESULT SetOutputString(DXC_OUT_KIND kind, StringTy pString, size_t size = kAutoSize) {
+    if (kind <= DXC_OUT_NONE || (unsigned)kind > kNumDxcOutputTypes)
+      return E_INVALIDARG;
+    DxcOutputObject &output = m_outputs[(unsigned)kind - 1];
+    // SetString expects an empty slot; drop any previous object so an
+    // overwrite neither asserts nor leaves a stale object behind.
+    output.object.Release();
+    output.kind = DXC_OUT_NONE;
+    if (!pString)
+      return S_OK;
+    IFR(output.SetString(m_textEncoding, pString, size));
+    // Activate the slot only once it actually holds an object.
+    output.kind = kind;
+    return S_OK;
+  }
+  // Set or overwrite the output name.  This does not set kind,
+  // since that indicates an active output, which must have an object.
+  // Returns E_INVALIDARG for an out-of-range kind, otherwise the result of
+  // storing the name.
+  template<typename NameTy>
+  HRESULT SetOutputName(DXC_OUT_KIND kind, NameTy Name) {
+    if (kind <= DXC_OUT_NONE || (unsigned)kind > kNumDxcOutputTypes)
+      return E_INVALIDARG;
+    DxcOutputObject &output = m_outputs[(unsigned)kind - 1];
+    // SetName expects no existing name, so detach the old one first. It is
+    // kept alive until return in case Name is that same blob.
+    CComPtr<IDxcBlobUtf16> pPrevious;
+    pPrevious.Attach(output.name.Detach());
+    return output.SetName(Name);
+  }
+
+  // Stores each entry of `outputs` via SetOutput, in order. Entries with kind
+  // DXC_OUT_NONE or without an object are skipped; the first SetOutput failure
+  // is returned immediately.
+  HRESULT SetOutputs(const llvm::ArrayRef<DxcOutputObject> outputs) {
+    for (const DxcOutputObject &entry : outputs) {
+      const bool storable = entry.kind != DXC_OUT_NONE && entry.object;
+      if (storable)
+        IFR(SetOutput(entry));
+    }
+    return S_OK;
+  }
+
+  // For every kind pResult reports via HasOutput, fetches its object and name
+  // with GetOutput into the matching local slot and marks that slot active.
+  // A null pResult returns E_INVALIDARG; the first GetOutput failure is returned.
+  HRESULT CopyOutputsFromResult(IDxcResult *pResult) {
+    if (pResult == nullptr)
+      return E_INVALIDARG;
+    for (unsigned slot = 0; slot != kNumDxcOutputTypes; ++slot) {
+      // Slots are indexed by kind - 1.
+      const DXC_OUT_KIND kind = (DXC_OUT_KIND)(slot + 1);
+      if (!pResult->HasOutput(kind))
+        continue;
+      DxcOutputObject &dest = m_outputs[slot];
+      IFR(pResult->GetOutput(kind, IID_PPV_ARGS(&dest.object), &dest.name));
+      dest.kind = kind;
+    }
+    return S_OK;
+  }
+
+  // All-in-one initialization
+  HRESULT Init(_In_ HRESULT status, _In_ DXC_OUT_KIND resultType,
+               const llvm::ArrayRef<DxcOutputObject> outputs) {
+    m_status = status;
+    m_resultType = resultType;
+    return SetOutputs(outputs);
+  }
+
+  // All-in-one create functions
+
+  // Allocates a DxcResult with DxcGetThreadMallocNoRef(), initializes it with
+  // the status, primary result kind and the given outputs, and returns it in
+  // *ppResult. *ppResult is null on any failure.
+  static HRESULT Create(_In_ HRESULT status, _In_ DXC_OUT_KIND resultType,
+                        _In_opt_count_(numOutputs) const DxcOutputObject *pOutputs,
+                        _In_ unsigned numOutputs,
+                        _COM_Outptr_ IDxcResult **ppResult) {
+    *ppResult = nullptr;
+    CComPtr<DxcResult> pNewResult =
+      DxcResult::Alloc(DxcGetThreadMallocNoRef());
+    IFROOM(pNewResult.p);
+    const llvm::ArrayRef<DxcOutputObject> outputList(pOutputs, numOutputs);
+    IFR(pNewResult->Init(status, resultType, outputList));
+    *ppResult = pNewResult.Detach();
+    return S_OK;
+  }
+  static HRESULT Create(_In_ HRESULT status, _In_ DXC_OUT_KIND resultType,
+                        const llvm::ArrayRef<DxcOutputObject> outputs,
+                        _COM_Outptr_ IDxcResult **ppResult) {
+    return Create(status, resultType, outputs.data(), outputs.size(), ppResult);
+  }
+  // For convenient use in legacy interface implementations
+  static HRESULT Create(_In_ HRESULT status, _In_ DXC_OUT_KIND resultType,
+                        const llvm::ArrayRef<DxcOutputObject> outputs,
+                        _COM_Outptr_ IDxcOperationResult **ppResult) {
+    IDxcResult *pResult;
+    IFR(Create(status, resultType, outputs.data(), outputs.size(), &pResult));
+    *ppResult = pResult;
+    return S_OK;
   }
   }
 };
 };
 
 

+ 2 - 2
include/dxc/Support/dxcapi.use.h

@@ -166,8 +166,8 @@ void EnsureEnabled(DxcDllSupport &dxcSupport);
 void ReadFileIntoBlob(DxcDllSupport &dxcSupport, _In_ LPCWSTR pFileName,
 void ReadFileIntoBlob(DxcDllSupport &dxcSupport, _In_ LPCWSTR pFileName,
                       _Outptr_ IDxcBlobEncoding **ppBlobEncoding);
                       _Outptr_ IDxcBlobEncoding **ppBlobEncoding);
 void WriteBlobToConsole(_In_opt_ IDxcBlob *pBlob, DWORD streamType = STD_OUTPUT_HANDLE);
 void WriteBlobToConsole(_In_opt_ IDxcBlob *pBlob, DWORD streamType = STD_OUTPUT_HANDLE);
-void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, _In_ LPCWSTR pFileName);
-void WriteBlobToHandle(_In_opt_ IDxcBlob *pBlob, HANDLE hFile, _In_opt_ LPCWSTR pFileName);
+void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, _In_ LPCWSTR pFileName, _In_ UINT32 textCodePage);
+void WriteBlobToHandle(_In_opt_ IDxcBlob *pBlob, _In_ HANDLE hFile, _In_opt_ LPCWSTR pFileName, _In_ UINT32 textCodePage);
 void WriteUtf8ToConsole(_In_opt_count_(charCount) const char *pText,
 void WriteUtf8ToConsole(_In_opt_count_(charCount) const char *pText,
                         int charCount, DWORD streamType = STD_OUTPUT_HANDLE);
                         int charCount, DWORD streamType = STD_OUTPUT_HANDLE);
 void WriteUtf8ToConsoleSizeT(_In_opt_count_(charCount) const char *pText,
 void WriteUtf8ToConsoleSizeT(_In_opt_count_(charCount) const char *pText,

+ 1 - 1
include/dxc/Support/dxcfilesystem.h

@@ -43,7 +43,7 @@ public:
 };
 };
 
 
 DxcArgsFileSystem *
 DxcArgsFileSystem *
-CreateDxcArgsFileSystem(_In_ IDxcBlob *pSource, _In_ LPCWSTR pSourceName,
+CreateDxcArgsFileSystem(_In_ IDxcBlobUtf8 *pSource, _In_ LPCWSTR pSourceName,
                         _In_opt_ IDxcIncludeHandler *pIncludeHandler);
                         _In_opt_ IDxcIncludeHandler *pIncludeHandler);
 
 
 } // namespace dxcutil
 } // namespace dxcutil

+ 321 - 71
include/dxc/dxcapi.h

@@ -35,23 +35,6 @@ struct IMalloc;
 
 
 struct IDxcIncludeHandler;
 struct IDxcIncludeHandler;
 
 
-/// <summary>
-/// Creates a single uninitialized object of the class associated with a specified CLSID.
-/// </summary>
-/// <param name="rclsid">
-/// The CLSID associated with the data and code that will be used to create the object.
-/// </param>
-/// <param name="riid">
-/// A reference to the identifier of the interface to be used to communicate 
-/// with the object.
-/// </param>
-/// <param name="ppv">
-/// Address of pointer variable that receives the interface pointer requested
-/// in riid. Upon successful return, *ppv contains the requested interface
-/// pointer. Upon failure, *ppv contains NULL.</param>
-/// <remarks>
-/// While this function is similar to CoCreateInstance, there is no COM involvement.
-/// </remarks>
 typedef HRESULT (__stdcall *DxcCreateInstanceProc)(
 typedef HRESULT (__stdcall *DxcCreateInstanceProc)(
     _In_ REFCLSID   rclsid,
     _In_ REFCLSID   rclsid,
     _In_ REFIID     riid,
     _In_ REFIID     riid,
@@ -83,18 +66,14 @@ typedef HRESULT(__stdcall *DxcCreateInstance2Proc)(
 /// While this function is similar to CoCreateInstance, there is no COM involvement.
 /// While this function is similar to CoCreateInstance, there is no COM involvement.
 /// </remarks>
 /// </remarks>
 
 
-#ifndef _MSC_VER
 extern "C"
 extern "C"
-#endif
 DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance(
 DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance(
   _In_ REFCLSID   rclsid,
   _In_ REFCLSID   rclsid,
   _In_ REFIID     riid,
   _In_ REFIID     riid,
   _Out_ LPVOID*   ppv
   _Out_ LPVOID*   ppv
   );
   );
 
 
-#ifndef _MSC_VER
 extern "C"
 extern "C"
-#endif
 DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance2(
 DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance2(
   _In_ IMalloc    *pMalloc,
   _In_ IMalloc    *pMalloc,
   _In_ REFCLSID   rclsid,
   _In_ REFCLSID   rclsid,
@@ -102,6 +81,56 @@ DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance2(
   _Out_ LPVOID*   ppv
   _Out_ LPVOID*   ppv
 );
 );
 
 
+// For convenience, equivalent definitions to CP_UTF8 and CP_UTF16.
+#define DXC_CP_UTF8 65001
+#define DXC_CP_UTF16 1200
+// Use DXC_CP_ACP for: Binary;  ANSI Text;  Autodetect UTF with BOM
+#define DXC_CP_ACP 0
+
+// This flag indicates that the shader hash was computed taking into account source information (-Zss)
+#define DXC_HASHFLAG_INCLUDES_SOURCE  1
+
+// Hash digest type for ShaderHash
+typedef struct DxcShaderHash {
+  UINT32 Flags; // DXC_HASHFLAG_*
+  BYTE HashDigest[16];
+} DxcShaderHash;
+
+#define DXC_FOURCC(ch0, ch1, ch2, ch3) (                     \
+  (UINT32)(UINT8)(ch0)        | (UINT32)(UINT8)(ch1) << 8  | \
+  (UINT32)(UINT8)(ch2) << 16  | (UINT32)(UINT8)(ch3) << 24   \
+  )
+#define DXC_PART_PDB                      DXC_FOURCC('I', 'L', 'D', 'B')
+#define DXC_PART_PDB_NAME                 DXC_FOURCC('I', 'L', 'D', 'N')
+#define DXC_PART_PRIVATE_DATA             DXC_FOURCC('P', 'R', 'I', 'V')
+#define DXC_PART_ROOT_SIGNATURE           DXC_FOURCC('R', 'T', 'S', '0')
+#define DXC_PART_DXIL                     DXC_FOURCC('D', 'X', 'I', 'L')
+#define DXC_PART_REFLECTION_DATA          DXC_FOURCC('R', 'D', 'A', 'T')
+#define DXC_PART_SHADER_HASH              DXC_FOURCC('H', 'A', 'S', 'H')
+#define DXC_PART_INPUT_SIGNATURE          DXC_FOURCC('I', 'S', 'G', '1')
+#define DXC_PART_OUTPUT_SIGNATURE         DXC_FOURCC('O', 'S', 'G', '1')
+#define DXC_PART_PATCH_CONSTANT_SIGNATURE DXC_FOURCC('P', 'S', 'G', '1')
+
+// Some option arguments are defined here for continuity with D3DCompile interface
+#define DXC_ARG_DEBUG L"-Zi"
+#define DXC_ARG_SKIP_VALIDATION L"-Vd"
+#define DXC_ARG_SKIP_OPTIMIZATIONS L"-Od"
+#define DXC_ARG_PACK_MATRIX_ROW_MAJOR L"-Zpr"
+#define DXC_ARG_PACK_MATRIX_COLUMN_MAJOR L"-Zpc"
+#define DXC_ARG_AVOID_FLOW_CONTROL L"-Gfa"
+#define DXC_ARG_PREFER_FLOW_CONTROL L"-Gfp"
+#define DXC_ARG_ENABLE_STRICTNESS L"-Ges"
+#define DXC_ARG_ENABLE_BACKWARDS_COMPATIBILITY L"-Gec"
+#define DXC_ARG_IEEE_STRICTNESS L"-Gis"
+#define DXC_ARG_OPTIMIZATION_LEVEL0 L"-O0"
+#define DXC_ARG_OPTIMIZATION_LEVEL1 L"-O1"
+#define DXC_ARG_OPTIMIZATION_LEVEL2 L"-O2"
+#define DXC_ARG_OPTIMIZATION_LEVEL3 L"-O3"
+#define DXC_ARG_WARNINGS_ARE_ERRORS L"-WX"
+#define DXC_ARG_RESOURCES_MAY_ALIAS L"-res_may_alias"
+#define DXC_ARG_ALL_RESOURCES_BOUND L"-all_resources_bound"
+#define DXC_ARG_DEBUG_NAME_FOR_SOURCE L"-Zss"
+#define DXC_ARG_DEBUG_NAME_FOR_BINARY L"-Zsb"
 
 
 // IDxcBlob is an alias of ID3D10Blob and ID3DBlob
 // IDxcBlob is an alias of ID3D10Blob and ID3DBlob
 struct __declspec(uuid("8BA5FB08-5195-40e2-AC58-0D989C3A0102"))
 struct __declspec(uuid("8BA5FB08-5195-40e2-AC58-0D989C3A0102"))
@@ -122,70 +151,144 @@ public:
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobEncoding)
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobEncoding)
 };
 };
 
 
+// Notes on IDxcBlobUtf16 and IDxcBlobUtf8:
+// These guarantee null-terminated text and the stated encoding.
+// GetBufferSize() will return the size in bytes, including the null-terminator.
+// GetStringLength() will return the length in characters, excluding the null-terminator.
+// Name strings will use IDxcBlobUtf16, while other string output blobs,
+// such as errors/warnings, preprocessed HLSL, or other text, will be based
+// on the -encoding option.
+
+// The API will use this interface for output name strings
+struct __declspec(uuid("A3F84EAB-0FAA-497E-A39C-EE6ED60B2D84"))
+IDxcBlobUtf16 : public IDxcBlobEncoding {
+public:
+  virtual LPCWSTR STDMETHODCALLTYPE GetStringPointer(void) = 0;
+  virtual SIZE_T STDMETHODCALLTYPE GetStringLength(void) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobUtf16)
+};
+struct __declspec(uuid("3DA636C9-BA71-4024-A301-30CBF125305B"))
+IDxcBlobUtf8 : public IDxcBlobEncoding {
+public:
+  virtual LPCSTR STDMETHODCALLTYPE GetStringPointer(void) = 0;
+  virtual SIZE_T STDMETHODCALLTYPE GetStringLength(void) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobUtf8)
+};
+
+struct __declspec(uuid("7f61fc7d-950d-467f-b3e3-3c02fb49187c"))
+IDxcIncludeHandler : public IUnknown {
+  virtual HRESULT STDMETHODCALLTYPE LoadSource(
+    _In_z_ LPCWSTR pFilename,                                 // Candidate filename.
+    _COM_Outptr_result_maybenull_ IDxcBlob **ppIncludeSource  // Resultant source object for included file, nullptr if not found.
+    ) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcIncludeHandler)
+};
+
+// Structure for supplying bytes or text input to Dxc APIs.
+// Use Encoding = 0 (DXC_CP_ACP) for non-text bytes, ANSI text, or UTF text to be autodetected from its BOM.
+typedef struct DxcBuffer {
+  LPCVOID Ptr;
+  SIZE_T Size;
+  UINT Encoding;
+} DxcText;
+
+struct DxcDefine {
+  LPCWSTR Name;
+  _Maybenull_ LPCWSTR Value;
+};
+
+struct __declspec(uuid("73EFFE2A-70DC-45F8-9690-EFF64C02429D"))
+IDxcCompilerArgs : public IUnknown {
+  // Pass GetArguments() and GetCount() to Compile
+  virtual LPCWSTR* STDMETHODCALLTYPE GetArguments() = 0;
+  virtual UINT32 STDMETHODCALLTYPE GetCount() = 0;
+
+  // Add additional arguments or defines here, if desired.
+  virtual HRESULT STDMETHODCALLTYPE AddArguments(
+    _In_opt_count_(argCount) LPCWSTR *pArguments,       // Array of pointers to arguments to add
+    _In_ UINT32 argCount                                // Number of arguments to add
+  ) = 0;
+  virtual HRESULT STDMETHODCALLTYPE AddArgumentsUTF8(
+    _In_opt_count_(argCount)LPCSTR *pArguments,         // Array of pointers to UTF-8 arguments to add
+    _In_ UINT32 argCount                                // Number of arguments to add
+  ) = 0;
+  virtual HRESULT STDMETHODCALLTYPE AddDefines(
+      _In_count_(defineCount) const DxcDefine *pDefines, // Array of defines
+      _In_ UINT32 defineCount                            // Number of defines
+  ) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompilerArgs)
+};
+
+//////////////////////////
+// Legacy Interfaces
+/////////////////////////
+
+// NOTE: IDxcUtils replaces IDxcLibrary
 struct __declspec(uuid("e5204dc7-d18c-4c3c-bdfb-851673980fe7"))
 struct __declspec(uuid("e5204dc7-d18c-4c3c-bdfb-851673980fe7"))
 IDxcLibrary : public IUnknown {
 IDxcLibrary : public IUnknown {
   virtual HRESULT STDMETHODCALLTYPE SetMalloc(_In_opt_ IMalloc *pMalloc) = 0;
   virtual HRESULT STDMETHODCALLTYPE SetMalloc(_In_opt_ IMalloc *pMalloc) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateBlobFromBlob(
   virtual HRESULT STDMETHODCALLTYPE CreateBlobFromBlob(
     _In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, _COM_Outptr_ IDxcBlob **ppResult) = 0;
     _In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, _COM_Outptr_ IDxcBlob **ppResult) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateBlobFromFile(
   virtual HRESULT STDMETHODCALLTYPE CreateBlobFromFile(
-    LPCWSTR pFileName, _In_opt_ UINT32* codePage,
+    _In_z_ LPCWSTR pFileName, _In_opt_ UINT32* codePage,
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingFromPinned(
   virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingFromPinned(
     _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
     _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnHeapCopy(
   virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnHeapCopy(
-       _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
-      _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+    _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
+    _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnMalloc(
   virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnMalloc(
     _In_bytecount_(size) LPCVOID pText, IMalloc *pIMalloc, UINT32 size, UINT32 codePage,
     _In_bytecount_(size) LPCVOID pText, IMalloc *pIMalloc, UINT32 size, UINT32 codePage,
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateIncludeHandler(
   virtual HRESULT STDMETHODCALLTYPE CreateIncludeHandler(
-      _COM_Outptr_ IDxcIncludeHandler **ppResult) = 0;
+    _COM_Outptr_ IDxcIncludeHandler **ppResult) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateStreamFromBlobReadOnly(
   virtual HRESULT STDMETHODCALLTYPE CreateStreamFromBlobReadOnly(
-      _In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0;
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0;
   virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8(
   virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8(
-      _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
   virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf16(
   virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf16(
-      _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
 
 
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcLibrary)
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcLibrary)
 };
 };
 
 
+// NOTE: IDxcResult replaces IDxcOperationResult
 struct __declspec(uuid("CEDB484A-D4E9-445A-B991-CA21CA157DC2"))
 struct __declspec(uuid("CEDB484A-D4E9-445A-B991-CA21CA157DC2"))
 IDxcOperationResult : public IUnknown {
 IDxcOperationResult : public IUnknown {
   virtual HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) = 0;
   virtual HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) = 0;
-  virtual HRESULT STDMETHODCALLTYPE GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **pResult) = 0;
-  virtual HRESULT STDMETHODCALLTYPE GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **pErrors) = 0;
 
 
-  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcOperationResult)
-};
+  // GetResult returns the main result of the operation.
+  // This corresponds to:
+  // DXC_OUT_OBJECT - Compile() with shader or library target
+  // DXC_OUT_DISASSEMBLY - Disassemble()
+  // DXC_OUT_HLSL - Compile() with -P
+  // DXC_OUT_ROOT_SIGNATURE - Compile() with rootsig_* target
+  virtual HRESULT STDMETHODCALLTYPE GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) = 0;
 
 
-struct __declspec(uuid("7f61fc7d-950d-467f-b3e3-3c02fb49187c"))
-IDxcIncludeHandler : public IUnknown {
-  virtual HRESULT STDMETHODCALLTYPE LoadSource(
-    _In_ LPCWSTR pFilename,                                   // Candidate filename.
-    _COM_Outptr_result_maybenull_ IDxcBlob **ppIncludeSource  // Resultant source object for included file, nullptr if not found.
-    ) = 0;
+  // GetErrorBuffer corresponds to DXC_OUT_ERRORS.
+  virtual HRESULT STDMETHODCALLTYPE GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **ppErrors) = 0;
 
 
-  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcIncludeHandler)
-};
-
-struct DxcDefine {
-  LPCWSTR Name;
-  _Maybenull_ LPCWSTR Value;
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcOperationResult)
 };
 };
 
 
+// NOTE: IDxcCompiler3 replaces IDxcCompiler and IDxcCompiler2
 struct __declspec(uuid("8c210bf3-011f-4422-8d70-6f9acb8db617"))
 struct __declspec(uuid("8c210bf3-011f-4422-8d70-6f9acb8db617"))
 IDxcCompiler : public IUnknown {
 IDxcCompiler : public IUnknown {
   // Compile a single entry point to the target shader model
   // Compile a single entry point to the target shader model
   virtual HRESULT STDMETHODCALLTYPE Compile(
   virtual HRESULT STDMETHODCALLTYPE Compile(
     _In_ IDxcBlob *pSource,                       // Source text to compile
     _In_ IDxcBlob *pSource,                       // Source text to compile
-    _In_opt_ LPCWSTR pSourceName,                 // Optional file name for pSource. Used in errors and include handlers.
-    _In_ LPCWSTR pEntryPoint,                     // entry point name
-    _In_ LPCWSTR pTargetProfile,                  // shader profile to compile
-    _In_count_(argCount) LPCWSTR *pArguments,     // Array of pointers to arguments
+    _In_opt_z_ LPCWSTR pSourceName,               // Optional file name for pSource. Used in errors and include handlers.
+    _In_opt_z_ LPCWSTR pEntryPoint,               // entry point name
+    _In_z_ LPCWSTR pTargetProfile,                // shader profile to compile
+    _In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
     _In_ UINT32 argCount,                         // Number of arguments
     _In_ UINT32 argCount,                         // Number of arguments
-    _In_count_(defineCount) const DxcDefine *pDefines,  // Array of defines
+    _In_count_(defineCount)
+      const DxcDefine *pDefines,                  // Array of defines
     _In_ UINT32 defineCount,                      // Number of defines
     _In_ UINT32 defineCount,                      // Number of defines
     _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
     _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
     _COM_Outptr_ IDxcOperationResult **ppResult   // Compiler output status, buffer, and errors
     _COM_Outptr_ IDxcOperationResult **ppResult   // Compiler output status, buffer, and errors
@@ -194,10 +297,11 @@ IDxcCompiler : public IUnknown {
   // Preprocess source text
   // Preprocess source text
   virtual HRESULT STDMETHODCALLTYPE Preprocess(
   virtual HRESULT STDMETHODCALLTYPE Preprocess(
     _In_ IDxcBlob *pSource,                       // Source text to preprocess
     _In_ IDxcBlob *pSource,                       // Source text to preprocess
-    _In_opt_ LPCWSTR pSourceName,                 // Optional file name for pSource. Used in errors and include handlers.
-    _In_count_(argCount) LPCWSTR *pArguments,     // Array of pointers to arguments
+    _In_opt_z_ LPCWSTR pSourceName,               // Optional file name for pSource. Used in errors and include handlers.
+    _In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
     _In_ UINT32 argCount,                         // Number of arguments
     _In_ UINT32 argCount,                         // Number of arguments
-    _In_count_(defineCount) const DxcDefine *pDefines,  // Array of defines
+    _In_count_(defineCount)
+      const DxcDefine *pDefines,                  // Array of defines
     _In_ UINT32 defineCount,                      // Number of defines
     _In_ UINT32 defineCount,                      // Number of defines
     _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
     _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
     _COM_Outptr_ IDxcOperationResult **ppResult   // Preprocessor output status, buffer, and errors
     _COM_Outptr_ IDxcOperationResult **ppResult   // Preprocessor output status, buffer, and errors
@@ -212,21 +316,23 @@ IDxcCompiler : public IUnknown {
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompiler)
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompiler)
 };
 };
 
 
+// NOTE: IDxcCompiler3 replaces IDxcCompiler and IDxcCompiler2
 struct __declspec(uuid("A005A9D9-B8BB-4594-B5C9-0E633BEC4D37"))
 struct __declspec(uuid("A005A9D9-B8BB-4594-B5C9-0E633BEC4D37"))
 IDxcCompiler2 : public IDxcCompiler {
 IDxcCompiler2 : public IDxcCompiler {
   // Compile a single entry point to the target shader model with debug information.
   // Compile a single entry point to the target shader model with debug information.
   virtual HRESULT STDMETHODCALLTYPE CompileWithDebug(
   virtual HRESULT STDMETHODCALLTYPE CompileWithDebug(
     _In_ IDxcBlob *pSource,                       // Source text to compile
     _In_ IDxcBlob *pSource,                       // Source text to compile
-    _In_opt_ LPCWSTR pSourceName,                 // Optional file name for pSource. Used in errors and include handlers.
-    _In_ LPCWSTR pEntryPoint,                     // Entry point name
-    _In_ LPCWSTR pTargetProfile,                  // Shader profile to compile
-    _In_count_(argCount) LPCWSTR *pArguments,     // Array of pointers to arguments
+    _In_opt_z_ LPCWSTR pSourceName,               // Optional file name for pSource. Used in errors and include handlers.
+    _In_opt_z_ LPCWSTR pEntryPoint,               // Entry point name
+    _In_z_ LPCWSTR pTargetProfile,                // Shader profile to compile
+    _In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
     _In_ UINT32 argCount,                         // Number of arguments
     _In_ UINT32 argCount,                         // Number of arguments
-    _In_count_(defineCount) const DxcDefine *pDefines,  // Array of defines
+    _In_count_(defineCount)
+      const DxcDefine *pDefines,                  // Array of defines
     _In_ UINT32 defineCount,                      // Number of defines
     _In_ UINT32 defineCount,                      // Number of defines
     _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
     _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
     _COM_Outptr_ IDxcOperationResult **ppResult,  // Compiler output status, buffer, and errors
     _COM_Outptr_ IDxcOperationResult **ppResult,  // Compiler output status, buffer, and errors
-    _Outptr_opt_result_z_ LPWSTR *ppDebugBlobName,// Suggested file name for debug blob.
+    _Outptr_opt_result_z_ LPWSTR *ppDebugBlobName,// Suggested file name for debug blob. (Must be HeapFree()'d!)
     _COM_Outptr_opt_ IDxcBlob **ppDebugBlob       // Debug blob
     _COM_Outptr_opt_ IDxcBlob **ppDebugBlob       // Debug blob
   ) = 0;
   ) = 0;
 
 
@@ -238,28 +344,164 @@ IDxcLinker : public IUnknown {
 public:
 public:
   // Register a library under a name so it can be referenced later.
   // Register a library under a name so it can be referenced later.
   virtual HRESULT RegisterLibrary(
   virtual HRESULT RegisterLibrary(
-      _In_opt_ LPCWSTR pLibName,         // Name of the library.
-      _In_ IDxcBlob *pLib                // Library blob.
+    _In_opt_ LPCWSTR pLibName,          // Name of the library.
+    _In_ IDxcBlob *pLib                 // Library blob.
   ) = 0;
   ) = 0;
 
 
   // Links the shader and produces a shader blob that the Direct3D runtime can
   // Links the shader and produces a shader blob that the Direct3D runtime can
   // use.
   // use.
   virtual HRESULT STDMETHODCALLTYPE Link(
   virtual HRESULT STDMETHODCALLTYPE Link(
-      _In_opt_ LPCWSTR pEntryName, // Entry point name
-      _In_ LPCWSTR pTargetProfile, // shader profile to link
-      _In_count_(libCount)
-          const LPCWSTR *pLibNames, // Array of library names to link
-      UINT32 libCount,              // Number of libraries to link
-      _In_count_(argCount)
-          const LPCWSTR *pArguments, // Array of pointers to arguments
-      _In_ UINT32 argCount,          // Number of arguments
-      _COM_Outptr_ IDxcOperationResult *
-          *ppResult // Linker output status, buffer, and errors
+    _In_opt_ LPCWSTR pEntryName,        // Entry point name
+    _In_ LPCWSTR pTargetProfile,        // shader profile to link
+    _In_count_(libCount)
+        const LPCWSTR *pLibNames,       // Array of library names to link
+    _In_ UINT32 libCount,               // Number of libraries to link
+    _In_opt_count_(argCount) const LPCWSTR *pArguments, // Array of pointers to arguments
+    _In_ UINT32 argCount,               // Number of arguments
+    _COM_Outptr_
+        IDxcOperationResult **ppResult  // Linker output status, buffer, and errors
   ) = 0;
   ) = 0;
 
 
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcLinker)
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcLinker)
 };
 };
 
 
+/////////////////////////
+// Latest interfaces. Please use these
+////////////////////////
+
+// NOTE: IDxcUtils replaces IDxcLibrary
+struct __declspec(uuid("4605C4CB-2019-492A-ADA4-65F20BB7D67F"))
+IDxcUtils : public IUnknown {
+  // Create a sub-blob that holds a reference to the outer blob and points to its memory.
+  virtual HRESULT STDMETHODCALLTYPE CreateBlobFromBlob(
+    _In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, _COM_Outptr_ IDxcBlob **ppResult) = 0;
+
+  // For codePage, use 0 (or DXC_CP_ACP) for raw binary or ANSI code page
+
+  // Creates a blob referencing existing memory, with no copy.
+  // User must manage the memory lifetime separately.
+  // (was: CreateBlobWithEncodingFromPinned)
+  virtual HRESULT STDMETHODCALLTYPE CreateBlobFromPinned(
+    _In_bytecount_(size) LPCVOID pData, UINT32 size, UINT32 codePage,
+    _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+
+  // Create blob, taking ownership of memory allocated with supplied allocator.
+  // (was: CreateBlobWithEncodingOnMalloc)
+  virtual HRESULT STDMETHODCALLTYPE MoveToBlob(
+    _In_bytecount_(size) LPCVOID pData, IMalloc *pIMalloc, UINT32 size, UINT32 codePage,
+    _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+
+  ////
+  // New blobs and copied contents are allocated with the current allocator
+
+  // Copy blob contents to memory owned by the new blob.
+  // (was: CreateBlobWithEncodingOnHeapCopy)
+  virtual HRESULT STDMETHODCALLTYPE CreateBlob(
+    _In_bytecount_(size) LPCVOID pData, UINT32 size, UINT32 codePage,
+    _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+
+  // (was: CreateBlobFromFile)
+  virtual HRESULT STDMETHODCALLTYPE LoadFile(
+    _In_z_ LPCWSTR pFileName, _In_opt_ UINT32* pCodePage,
+    _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+
+  virtual HRESULT STDMETHODCALLTYPE CreateReadOnlyStreamFromBlob(
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0;
+
+  // Create default file-based include handler
+  virtual HRESULT STDMETHODCALLTYPE CreateDefaultIncludeHandler(
+    _COM_Outptr_ IDxcIncludeHandler **ppResult) = 0;
+
+  // Convert or return matching encoded text blobs
+  virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8(
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobUtf8 **pBlobEncoding) = 0;
+  virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf16(
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobUtf16 **pBlobEncoding) = 0;
+
+  virtual HRESULT STDMETHODCALLTYPE GetDxilContainerPart(
+    _In_ const DxcBuffer *pShader,
+    _In_ UINT32 DxcPart,
+    _Outptr_result_nullonfailure_ void **ppPartData,
+    _Out_ UINT32 *pPartSizeInBytes) = 0;
+
+  // Create reflection interface from serialized Dxil container, or DXC_PART_REFLECTION_DATA.
+  // TBD: Require part header for RDAT?  (leaning towards yes)
+  virtual HRESULT STDMETHODCALLTYPE CreateReflection(
+    _In_ const DxcBuffer *pData, REFIID iid, void **ppvReflection) = 0;
+
+  virtual HRESULT STDMETHODCALLTYPE BuildArguments(
+    _In_opt_z_ LPCWSTR pSourceName,               // Optional file name for pSource. Used in errors and include handlers.
+    _In_opt_z_ LPCWSTR pEntryPoint,               // Entry point name. (-E)
+    _In_z_ LPCWSTR pTargetProfile,                // Shader profile to compile. (-T)
+    _In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
+    _In_ UINT32 argCount,                         // Number of arguments
+    _In_count_(defineCount)
+      const DxcDefine *pDefines,                  // Array of defines
+    _In_ UINT32 defineCount,                      // Number of defines
+    _COM_Outptr_ IDxcCompilerArgs **ppArgs        // Arguments you can use with Compile() method
+  ) = 0;
+
+  // Takes the shader PDB and returns the hash and the container inside it
+  virtual HRESULT STDMETHODCALLTYPE GetPDBContents(
+    _In_ IDxcBlob *pPDBBlob, _COM_Outptr_ IDxcBlob **ppHash, _COM_Outptr_ IDxcBlob **ppContainer) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcUtils)
+};
+
+// For use with IDxcResult::[Has|Get]Output dxcOutKind argument
+// Note: text outputs returned from version 2 APIs are UTF-8 or UTF-16 based on -encoding option
+typedef enum DXC_OUT_KIND {
+  DXC_OUT_NONE = 0,
+  DXC_OUT_OBJECT = 1,         // IDxcBlob - Shader or library object
+  DXC_OUT_ERRORS = 2,         // IDxcBlobUtf8 or IDxcBlobUtf16
+  DXC_OUT_PDB = 3,            // IDxcBlob
+  DXC_OUT_SHADER_HASH = 4,    // IDxcBlob - DxcShaderHash of shader or shader with source info (-Zsb/-Zss)
+  DXC_OUT_DISASSEMBLY = 5,    // IDxcBlobUtf8 or IDxcBlobUtf16 - from Disassemble
+  DXC_OUT_HLSL = 6,           // IDxcBlobUtf8 or IDxcBlobUtf16 - from Preprocessor or Rewriter
+  DXC_OUT_TEXT = 7,           // IDxcBlobUtf8 or IDxcBlobUtf16 - other text, such as -ast-dump or -Odump
+  DXC_OUT_REFLECTION = 8,     // IDxcBlob - RDAT part with reflection data
+  DXC_OUT_ROOT_SIGNATURE = 9, // IDxcBlob - Serialized root signature output
+
+  DXC_OUT_FORCE_DWORD = 0xFFFFFFFF
+} DXC_OUT_KIND;
+
+struct __declspec(uuid("58346CDA-DDE7-4497-9461-6F87AF5E0659"))
+IDxcResult : public IDxcOperationResult {
+  virtual BOOL STDMETHODCALLTYPE HasOutput(_In_ DXC_OUT_KIND dxcOutKind) = 0;
+  virtual HRESULT STDMETHODCALLTYPE GetOutput(_In_ DXC_OUT_KIND dxcOutKind,
+    _In_ REFIID iid, _COM_Outptr_opt_result_maybenull_ void **ppvObject,
+    _COM_Outptr_ IDxcBlobUtf16 **ppOutputName) = 0;
+
+  virtual UINT32 GetNumOutputs() = 0;
+  virtual DXC_OUT_KIND GetOutputByIndex(UINT32 Index) = 0;
+  virtual DXC_OUT_KIND PrimaryOutput() = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcResult)
+};
+
+struct __declspec(uuid("228B4687-5A6A-4730-900C-9702B2203F54"))
+IDxcCompiler3 : public IUnknown {
+  // Compile a single entry point to the target shader model,
+  // Compile a library to a library target (-T lib_*),
+  // Compile a root signature (-T rootsig_*), or
+  // Preprocess HLSL source (-P)
+  virtual HRESULT STDMETHODCALLTYPE Compile(
+    _In_ const DxcBuffer *pSource,                // Source text to compile
+    _In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
+    _In_ UINT32 argCount,                         // Number of arguments
+    _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
+    _In_ REFIID riid, _Out_ LPVOID *ppResult      // IDxcResult: status, buffer, and errors
+  ) = 0;
+
+  // Disassemble a program.
+  virtual HRESULT STDMETHODCALLTYPE Disassemble(
+    _In_ const DxcBuffer *pObject,                // Program to disassemble: dxil container or bitcode.
+    _In_ REFIID riid, _Out_ LPVOID *ppResult      // IDxcResult: status, disassembly text, and errors
+    ) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompiler3)
+};
+
 static const UINT32 DxcValidatorFlags_Default = 0;
 static const UINT32 DxcValidatorFlags_Default = 0;
 static const UINT32 DxcValidatorFlags_InPlaceEdit = 1;  // Validator is allowed to update shader blob in-place.
 static const UINT32 DxcValidatorFlags_InPlaceEdit = 1;  // Validator is allowed to update shader blob in-place.
 static const UINT32 DxcValidatorFlags_RootSignatureOnly = 2;
 static const UINT32 DxcValidatorFlags_RootSignatureOnly = 2;
@@ -361,7 +603,6 @@ IDxcVersionInfo2 : public IDxcVersionInfo {
 #define CLSID_SCOPE
 #define CLSID_SCOPE
 #endif
 #endif
 
 
-// {73e22d93-e6ce-47f3-b5bf-f0664f39c1b0}
 CLSID_SCOPE const CLSID CLSID_DxcCompiler = {
 CLSID_SCOPE const CLSID CLSID_DxcCompiler = {
     0x73e22d93,
     0x73e22d93,
     0xe6ce,
     0xe6ce,
@@ -382,6 +623,13 @@ CLSID_SCOPE const CLSID CLSID_DxcDiaDataSource = {
     0x484d,
     0x484d,
     {0x8e, 0xdc, 0xeb, 0xe7, 0xa4, 0x3c, 0xa0, 0x9f}};
     {0x8e, 0xdc, 0xeb, 0xe7, 0xa4, 0x3c, 0xa0, 0x9f}};
 
 
+// {3E56AE82-224D-470F-A1A1-FE3016EE9F9D}
+CLSID_SCOPE const CLSID CLSID_DxcCompilerArgs = {
+    0x3e56ae82,
+    0x224d,
+    0x470f,
+    {0xa1, 0xa1, 0xfe, 0x30, 0x16, 0xee, 0x9f, 0x9d}};
+
 // {6245D6AF-66E0-48FD-80B4-4D271796748C}
 // {6245D6AF-66E0-48FD-80B4-4D271796748C}
 CLSID_SCOPE const GUID CLSID_DxcLibrary = {
 CLSID_SCOPE const GUID CLSID_DxcLibrary = {
     0x6245d6af,
     0x6245d6af,
@@ -389,6 +637,8 @@ CLSID_SCOPE const GUID CLSID_DxcLibrary = {
     0x48fd,
     0x48fd,
     {0x80, 0xb4, 0x4d, 0x27, 0x17, 0x96, 0x74, 0x8c}};
     {0x80, 0xb4, 0x4d, 0x27, 0x17, 0x96, 0x74, 0x8c}};
 
 
+CLSID_SCOPE const GUID CLSID_DxcUtils = CLSID_DxcLibrary;
+
 // {8CA3E215-F728-4CF3-8CDD-88AF917587A1}
 // {8CA3E215-F728-4CF3-8CDD-88AF917587A1}
 CLSID_SCOPE const GUID CLSID_DxcValidator = {
 CLSID_SCOPE const GUID CLSID_DxcValidator = {
     0x8ca3e215,
     0x8ca3e215,

+ 9 - 1
include/dxc/dxcdxrfallbackcompiler.h

@@ -96,8 +96,16 @@ struct __declspec(uuid("76bb3c85-006d-4b72-9e10-63cd97df57f0"))
   ) = 0;
   ) = 0;
 };
 };
 
 
+// Note: __declspec(selectany) requires 'extern'
+// On Linux __declspec(selectany) is removed and using 'extern' results in link error.
+#ifdef _MSC_VER
+#define CLSID_SCOPE __declspec(selectany) extern
+#else
+#define CLSID_SCOPE
+#endif
+
 // {76bb3c85-006d-4b72-9e10-63cd97df57f0}
 // {76bb3c85-006d-4b72-9e10-63cd97df57f0}
-__declspec(selectany) extern const GUID CLSID_DxcDxrFallbackCompiler = {
+CLSID_SCOPE const GUID CLSID_DxcDxrFallbackCompiler = {
   0x76bb3c85,
   0x76bb3c85,
   0x006d,
   0x006d,
   0x4b72,
   0x4b72,

+ 5 - 0
include/llvm/Analysis/InstructionSimplify.h

@@ -322,6 +322,11 @@ namespace llvm {
                       AssumptionCache *AC = nullptr,
                       AssumptionCache *AC = nullptr,
                       const Instruction *CxtI = nullptr);
                       const Instruction *CxtI = nullptr);
 
 
+// HLSL Change - Begin
+  Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
+                          Type *Ty, const DataLayout &DL);
+// HLSL Change - End
+
   /// SimplifyInstruction - See if we can compute a simplified version of this
   /// SimplifyInstruction - See if we can compute a simplified version of this
   /// instruction.  If not, this returns null.
   /// instruction.  If not, this returns null.
   Value *SimplifyInstruction(Instruction *I, const DataLayout &DL,
   Value *SimplifyInstruction(Instruction *I, const DataLayout &DL,

+ 51 - 0
include/llvm/IR/IRBuilder.h

@@ -517,6 +517,7 @@ template<bool preserveNames = true, typename T = ConstantFolder,
 class IRBuilder : public IRBuilderBase, public Inserter {
 class IRBuilder : public IRBuilderBase, public Inserter {
   T Folder;
   T Folder;
 public:
 public:
+  bool AllowFolding = true; // HLSL Change - Runtime flag on whether to do folding
   IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter(),
   IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter(),
             MDNode *FPMathTag = nullptr)
             MDNode *FPMathTag = nullptr)
     : IRBuilderBase(C, FPMathTag), Inserter(I), Folder(F) {
     : IRBuilderBase(C, FPMathTag), Inserter(I), Folder(F) {
@@ -703,6 +704,7 @@ private:
 public:
 public:
   Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
                    bool HasNUW = false, bool HasNSW = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateAdd(LC, RC, HasNUW, HasNSW), Name);
         return Insert(Folder.CreateAdd(LC, RC, HasNUW, HasNSW), Name);
@@ -717,6 +719,7 @@ public:
   }
   }
   Value *CreateFAdd(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateFAdd(Value *LHS, Value *RHS, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFAdd(LC, RC), Name);
         return Insert(Folder.CreateFAdd(LC, RC), Name);
@@ -725,6 +728,7 @@ public:
   }
   }
   Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
                    bool HasNUW = false, bool HasNSW = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateSub(LC, RC, HasNUW, HasNSW), Name);
         return Insert(Folder.CreateSub(LC, RC, HasNUW, HasNSW), Name);
@@ -739,6 +743,7 @@ public:
   }
   }
   Value *CreateFSub(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateFSub(Value *LHS, Value *RHS, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFSub(LC, RC), Name);
         return Insert(Folder.CreateFSub(LC, RC), Name);
@@ -747,6 +752,7 @@ public:
   }
   }
   Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
                    bool HasNUW = false, bool HasNSW = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateMul(LC, RC, HasNUW, HasNSW), Name);
         return Insert(Folder.CreateMul(LC, RC, HasNUW, HasNSW), Name);
@@ -761,6 +767,7 @@ public:
   }
   }
   Value *CreateFMul(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateFMul(Value *LHS, Value *RHS, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFMul(LC, RC), Name);
         return Insert(Folder.CreateFMul(LC, RC), Name);
@@ -769,6 +776,7 @@ public:
   }
   }
   Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
                     bool isExact = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateUDiv(LC, RC, isExact), Name);
         return Insert(Folder.CreateUDiv(LC, RC, isExact), Name);
@@ -781,6 +789,7 @@ public:
   }
   }
   Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
                     bool isExact = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateSDiv(LC, RC, isExact), Name);
         return Insert(Folder.CreateSDiv(LC, RC, isExact), Name);
@@ -793,6 +802,7 @@ public:
   }
   }
   Value *CreateFDiv(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateFDiv(Value *LHS, Value *RHS, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFDiv(LC, RC), Name);
         return Insert(Folder.CreateFDiv(LC, RC), Name);
@@ -800,12 +810,14 @@ public:
                                       FPMathTag, FMF), Name);
                                       FPMathTag, FMF), Name);
   }
   }
   Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") {
   Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateURem(LC, RC), Name);
         return Insert(Folder.CreateURem(LC, RC), Name);
     return Insert(BinaryOperator::CreateURem(LHS, RHS), Name);
     return Insert(BinaryOperator::CreateURem(LHS, RHS), Name);
   }
   }
   Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") {
   Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateSRem(LC, RC), Name);
         return Insert(Folder.CreateSRem(LC, RC), Name);
@@ -813,6 +825,7 @@ public:
   }
   }
   Value *CreateFRem(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateFRem(Value *LHS, Value *RHS, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFRem(LC, RC), Name);
         return Insert(Folder.CreateFRem(LC, RC), Name);
@@ -822,6 +835,7 @@ public:
 
 
   Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
                    bool HasNUW = false, bool HasNSW = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateShl(LC, RC, HasNUW, HasNSW), Name);
         return Insert(Folder.CreateShl(LC, RC, HasNUW, HasNSW), Name);
@@ -841,6 +855,7 @@ public:
 
 
   Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
                     bool isExact = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateLShr(LC, RC, isExact), Name);
         return Insert(Folder.CreateLShr(LC, RC, isExact), Name);
@@ -859,6 +874,7 @@ public:
 
 
   Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "",
   Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
                     bool isExact = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateAShr(LC, RC, isExact), Name);
         return Insert(Folder.CreateAShr(LC, RC, isExact), Name);
@@ -876,6 +892,7 @@ public:
   }
   }
 
 
   Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
   Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *RC = dyn_cast<Constant>(RHS)) {
     if (Constant *RC = dyn_cast<Constant>(RHS)) {
       if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isAllOnesValue())
       if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isAllOnesValue())
         return LHS;  // LHS & -1 -> LHS
         return LHS;  // LHS & -1 -> LHS
@@ -892,6 +909,7 @@ public:
   }
   }
 
 
   Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
   Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *RC = dyn_cast<Constant>(RHS)) {
     if (Constant *RC = dyn_cast<Constant>(RHS)) {
       if (RC->isNullValue())
       if (RC->isNullValue())
         return LHS;  // LHS | 0 -> LHS
         return LHS;  // LHS | 0 -> LHS
@@ -908,6 +926,7 @@ public:
   }
   }
 
 
   Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
   Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateXor(LC, RC), Name);
         return Insert(Folder.CreateXor(LC, RC), Name);
@@ -923,6 +942,7 @@ public:
   Value *CreateBinOp(Instruction::BinaryOps Opc,
   Value *CreateBinOp(Instruction::BinaryOps Opc,
                      Value *LHS, Value *RHS, const Twine &Name = "",
                      Value *LHS, Value *RHS, const Twine &Name = "",
                      MDNode *FPMathTag = nullptr) {
                      MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateBinOp(Opc, LC, RC), Name);
         return Insert(Folder.CreateBinOp(Opc, LC, RC), Name);
@@ -934,6 +954,7 @@ public:
 
 
   Value *CreateNeg(Value *V, const Twine &Name = "",
   Value *CreateNeg(Value *V, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
                    bool HasNUW = false, bool HasNSW = false) {
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateNeg(VC, HasNUW, HasNSW), Name);
       return Insert(Folder.CreateNeg(VC, HasNUW, HasNSW), Name);
     BinaryOperator *BO = Insert(BinaryOperator::CreateNeg(V), Name);
     BinaryOperator *BO = Insert(BinaryOperator::CreateNeg(V), Name);
@@ -949,12 +970,14 @@ public:
   }
   }
   Value *CreateFNeg(Value *V, const Twine &Name = "",
   Value *CreateFNeg(Value *V, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateFNeg(VC), Name);
       return Insert(Folder.CreateFNeg(VC), Name);
     return Insert(AddFPMathAttributes(BinaryOperator::CreateFNeg(V),
     return Insert(AddFPMathAttributes(BinaryOperator::CreateFNeg(V),
                                       FPMathTag, FMF), Name);
                                       FPMathTag, FMF), Name);
   }
   }
   Value *CreateNot(Value *V, const Twine &Name = "") {
   Value *CreateNot(Value *V, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateNot(VC), Name);
       return Insert(Folder.CreateNot(VC), Name);
     return Insert(BinaryOperator::CreateNot(V), Name);
     return Insert(BinaryOperator::CreateNot(V), Name);
@@ -1035,6 +1058,7 @@ public:
   }
   }
   Value *CreateGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
   Value *CreateGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
                    const Twine &Name = "") {
                    const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr)) {
     if (Constant *PC = dyn_cast<Constant>(Ptr)) {
       // Every index must be constant.
       // Every index must be constant.
       size_t i, e;
       size_t i, e;
@@ -1052,6 +1076,7 @@ public:
   }
   }
   Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
   Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
                            const Twine &Name = "") {
                            const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr)) {
     if (Constant *PC = dyn_cast<Constant>(Ptr)) {
       // Every index must be constant.
       // Every index must be constant.
       size_t i, e;
       size_t i, e;
@@ -1068,6 +1093,7 @@ public:
     return CreateGEP(nullptr, Ptr, Idx, Name);
     return CreateGEP(nullptr, Ptr, Idx, Name);
   }
   }
   Value *CreateGEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "") {
   Value *CreateGEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       if (Constant *IC = dyn_cast<Constant>(Idx))
       if (Constant *IC = dyn_cast<Constant>(Idx))
         return Insert(Folder.CreateGetElementPtr(Ty, PC, IC), Name);
         return Insert(Folder.CreateGetElementPtr(Ty, PC, IC), Name);
@@ -1075,6 +1101,7 @@ public:
   }
   }
   Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, Value *Idx,
   Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, Value *Idx,
                            const Twine &Name = "") {
                            const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       if (Constant *IC = dyn_cast<Constant>(Idx))
       if (Constant *IC = dyn_cast<Constant>(Idx))
         return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, IC), Name);
         return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, IC), Name);
@@ -1087,6 +1114,7 @@ public:
                             const Twine &Name = "") {
                             const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
     Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
 
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(Ty, PC, Idx), Name);
       return Insert(Folder.CreateGetElementPtr(Ty, PC, Idx), Name);
 
 
@@ -1096,6 +1124,7 @@ public:
                                     const Twine &Name = "") {
                                     const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
     Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
 
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idx), Name);
       return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idx), Name);
 
 
@@ -1108,6 +1137,7 @@ public:
       ConstantInt::get(Type::getInt32Ty(Context), Idx1)
       ConstantInt::get(Type::getInt32Ty(Context), Idx1)
     };
     };
 
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(Ty, PC, Idxs), Name);
       return Insert(Folder.CreateGetElementPtr(Ty, PC, Idxs), Name);
 
 
@@ -1120,6 +1150,7 @@ public:
       ConstantInt::get(Type::getInt32Ty(Context), Idx1)
       ConstantInt::get(Type::getInt32Ty(Context), Idx1)
     };
     };
 
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idxs), Name);
       return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idxs), Name);
 
 
@@ -1128,6 +1159,7 @@ public:
   Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
   Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
     Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
 
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idx), Name);
       return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idx), Name);
 
 
@@ -1137,6 +1169,7 @@ public:
                                     const Twine &Name = "") {
                                     const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
     Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
 
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idx), Name);
       return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idx), Name);
 
 
@@ -1149,6 +1182,7 @@ public:
       ConstantInt::get(Type::getInt64Ty(Context), Idx1)
       ConstantInt::get(Type::getInt64Ty(Context), Idx1)
     };
     };
 
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idxs), Name);
       return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idxs), Name);
 
 
@@ -1161,6 +1195,7 @@ public:
       ConstantInt::get(Type::getInt64Ty(Context), Idx1)
       ConstantInt::get(Type::getInt64Ty(Context), Idx1)
     };
     };
 
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idxs),
       return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idxs),
                     Name);
                     Name);
@@ -1262,6 +1297,7 @@ public:
                              const Twine &Name = "") {
                              const Twine &Name = "") {
     if (V->getType() == DestTy)
     if (V->getType() == DestTy)
       return V;
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateZExtOrBitCast(VC, DestTy), Name);
       return Insert(Folder.CreateZExtOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name);
     return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name);
@@ -1270,6 +1306,7 @@ public:
                              const Twine &Name = "") {
                              const Twine &Name = "") {
     if (V->getType() == DestTy)
     if (V->getType() == DestTy)
       return V;
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateSExtOrBitCast(VC, DestTy), Name);
       return Insert(Folder.CreateSExtOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name);
     return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name);
@@ -1278,6 +1315,7 @@ public:
                               const Twine &Name = "") {
                               const Twine &Name = "") {
     if (V->getType() == DestTy)
     if (V->getType() == DestTy)
       return V;
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateTruncOrBitCast(VC, DestTy), Name);
       return Insert(Folder.CreateTruncOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateTruncOrBitCast(V, DestTy), Name);
     return Insert(CastInst::CreateTruncOrBitCast(V, DestTy), Name);
@@ -1286,6 +1324,7 @@ public:
                     const Twine &Name = "") {
                     const Twine &Name = "") {
     if (V->getType() == DestTy)
     if (V->getType() == DestTy)
       return V;
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateCast(Op, VC, DestTy), Name);
       return Insert(Folder.CreateCast(Op, VC, DestTy), Name);
     return Insert(CastInst::Create(Op, V, DestTy), Name);
     return Insert(CastInst::Create(Op, V, DestTy), Name);
@@ -1294,6 +1333,7 @@ public:
                            const Twine &Name = "") {
                            const Twine &Name = "") {
     if (V->getType() == DestTy)
     if (V->getType() == DestTy)
       return V;
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreatePointerCast(VC, DestTy), Name);
       return Insert(Folder.CreatePointerCast(VC, DestTy), Name);
     return Insert(CastInst::CreatePointerCast(V, DestTy), Name);
     return Insert(CastInst::CreatePointerCast(V, DestTy), Name);
@@ -1304,6 +1344,7 @@ public:
     if (V->getType() == DestTy)
     if (V->getType() == DestTy)
       return V;
       return V;
 
 
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V)) {
     if (Constant *VC = dyn_cast<Constant>(V)) {
       return Insert(Folder.CreatePointerBitCastOrAddrSpaceCast(VC, DestTy),
       return Insert(Folder.CreatePointerBitCastOrAddrSpaceCast(VC, DestTy),
                     Name);
                     Name);
@@ -1317,6 +1358,7 @@ public:
                        const Twine &Name = "") {
                        const Twine &Name = "") {
     if (V->getType() == DestTy)
     if (V->getType() == DestTy)
       return V;
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateIntCast(VC, DestTy, isSigned), Name);
       return Insert(Folder.CreateIntCast(VC, DestTy, isSigned), Name);
     return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name);
     return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name);
@@ -1342,6 +1384,7 @@ public:
   Value *CreateFPCast(Value *V, Type *DestTy, const Twine &Name = "") {
   Value *CreateFPCast(Value *V, Type *DestTy, const Twine &Name = "") {
     if (V->getType() == DestTy)
     if (V->getType() == DestTy)
       return V;
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateFPCast(VC, DestTy), Name);
       return Insert(Folder.CreateFPCast(VC, DestTy), Name);
     return Insert(CastInst::CreateFPCast(V, DestTy), Name);
     return Insert(CastInst::CreateFPCast(V, DestTy), Name);
@@ -1441,6 +1484,7 @@ public:
 
 
   Value *CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
   Value *CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
                     const Twine &Name = "") {
                     const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateICmp(P, LC, RC), Name);
         return Insert(Folder.CreateICmp(P, LC, RC), Name);
@@ -1448,6 +1492,7 @@ public:
   }
   }
   Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
   Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
                     const Twine &Name = "", MDNode *FPMathTag = nullptr) {
                     const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFCmp(P, LC, RC), Name);
         return Insert(Folder.CreateFCmp(P, LC, RC), Name);
@@ -1481,6 +1526,7 @@ public:
 
 
   Value *CreateSelect(Value *C, Value *True, Value *False,
   Value *CreateSelect(Value *C, Value *True, Value *False,
                       const Twine &Name = "") {
                       const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *CC = dyn_cast<Constant>(C))
     if (Constant *CC = dyn_cast<Constant>(C))
       if (Constant *TC = dyn_cast<Constant>(True))
       if (Constant *TC = dyn_cast<Constant>(True))
         if (Constant *FC = dyn_cast<Constant>(False))
         if (Constant *FC = dyn_cast<Constant>(False))
@@ -1494,6 +1540,7 @@ public:
 
 
   Value *CreateExtractElement(Value *Vec, Value *Idx,
   Value *CreateExtractElement(Value *Vec, Value *Idx,
                               const Twine &Name = "") {
                               const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(Vec))
     if (Constant *VC = dyn_cast<Constant>(Vec))
       if (Constant *IC = dyn_cast<Constant>(Idx))
       if (Constant *IC = dyn_cast<Constant>(Idx))
         return Insert(Folder.CreateExtractElement(VC, IC), Name);
         return Insert(Folder.CreateExtractElement(VC, IC), Name);
@@ -1507,6 +1554,7 @@ public:
 
 
   Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx,
   Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx,
                              const Twine &Name = "") {
                              const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(Vec))
     if (Constant *VC = dyn_cast<Constant>(Vec))
       if (Constant *NC = dyn_cast<Constant>(NewElt))
       if (Constant *NC = dyn_cast<Constant>(NewElt))
         if (Constant *IC = dyn_cast<Constant>(Idx))
         if (Constant *IC = dyn_cast<Constant>(Idx))
@@ -1521,6 +1569,7 @@ public:
 
 
   Value *CreateShuffleVector(Value *V1, Value *V2, Value *Mask,
   Value *CreateShuffleVector(Value *V1, Value *V2, Value *Mask,
                              const Twine &Name = "") {
                              const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *V1C = dyn_cast<Constant>(V1))
     if (Constant *V1C = dyn_cast<Constant>(V1))
       if (Constant *V2C = dyn_cast<Constant>(V2))
       if (Constant *V2C = dyn_cast<Constant>(V2))
         if (Constant *MC = dyn_cast<Constant>(Mask))
         if (Constant *MC = dyn_cast<Constant>(Mask))
@@ -1541,6 +1590,7 @@ public:
   Value *CreateExtractValue(Value *Agg,
   Value *CreateExtractValue(Value *Agg,
                             ArrayRef<unsigned> Idxs,
                             ArrayRef<unsigned> Idxs,
                             const Twine &Name = "") {
                             const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *AggC = dyn_cast<Constant>(Agg))
     if (Constant *AggC = dyn_cast<Constant>(Agg))
       return Insert(Folder.CreateExtractValue(AggC, Idxs), Name);
       return Insert(Folder.CreateExtractValue(AggC, Idxs), Name);
     return Insert(ExtractValueInst::Create(Agg, Idxs), Name);
     return Insert(ExtractValueInst::Create(Agg, Idxs), Name);
@@ -1549,6 +1599,7 @@ public:
   Value *CreateInsertValue(Value *Agg, Value *Val,
   Value *CreateInsertValue(Value *Agg, Value *Val,
                            ArrayRef<unsigned> Idxs,
                            ArrayRef<unsigned> Idxs,
                            const Twine &Name = "") {
                            const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *AggC = dyn_cast<Constant>(Agg))
     if (Constant *AggC = dyn_cast<Constant>(Agg))
       if (Constant *ValC = dyn_cast<Constant>(Val))
       if (Constant *ValC = dyn_cast<Constant>(Val))
         return Insert(Folder.CreateInsertValue(AggC, ValC, Idxs), Name);
         return Insert(Folder.CreateInsertValue(AggC, ValC, Idxs), Name);

+ 3 - 0
include/llvm/InitializePasses.h

@@ -261,6 +261,9 @@ void initializeResourceToHandlePass(PassRegistry&);
 void initializeSROA_SSAUp_HLSLPass(PassRegistry&);
 void initializeSROA_SSAUp_HLSLPass(PassRegistry&);
 void initializeHoistConstantArrayPass(PassRegistry&);
 void initializeHoistConstantArrayPass(PassRegistry&);
 void initializeDxilLoopUnrollPass(PassRegistry&);
 void initializeDxilLoopUnrollPass(PassRegistry&);
+void initializeDxilInsertNoopsPass(PassRegistry&);
+void initializeDxilFinalizeNoopsPass(PassRegistry&);
+void initializeDxilEliminateVectorPass(PassRegistry&);
 void initializeDxilConditionalMem2RegPass(PassRegistry&);
 void initializeDxilConditionalMem2RegPass(PassRegistry&);
 void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
 void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
 // HLSL Change Ends
 // HLSL Change Ends

+ 10 - 0
include/llvm/Transforms/Scalar.h

@@ -143,6 +143,15 @@ void initializeDxilLoopUnrollPass(PassRegistry&);
 Pass *createDxilEraseDeadRegionPass();
 Pass *createDxilEraseDeadRegionPass();
 void initializeDxilEraseDeadRegionPass(PassRegistry&);
 void initializeDxilEraseDeadRegionPass(PassRegistry&);
 
 
+Pass *createDxilEliminateVectorPass();
+void initializeDxilEliminateVectorPass(PassRegistry&);
+
+Pass *createDxilInsertNoopsPass();
+void initializeDxilInsertNoopsPass(PassRegistry&);
+
+Pass *createDxilFinalizeNoopsPass();
+void initializeDxilFinalizeNoopsPass(PassRegistry&);
+
 //===----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 //
 //
 // LowerStaticGlobalIntoAlloca. Replace static globals with alloca if only used
 // LowerStaticGlobalIntoAlloca. Replace static globals with alloca if only used
@@ -494,6 +503,7 @@ FunctionPass *createSampleProfileLoaderPass(StringRef Name);
 // ScalarizerPass - Converts vector operations into scalar operations
 // ScalarizerPass - Converts vector operations into scalar operations
 //
 //
 FunctionPass *createScalarizerPass();
 FunctionPass *createScalarizerPass();
+FunctionPass *createScalarizerPass(bool NoOpt);
 
 
 //===----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 //
 //

+ 335 - 6
lib/Analysis/InstructionSimplify.cpp

@@ -3941,6 +3941,333 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
                         Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
                         Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
 }
 }
 
 
+// HLSL Change - Begin
+// Copied CastInst simplification from LLVM 8
+
+/// Packs the elements of the constant vector \p C into \p Result, most
+/// significant element first, shifting by the bit size of \p SrcEltTy for
+/// every element. Undef elements contribute zero bits.
+///
+/// \returns nullptr once every element has been merged into \p Result, or a
+/// ConstantExpr bitcast of \p C to \p DestTy as soon as an element is neither
+/// undef nor a ConstantInt (\p Result is then only partially filled).
+static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
+                                        Constant *C, Type *SrcEltTy,
+                                        unsigned NumSrcElts,
+                                        const DataLayout &DL) {
+  const unsigned EltBits = DL.getTypeSizeInBits(SrcEltTy);
+  const bool LittleEndian = DL.isLittleEndian();
+
+  for (unsigned Idx = 0; Idx != NumSrcElts; ++Idx) {
+    // Little-endian layouts keep the most significant bits in the last
+    // element, so the elements are visited back to front in that case.
+    const unsigned EltNo = LittleEndian ? NumSrcElts - Idx - 1 : Idx;
+    Constant *Elt = C->getAggregateElement(EltNo);
+
+    const bool IsUndef = Elt && isa<UndefValue>(Elt);
+    ConstantInt *EltCI =
+        IsUndef ? nullptr : dyn_cast_or_null<ConstantInt>(Elt);
+
+    // Anything that is neither undef nor a plain integer cannot be packed.
+    if (!IsUndef && !EltCI)
+      return ConstantExpr::getBitCast(C, DestTy);
+
+    // Make room for this element; undef leaves the freshly shifted-in bits
+    // at zero.
+    Result <<= EltBits;
+    if (EltCI)
+      Result |= EltCI->getValue().zextOrSelf(Result.getBitWidth());
+  }
+
+  return nullptr;
+}
+
+/// Constant fold a bitcast of \p C to \p DestTy, using \p DL for element sizes
+/// and endianness. This always returns a non-null constant, but it may be a
+/// ConstantExpr bitcast if the cast cannot be folded.
+static
+Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
+  // Catch the obvious splat cases.
+  if (C->isNullValue() && !DestTy->isX86_MMXTy())
+    return Constant::getNullValue(DestTy);
+  if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() &&
+      !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
+    return Constant::getAllOnesValue(DestTy);
+
+  if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
+    // Handle a vector->scalar integer/fp cast.
+    if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
+      unsigned NumSrcElts = VTy->getNumElements();
+      Type *SrcEltTy = VTy->getElementType();
+
+      // If the vector is a vector of floating point, convert it to vector of int
+      // to simplify things.
+      if (SrcEltTy->isFloatingPointTy()) {
+        unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+        Type *SrcIVTy =
+          VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
+        // Ask IR to do the conversion now that #elts line up.
+        C = ConstantExpr::getBitCast(C, SrcIVTy);
+      }
+
+      APInt Result(DL.getTypeSizeInBits(DestTy), 0);
+      if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
+                                                SrcEltTy, NumSrcElts, DL))
+        return CE;
+
+      if (isa<IntegerType>(DestTy))
+        return ConstantInt::get(DestTy, Result);
+
+      APFloat FP(DestTy->getFltSemantics(), Result);
+      return ConstantFP::get(DestTy->getContext(), FP);
+    }
+  }
+
+  // The code below only handles casts to vectors currently.
+  auto *DestVTy = dyn_cast<VectorType>(DestTy);
+  if (!DestVTy)
+    return ConstantExpr::getBitCast(C, DestTy);
+
+  // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
+  // vector so the code below can handle it uniformly.
+  if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
+    Constant *Ops = C; // don't take the address of C!
+    return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
+  }
+
+  // If this is a bitcast from constant vector -> vector, fold it.
+  if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
+    return ConstantExpr::getBitCast(C, DestTy);
+
+  // If the element types match, IR can fold it.
+  unsigned NumDstElt = DestVTy->getNumElements();
+  unsigned NumSrcElt = C->getType()->getVectorNumElements();
+  if (NumDstElt == NumSrcElt)
+    return ConstantExpr::getBitCast(C, DestTy);
+
+  Type *SrcEltTy = C->getType()->getVectorElementType();
+  Type *DstEltTy = DestVTy->getElementType();
+
+  // Otherwise, we're changing the number of elements in a vector, which
+  // requires endianness information to do the right thing.  For example,
+  //    bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+  // folds to (little endian):
+  //    <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+  // and to (big endian):
+  //    <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+
+  // First things first.  We only want to think about integers here, so if
+  // we have something in FP form, recast it as integer.
+  if (DstEltTy->isFloatingPointTy()) {
+    // Fold to a vector of integers with same size as our FP type.
+    unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
+    Type *DestIVTy =
+      VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
+    // Recursively handle this integer conversion, if possible.
+    C = FoldBitCast(C, DestIVTy, DL);
+
+    // Finally, IR can handle this now that #elts line up.
+    return ConstantExpr::getBitCast(C, DestTy);
+  }
+
+  // Okay, we know the destination is integer, if the input is FP, convert
+  // it to integer first.
+  if (SrcEltTy->isFloatingPointTy()) {
+    unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+    Type *SrcIVTy =
+      VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
+    // Ask IR to do the conversion now that #elts line up.
+    C = ConstantExpr::getBitCast(C, SrcIVTy);
+    // If IR wasn't able to fold it, bail out.
+    if (!isa<ConstantVector>(C) &&  // FIXME: Remove ConstantVector.
+        !isa<ConstantDataVector>(C))
+      return C;
+  }
+
+  // Now we know that the input and output vectors are both integer vectors
+  // of the same size, and that their #elements is not the same.  Do the
+  // conversion here, which depends on whether the input or output has
+  // more elements.
+  bool isLittleEndian = DL.isLittleEndian();
+
+  SmallVector<Constant*, 32> Result;
+  if (NumDstElt < NumSrcElt) {
+    // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
+    Constant *Zero = Constant::getNullValue(DstEltTy);
+    unsigned Ratio = NumSrcElt/NumDstElt;
+    unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+    unsigned SrcElt = 0;
+    for (unsigned i = 0; i != NumDstElt; ++i) {
+      // Build each element of the result.
+      Constant *Elt = Zero;
+      unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
+      for (unsigned j = 0; j != Ratio; ++j) {
+        Constant *Src = C->getAggregateElement(SrcElt++);
+        if (Src && isa<UndefValue>(Src))
+          Src = Constant::getNullValue(C->getType()->getVectorElementType());
+        else
+          Src = dyn_cast_or_null<ConstantInt>(Src);
+        if (!Src)  // Reject constantexpr elements.
+          return ConstantExpr::getBitCast(C, DestTy);
+
+        // Zero extend the element to the right size.
+        Src = ConstantExpr::getZExt(Src, Elt->getType());
+
+        // Shift it to the right place, depending on endianness.
+        Src = ConstantExpr::getShl(Src,
+                                   ConstantInt::get(Src->getType(), ShiftAmt));
+        ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+
+        // Mix it in.
+        Elt = ConstantExpr::getOr(Elt, Src);
+      }
+      Result.push_back(Elt);
+    }
+    return ConstantVector::get(Result);
+  }
+
+  // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+  unsigned Ratio = NumDstElt/NumSrcElt;
+  unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
+
+  // Loop over each source value, expanding into multiple results.
+  for (unsigned i = 0; i != NumSrcElt; ++i) {
+    auto *Element = C->getAggregateElement(i);
+
+    if (!Element) // Reject constantexpr elements.
+      return ConstantExpr::getBitCast(C, DestTy);
+
+    if (isa<UndefValue>(Element)) {
+      // Correctly propagate undef values.
+      Result.append(Ratio, UndefValue::get(DstEltTy));
+      continue;
+    }
+
+    auto *Src = dyn_cast<ConstantInt>(Element);
+    if (!Src)
+      return ConstantExpr::getBitCast(C, DestTy);
+
+    unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+    for (unsigned j = 0; j != Ratio; ++j) {
+      // Shift the piece of the value into the right place, depending on
+      // endianness.
+      Constant *Elt = ConstantExpr::getLShr(Src,
+                                  ConstantInt::get(Src->getType(), ShiftAmt));
+      ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+
+      // Truncate the element to an integer with the same pointer size and
+      // convert the element back to a pointer using an inttoptr.
+      if (DstEltTy->isPointerTy()) {
+        IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize);
+        Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy);
+        Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy));
+        continue;
+      }
+
+      // Truncate and remember this piece.
+      Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
+    }
+  }
+
+  return ConstantVector::get(Result);
+}
+
+/// Folds the cast \p Opcode applied to the constant \p C with result type
+/// \p DestTy.
+///
+/// ptrtoint(inttoptr x) and inttoptr(ptrtoint x) pairs are collapsed here
+/// because doing so needs the pointer width from \p DL; bitcasts go through
+/// FoldBitCast; every other cast is handed to ConstantExpr::getCast.
+static Constant *ConstantFoldCastOperand(unsigned Opcode, Constant *C,
+                                         Type *DestTy, const DataLayout &DL) {
+  assert(Instruction::isCast(Opcode));
+
+  switch (Opcode) {
+  case Instruction::BitCast:
+    return FoldBitCast(C, DestTy, DL);
+
+  case Instruction::PtrToInt: {
+    // ptrtoint(inttoptr x): drop the pair, keeping only the bits that would
+    // have survived the trip through the pointer.
+    auto *Inner = dyn_cast<ConstantExpr>(C);
+    if (Inner && Inner->getOpcode() == Instruction::IntToPtr) {
+      Constant *IntVal = Inner->getOperand(0);
+      const unsigned IntBits = IntVal->getType()->getScalarSizeInBits();
+      const unsigned PtrBits = DL.getPointerTypeSizeInBits(Inner->getType());
+      if (PtrBits < IntBits) {
+        Constant *LowBits = ConstantInt::get(
+            Inner->getContext(), APInt::getLowBitsSet(IntBits, PtrBits));
+        IntVal = ConstantExpr::getAnd(IntVal, LowBits);
+      }
+      // Do a zext or trunc to get to the dest size.
+      return ConstantExpr::getIntegerCast(IntVal, DestTy, false);
+    }
+    break;
+  }
+
+  case Instruction::IntToPtr: {
+    // inttoptr(ptrtoint p): becomes a pointer-to-pointer bitcast when the
+    // intermediate integer is at least as wide as the source pointer and
+    // both pointers live in the same address space.
+    auto *Inner = dyn_cast<ConstantExpr>(C);
+    if (Inner && Inner->getOpcode() == Instruction::PtrToInt) {
+      Constant *PtrVal = Inner->getOperand(0);
+      const unsigned PtrBits = DL.getPointerTypeSizeInBits(PtrVal->getType());
+      const unsigned MidBits = Inner->getType()->getScalarSizeInBits();
+      if (MidBits >= PtrBits &&
+          PtrVal->getType()->getPointerAddressSpace() ==
+              DestTy->getPointerAddressSpace())
+        return FoldBitCast(Inner->getOperand(0), DestTy, DL);
+    }
+    break;
+  }
+
+  case Instruction::Trunc:
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPTrunc:
+  case Instruction::FPExt:
+  case Instruction::UIToFP:
+  case Instruction::SIToFP:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+  case Instruction::AddrSpaceCast:
+    break;
+
+  default:
+    llvm_unreachable("Missing case");
+  }
+
+  // No DataLayout-dependent shortcut applied.
+  return ConstantExpr::getCast(Opcode, C, DestTy);
+}
+
+/// Tries to simplify the cast \p CastOpc of \p Op to type \p Ty.
+///
+/// \returns the folded constant when \p Op is a constant, the original value
+/// when a pair of casts or a same-type bitcast cancels out, and nullptr when
+/// nothing simpler was found.
+static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
+                               Type *Ty, const DataLayout &DL) {
+  // Constant operand: fold it.
+  if (auto *ConstOp = dyn_cast<Constant>(Op))
+    return ConstantFoldCastOperand(CastOpc, ConstOp, Ty, DL);
+
+  // Cast of a cast that lands back on the innermost value's own type.
+  auto *Inner = dyn_cast<CastInst>(Op);
+  if (Inner && Inner->getOperand(0)->getType() == Ty) {
+    Value *Orig = Inner->getOperand(0);
+    Type *OrigTy = Orig->getType();
+    Type *MidTy = Inner->getType();
+
+    // isEliminableCastPair wants the pointer-sized integer type for every
+    // pointer (or pointer vector) type involved, and null otherwise.
+    auto IntPtrTyFor = [&DL](Type *T) -> Type * {
+      return T->isPtrOrPtrVectorTy() ? DL.getIntPtrType(T) : nullptr;
+    };
+    Type *OrigIntPtrTy = IntPtrTyFor(OrigTy);
+    Type *MidIntPtrTy = IntPtrTyFor(MidTy);
+    Type *DstIntPtrTy = IntPtrTyFor(Ty);
+
+    const unsigned Combined = CastInst::isEliminableCastPair(
+        static_cast<Instruction::CastOps>(Inner->getOpcode()),
+        static_cast<Instruction::CastOps>(CastOpc), OrigTy, MidTy, Ty,
+        OrigIntPtrTy, MidIntPtrTy, DstIntPtrTy);
+    if (Combined == Instruction::BitCast)
+      return Orig;
+  }
+
+  // bitcast x -> x
+  if (CastOpc == Instruction::BitCast && Op->getType() == Ty)
+    return Op;
+
+  return nullptr;
+}
+
+/// Public entry point; forwards to the file-local SimplifyCastInst.
+Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
+                              const DataLayout &DL) {
+  Value *Simplified = ::SimplifyCastInst(CastOpc, Op, Ty, DL);
+  return Simplified;
+}
+
+// HLSL Change - End
+
 /// SimplifyInstruction - See if we can compute a simplified version of this
 /// SimplifyInstruction - See if we can compute a simplified version of this
 /// instruction.  If not, this returns null.
 /// instruction.  If not, this returns null.
 Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
 Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
@@ -4075,12 +4402,14 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
     break;
     break;
   case Instruction::Call: {
   case Instruction::Call: {
     CallSite CS(cast<CallInst>(I));
     CallSite CS(cast<CallInst>(I));
-    // HLSL Change Begin - simplify dxil call.
-    if (hlsl::CanSimplify(CS.getCalledFunction())) {
-      SmallVector<Value *, 4> Args(CS.arg_begin(), CS.arg_end());
-      if (Value *DxilResult = hlsl::SimplifyDxilCall(CS.getCalledFunction(), Args, I)) {
-        Result = DxilResult;
-        break;
+    // HLSL Change Begin - simplify dxil calls.
+    if (Function *Callee = CS.getCalledFunction()) {
+      if (hlsl::CanSimplify(Callee)) {
+        SmallVector<Value *, 4> Args(CS.arg_begin(), CS.arg_end());
+        if (Value *DxilResult = hlsl::SimplifyDxilCall(CS.getCalledFunction(), Args, I)) {
+          Result = DxilResult;
+          break;
+        }
       }
       }
     }
     }
     // HLSL Change End.
     // HLSL Change End.

+ 36 - 0
lib/DXIL/DxilMetadataHelper.cpp

@@ -22,6 +22,7 @@
 
 
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Module.h"
@@ -48,6 +49,7 @@ const char DxilMDHelper::kDxilTypeSystemMDName[]                      = "dx.type
 const char DxilMDHelper::kDxilTypeSystemHelperVariablePrefix[]        = "dx.typevar.";
 const char DxilMDHelper::kDxilTypeSystemHelperVariablePrefix[]        = "dx.typevar.";
 const char DxilMDHelper::kDxilControlFlowHintMDName[]                 = "dx.controlflow.hints";
 const char DxilMDHelper::kDxilControlFlowHintMDName[]                 = "dx.controlflow.hints";
 const char DxilMDHelper::kDxilPreciseAttributeMDName[]                = "dx.precise";
 const char DxilMDHelper::kDxilPreciseAttributeMDName[]                = "dx.precise";
+const char DxilMDHelper::kDxilVariableDebugLayoutMDName[]             = "dx.dbg.varlayout";
 const char DxilMDHelper::kDxilNonUniformAttributeMDName[]             = "dx.nonuniform";
 const char DxilMDHelper::kDxilNonUniformAttributeMDName[]             = "dx.nonuniform";
 const char DxilMDHelper::kHLDxilResourceAttributeMDName[]             = "dx.hl.resource.attribute";
 const char DxilMDHelper::kHLDxilResourceAttributeMDName[]             = "dx.hl.resource.attribute";
 const char DxilMDHelper::kDxilValidatorVersionMDName[]                = "dx.valver";
 const char DxilMDHelper::kDxilValidatorVersionMDName[]                = "dx.valver";
@@ -2401,4 +2403,38 @@ void DxilMDHelper::MarkNonUniform(Instruction *I) {
   I->setMetadata(DxilMDHelper::kDxilNonUniformAttributeMDName, preciseNode);
   I->setMetadata(DxilMDHelper::kDxilNonUniformAttributeMDName, preciseNode);
 }
 }
 
 
+// Reads the "dx.dbg.varlayout" metadata attached to a dbg.declare.
+// The tuple holds the start offset followed by (stride, element count) pairs,
+// one pair per array dimension. Returns false when the instruction carries no
+// such tuple; otherwise stores the offset in StartOffsetInBits, appends one
+// entry per pair to ArrayDims (existing entries are kept) and returns true.
+// A tuple with an even operand count is rejected through IFTBOOL with
+// DXC_E_INCORRECT_DXIL_METADATA.
+bool DxilMDHelper::GetVariableDebugLayout(llvm::DbgDeclareInst *inst,
+    unsigned &StartOffsetInBits, std::vector<DxilDIArrayDim> &ArrayDims) {
+  llvm::MDNode *LayoutMD =
+      inst->getMetadata(DxilMDHelper::kDxilVariableDebugLayoutMDName);
+  llvm::MDTuple *Layout = dyn_cast_or_null<MDTuple>(LayoutMD);
+  if (!Layout)
+    return false;
+
+  // One offset plus two operands per dimension means an odd operand count.
+  IFTBOOL(Layout->getNumOperands() % 2 == 1, DXC_E_INCORRECT_DXIL_METADATA);
+
+  StartOffsetInBits = ConstMDToUint32(Layout->getOperand(0));
+
+  const unsigned NumOps = Layout->getNumOperands();
+  for (unsigned Op = 1; Op < NumOps; Op += 2) {
+    DxilDIArrayDim Dim = {};
+    Dim.StrideInBits = ConstMDToUint32(Layout->getOperand(Op));
+    Dim.NumElements = ConstMDToUint32(Layout->getOperand(Op + 1));
+    ArrayDims.push_back(Dim);
+  }
+
+  return true;
+}
+
+// Attaches "dx.dbg.varlayout" metadata to a dbg.declare: a tuple holding
+// StartOffsetInBits followed by a (StrideInBits, NumElements) pair for every
+// entry of ArrayDims, in order.
+void DxilMDHelper::SetVariableDebugLayout(llvm::DbgDeclareInst *inst,
+    unsigned StartOffsetInBits, const std::vector<DxilDIArrayDim> &ArrayDims) {
+  LLVMContext &Ctx = inst->getContext();
+
+  std::vector<Metadata*> MDVals;
+  // One operand for the start offset plus two operands per array dimension.
+  MDVals.reserve(2 * ArrayDims.size() + 1);
+  MDVals.emplace_back(Uint32ToConstMD(StartOffsetInBits, Ctx));
+  for (const DxilDIArrayDim &ArrayDim : ArrayDims) {
+    MDVals.emplace_back(Uint32ToConstMD(ArrayDim.StrideInBits, Ctx));
+    MDVals.emplace_back(Uint32ToConstMD(ArrayDim.NumElements, Ctx));
+  }
+
+  inst->setMetadata(DxilMDHelper::kDxilVariableDebugLayoutMDName, MDNode::get(Ctx, MDVals));
+}
+
 } // namespace hlsl
 } // namespace hlsl

+ 6 - 2
lib/DXIL/DxilModule.cpp

@@ -1009,6 +1009,7 @@ namespace {
 template <typename TResource>
 template <typename TResource>
 static void RemoveResourcesWithUnusedSymbolsHelper(std::vector<std::unique_ptr<TResource>> &vec) {
 static void RemoveResourcesWithUnusedSymbolsHelper(std::vector<std::unique_ptr<TResource>> &vec) {
   unsigned resID = 0;
   unsigned resID = 0;
+  std::unordered_set<GlobalVariable *> eraseList; // Need in case of duplicate defs of lib resources
   for (auto p = vec.begin(); p != vec.end();) {
   for (auto p = vec.begin(); p != vec.end();) {
     auto c = p++;
     auto c = p++;
     Constant *symbol = (*c)->GetGlobalSymbol();
     Constant *symbol = (*c)->GetGlobalSymbol();
@@ -1016,7 +1017,7 @@ static void RemoveResourcesWithUnusedSymbolsHelper(std::vector<std::unique_ptr<T
     if (symbol->user_empty()) {
     if (symbol->user_empty()) {
       p = vec.erase(c);
       p = vec.erase(c);
       if (GlobalVariable *GV = dyn_cast<GlobalVariable>(symbol))
       if (GlobalVariable *GV = dyn_cast<GlobalVariable>(symbol))
-        GV->eraseFromParent();
+        eraseList.insert(GV);
       continue;
       continue;
     }
     }
     if ((*c)->GetID() != resID) {
     if ((*c)->GetID() != resID) {
@@ -1024,6 +1025,9 @@ static void RemoveResourcesWithUnusedSymbolsHelper(std::vector<std::unique_ptr<T
     }
     }
     resID++;
     resID++;
   }
   }
+  for (auto gv : eraseList) {
+    gv->eraseFromParent();
+  }
 }
 }
 }
 }
 
 
@@ -1557,7 +1561,7 @@ static bool ResourceTypeRequiresTranslation(const StructType* Ty) {
         return true;
         return true;
     }
     }
     SequentialType *seqTy;
     SequentialType *seqTy;
-    while (seqTy = dyn_cast<SequentialType>(eTy)) {
+    while ((seqTy = dyn_cast<SequentialType>(eTy))) {
       eTy = seqTy->getElementType();
       eTy = seqTy->getElementType();
     }
     }
     if (eTy->getScalarSizeInBits() < 32) {
     if (eTy->getScalarSizeInBits() < 32) {

+ 2 - 1
lib/DXIL/DxilOperations.cpp

@@ -506,7 +506,8 @@ bool OP::IsDxilOpFuncName(StringRef name) {
 }
 }
 
 
 bool OP::IsDxilOpFunc(const llvm::Function *F) {
 bool OP::IsDxilOpFunc(const llvm::Function *F) {
-  if (!F->hasName())
+  // Accept null so that IsDxilOpFunc(Call.getCalledFunction()) stays safe for indirect calls, which have no called function
+  if (F == nullptr || !F->hasName())
     return false;
     return false;
   return IsDxilOpFuncName(F->getName());
   return IsDxilOpFuncName(F->getName());
 }
 }

+ 58 - 33
lib/DXIL/DxilPDB.cpp

@@ -42,6 +42,7 @@ static const char kMsfMagic[] = {'M',  'i',  'c',    'r', 'o', 's',  'o',  'f',
                                  'M',  'S',  'F',    ' ', '7', '.',  '0',  '0',
                                  'M',  'S',  'F',    ' ', '7', '.',  '0',  '0',
                                  '\r', '\n', '\x1a', 'D', 'S', '\0', '\0', '\0'};
                                  '\r', '\n', '\x1a', 'D', 'S', '\0', '\0', '\0'};
 
 
+static const uint32_t kPdbStreamIndex = 1; // This is the fixed stream index where the PDB stream header is
 static const uint32_t kDataStreamIndex = 5; // This is the fixed stream index where we will store our custom data.
 static const uint32_t kDataStreamIndex = 5; // This is the fixed stream index where we will store our custom data.
 static const uint32_t kMsfBlockSize = 512;
 static const uint32_t kMsfBlockSize = 512;
 
 
@@ -154,7 +155,7 @@ struct MSFWriter {
     void WriteBlocks(uint32_t NumBlocks, const void *Data, uint32_t Size) {
     void WriteBlocks(uint32_t NumBlocks, const void *Data, uint32_t Size) {
+      // Emits Size bytes from Data, then pads through WriteZeroPads up to
+      // NumBlocks * kMsfBlockSize bytes and counts the blocks as written.
       assert(NumBlocks >= GetNumBlocks(Size) && "Cannot fit data into the requested number of blocks!");
       assert(NumBlocks >= GetNumBlocks(Size) && "Cannot fit data into the requested number of blocks!");
       uint32_t TotalSize = NumBlocks * kMsfBlockSize;
       uint32_t TotalSize = NumBlocks * kMsfBlockSize;
-      OS.write((char *)Data, Size);
+      // The data is only read, so keep it const instead of casting the
+      // qualifier away.
+      OS.write(static_cast<const char *>(Data), Size);
       WriteZeroPads(TotalSize - Size);
       WriteZeroPads(TotalSize - Size);
       BlocksWritten += NumBlocks;
       BlocksWritten += NumBlocks;
     }
     }
@@ -375,29 +376,7 @@ struct PDBReader {
     return m_pStream->Seek(Offset, STREAM_SEEK_CUR, &BytesMoved);
     return m_pStream->Seek(Offset, STREAM_SEEK_CUR, &BytesMoved);
   }
   }
 
 
-  HRESULT ReadU32ListFromBlocks(ArrayRef<uint32_t> Blocks, UINT32 uOffsetByU32, UINT32 uNumU32, SmallVectorImpl<uint32_t> &Output) {
-    if (Blocks.size() == 0) return E_FAIL;
-    Output.clear();
-
-    for (unsigned i = 0; i < uNumU32; i++) {
-      UINT32 uOffsetInBytes = (uOffsetByU32+i) * sizeof(UINT32);
-      UINT32 BlockIndex = uOffsetInBytes / m_SB.BlockSize;
-      UINT32 ByteOffset = uOffsetInBytes % m_SB.BlockSize;
-
-      UINT32 uBlock = Blocks[BlockIndex];
-      IFR(GoToBeginningOfBlock(uBlock));
-      IFR(OffsetByU32(ByteOffset / sizeof(UINT32)));
-
-      UINT32 uData = 0;
-      IFR(ReadU32(&uData));
-
-      Output.push_back(uData);
-    }
-
-    return S_OK;
-  }
-
-  HRESULT ReadContainedData(IDxcBlob **ppData) {
+  HRESULT ReadWholeStream(uint32_t StreamIndex, IDxcBlob **ppData) {
     if (FAILED(m_Status)) return m_Status;
     if (FAILED(m_Status)) return m_Status;
 
 
     UINT32 uNumDirectoryBlocks =
     UINT32 uNumDirectoryBlocks =
@@ -418,21 +397,21 @@ struct PDBReader {
     IFR(ReadU32(&uNumStreams));
     IFR(ReadU32(&uNumStreams));
 
 
     // If we don't have enough streams, then give up.
     // If we don't have enough streams, then give up.
-    if (uNumStreams <= kDataStreamIndex)
+    if (uNumStreams <= StreamIndex)
       return E_FAIL;
       return E_FAIL;
 
 
     llvm::SmallVector<uint32_t, 6> StreamSizes;
     llvm::SmallVector<uint32_t, 6> StreamSizes;
     IFR(ReadU32ListFromBlocks(DirectoryBlocks, 1, uNumStreams, StreamSizes));
     IFR(ReadU32ListFromBlocks(DirectoryBlocks, 1, uNumStreams, StreamSizes));
 
 
     UINT32 uOffsets = 0;
     UINT32 uOffsets = 0;
-    for (unsigned i = 0; i <= kDataStreamIndex-1; i++) {
+    for (unsigned i = 0; i < StreamIndex; i++) {
       UINT32 uNumBlocks = CalculateNumBlocks(m_SB.BlockSize, StreamSizes[i]);
       UINT32 uNumBlocks = CalculateNumBlocks(m_SB.BlockSize, StreamSizes[i]);
       uOffsets += uNumBlocks;
       uOffsets += uNumBlocks;
     }
     }
 
 
     llvm::SmallVector<uint32_t, 12> DataBlocks;
     llvm::SmallVector<uint32_t, 12> DataBlocks;
     IFR(ReadU32ListFromBlocks(DirectoryBlocks, 1 + uNumStreams + uOffsets, 
     IFR(ReadU32ListFromBlocks(DirectoryBlocks, 1 + uNumStreams + uOffsets, 
-      CalculateNumBlocks(m_SB.BlockSize, StreamSizes[kDataStreamIndex]), DataBlocks));
+      CalculateNumBlocks(m_SB.BlockSize, StreamSizes[StreamIndex]), DataBlocks));
 
 
     if (DataBlocks.size() == 0)
     if (DataBlocks.size() == 0)
       return E_FAIL;
       return E_FAIL;
@@ -457,20 +436,66 @@ struct PDBReader {
 
 
     return S_OK;
     return S_OK;
   }
   }
-};
 
 
+  // Reads uNumU32 consecutive 32-bit values, starting uOffsetByU32 values
+  // into the logical stream formed by Blocks, and stores them in Output
+  // (cleared first). Each value is located by mapping its byte offset to an
+  // entry of Blocks plus an offset inside that block. Returns E_FAIL when
+  // Blocks is empty or a value would lie beyond the listed blocks, and
+  // propagates any failing seek/read HRESULT.
+  HRESULT ReadU32ListFromBlocks(ArrayRef<uint32_t> Blocks, UINT32 uOffsetByU32, UINT32 uNumU32, SmallVectorImpl<uint32_t> &Output) {
+    if (Blocks.size() == 0) return E_FAIL;
+    Output.clear();
 
 
-HRESULT hlsl::pdb::LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **ppContainer) {
+    for (unsigned i = 0; i < uNumU32; i++) {
+      UINT32 uOffsetInBytes = (uOffsetByU32+i) * sizeof(UINT32);
+      UINT32 BlockIndex = uOffsetInBytes / m_SB.BlockSize;
+      UINT32 ByteOffset = uOffsetInBytes % m_SB.BlockSize;
+
+      // Offsets and counts are derived from the file being parsed; refuse
+      // anything that points past the block list instead of indexing out of
+      // bounds.
+      if (BlockIndex >= Blocks.size())
+        return E_FAIL;
+
+      UINT32 uBlock = Blocks[BlockIndex];
+      IFR(GoToBeginningOfBlock(uBlock));
+      IFR(OffsetByU32(ByteOffset / sizeof(UINT32)));
+
+      UINT32 uData = 0;
+      IFR(ReadU32(&uData));
+
+      Output.push_back(uData);
+    }
+
+    return S_OK;
+  }
+};
+
+// Reads the DXIL container from stream kDataStreamIndex of the PDB in
+// pIStream and, when ppHash is non-null, also returns the UniqueId bytes of
+// the PDB stream header (stream kPdbStreamIndex) as a blob.
+// Both outputs are assigned only after every step has succeeded, so a failing
+// call leaves *ppHash and *ppContainer untouched.
+HRESULT hlsl::pdb::LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **ppHash, IDxcBlob **ppContainer) {
   PDBReader Reader(pMalloc, pIStream);
   PDBReader Reader(pMalloc, pIStream);
 
 
-  CComPtr<IDxcBlob> pDataBlob;
-  IFR(Reader.ReadContainedData(&pDataBlob));
+  // Held locally until the container has been validated as well.
+  CComPtr<IDxcBlob> pHashBlob;
+  if (ppHash) {
+    CComPtr<IDxcBlob> pPdbStream;
+    IFR(Reader.ReadWholeStream(kPdbStreamIndex, &pPdbStream));
+
+    if (pPdbStream->GetBufferSize() < sizeof(PdbStreamHeader))
+      return E_FAIL;
+
+    PdbStreamHeader PdbHeader = {};
+    memcpy(&PdbHeader, pPdbStream->GetBufferPointer(), sizeof(PdbHeader));
+
+    CComPtr<hlsl::AbstractMemoryStream> pHash;
+    IFR(CreateMemoryStream(pMalloc, &pHash));
+    ULONG uBytesWritten = 0;
+    IFR(pHash->Write(PdbHeader.UniqueId, sizeof(PdbHeader.UniqueId), &uBytesWritten));
+
+    if (uBytesWritten != sizeof(PdbHeader.UniqueId))
+      return E_FAIL;
+
+    IFR(pHash.QueryInterface(&pHashBlob));
+  }
+
+  CComPtr<IDxcBlob> pContainer;
+  IFR(Reader.ReadWholeStream(kDataStreamIndex, &pContainer));
 
 
-  if (!hlsl::IsValidDxilContainer((hlsl::DxilContainerHeader *)pDataBlob->GetBufferPointer(), pDataBlob->GetBufferSize()))
+  if (!hlsl::IsValidDxilContainer((hlsl::DxilContainerHeader *)pContainer->GetBufferPointer(), pContainer->GetBufferSize()))
     return E_FAIL;
     return E_FAIL;
 
 
-  *ppContainer = pDataBlob.Detach();
+  // Nothing can fail from here on: publish both results together.
+  if (ppHash)
+    *ppHash = pHashBlob.Detach();
+  *ppContainer = pContainer.Detach();
 
 
   return S_OK;
   return S_OK;
 }
 }
 
 
+// Overload for callers that only need the container: forwards to the
+// four-argument version with a null hash output.
+HRESULT hlsl::pdb::LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **ppContainer) {
+  IDxcBlob **ppNoHash = nullptr;
+  return LoadDataFromStream(pMalloc, pIStream, ppNoHash, ppContainer);
+}
+

+ 1 - 1
lib/DXIL/DxilShaderFlags.cpp

@@ -47,8 +47,8 @@ ShaderFlags::ShaderFlags():
 , m_bBarycentrics(false)
 , m_bBarycentrics(false)
 , m_bUseNativeLowPrecision(false)
 , m_bUseNativeLowPrecision(false)
 , m_bShadingRate(false)
 , m_bShadingRate(false)
-, m_bSamplerFeedback(false)
 , m_bRaytracingTier1_1(false)
 , m_bRaytracingTier1_1(false)
+, m_bSamplerFeedback(false)
 , m_align0(0)
 , m_align0(0)
 , m_align1(0)
 , m_align1(0)
 {}
 {}

+ 5 - 3
lib/DXIL/DxilShaderModel.cpp

@@ -250,7 +250,10 @@ const char * ShaderModel::GetKindName() const {
   return GetKindName(m_Kind);
   return GetKindName(m_Kind);
 }
 }
 
 
-const char * ShaderModel::GetKindName(Kind kind) {
+const char *ShaderModel::GetKindName(Kind kind) {
+  // Compile-time check that ShaderModelKindNames has exactly one entry per
+  // Kind value up to and including Kind::Invalid, which must be the last one.
+  static_assert(static_cast<unsigned>(Kind::Invalid) ==
+                    _countof(ShaderModelKindNames) - 1,
+                "Invalid kinds or names");
   return ShaderModelKindNames[static_cast<unsigned int>(kind)];
   return ShaderModelKindNames[static_cast<unsigned int>(kind)];
 }
 }
 
 
@@ -332,12 +335,11 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
 
 
   // lib_6_x is for offline linking only, and relaxes restrictions
   // lib_6_x is for offline linking only, and relaxes restrictions
   SM(Kind::Library,  6, kOfflineMinor, "lib_6_x",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Library,  6, kOfflineMinor, "lib_6_x",  32, 32,  true,  true,  UINT_MAX),
-
+  
   SM(Kind::Mesh,     6, 5, "ms_6_5",    0,  0,  true,  true,  UINT_MAX),
   SM(Kind::Mesh,     6, 5, "ms_6_5",    0,  0,  true,  true,  UINT_MAX),
   SM(Kind::Amplification, 6, 5, "as_6_5", 0, 0, true,  true,  UINT_MAX),
   SM(Kind::Amplification, 6, 5, "as_6_5", 0, 0, true,  true,  UINT_MAX),
 
 
   // Values before Invalid must remain sorted by Kind, then Major, then Minor.
   // Values before Invalid must remain sorted by Kind, then Major, then Minor.
-
   SM(Kind::Invalid,  0, 0, "invalid", 0,  0,   false, false, 0),
   SM(Kind::Invalid,  0, 0, "invalid", 0,  0,   false, false, 0),
 };
 };
 
 

+ 5 - 1
lib/DXIL/DxilUtil.cpp

@@ -538,6 +538,10 @@ bool IsHLSLResourceType(llvm::Type *Ty) {
 
 
 bool IsHLSLObjectType(llvm::Type *Ty) {
 bool IsHLSLObjectType(llvm::Type *Ty) {
   if (llvm::StructType *ST = dyn_cast<llvm::StructType>(Ty)) {
   if (llvm::StructType *ST = dyn_cast<llvm::StructType>(Ty)) {
+    if (!ST->hasName()) {
+      return false;
+    }
+
     StringRef name = ST->getName();
     StringRef name = ST->getName();
     // TODO: don't check names.
     // TODO: don't check names.
     if (name.startswith("dx.types.wave_t"))
     if (name.startswith("dx.types.wave_t"))
@@ -585,7 +589,7 @@ bool ContainsHLSLObjectType(llvm::Type *Ty) {
     Ty = llvm::cast<llvm::ArrayType>(Ty)->getArrayElementType();
     Ty = llvm::cast<llvm::ArrayType>(Ty)->getArrayElementType();
 
 
   if (llvm::StructType *ST = llvm::dyn_cast<llvm::StructType>(Ty)) {
   if (llvm::StructType *ST = llvm::dyn_cast<llvm::StructType>(Ty)) {
-    if (ST->getName().startswith("dx.types."))
+    if (ST->hasName() && ST->getName().startswith("dx.types."))
       return true;
       return true;
     // TODO: How is this supposed to check for Input/OutputPatch types if
     // TODO: How is this supposed to check for Input/OutputPatch types if
     // these have already been eliminated in function arguments during CG?
     // these have already been eliminated in function arguments during CG?

File diff suppressed because it is too large
+ 523 - 271
lib/DxcSupport/FileIOHelper.cpp


+ 65 - 28
lib/DxcSupport/HLSLOptions.cpp

@@ -296,10 +296,27 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   const MainArgs &argStrings, DxcOpts &opts,
   const MainArgs &argStrings, DxcOpts &opts,
   llvm::raw_ostream &errors) {
   llvm::raw_ostream &errors) {
   DXASSERT_NOMSG(optionTable != nullptr);
   DXASSERT_NOMSG(optionTable != nullptr);
+  opts.DefaultTextCodePage = DXC_CP_UTF8;
 
 
   unsigned missingArgIndex = 0, missingArgCount = 0;
   unsigned missingArgIndex = 0, missingArgCount = 0;
   InputArgList Args = optionTable->ParseArgs(
   InputArgList Args = optionTable->ParseArgs(
     argStrings.getArrayRef(), missingArgIndex, missingArgCount, flagsToInclude);
     argStrings.getArrayRef(), missingArgIndex, missingArgCount, flagsToInclude);
+
+  // Resolve -encoding right after argument parsing so the chosen code page
+  // is already in opts.DefaultTextCodePage for anything that follows (the
+  // original note says it may influence the error buffer).
+  // An absent/empty -encoding leaves the value assigned before parsing
+  // (UTF8, for compatibility) untouched.
+  llvm::StringRef encoding = Args.getLastArgValue(OPT_encoding);
+  if (!encoding.empty()) {
+    // Matching is case-insensitive: "UTF8", "Utf16", ... are accepted.
+    if (encoding.equals_lower("utf8")) {
+      opts.DefaultTextCodePage = DXC_CP_UTF8;
+    } else if (encoding.equals_lower("utf16")) {
+      opts.DefaultTextCodePage = DXC_CP_UTF16;
+    } else {
+      // Close the quote around the offending value and separate it from
+      // the rest of the sentence.
+      errors << "Unsupported value '" << encoding
+        << "' for -encoding option.  Allowed values: utf8, utf16.";
+      return 1;
+    }
+  }
+
   // Verify consistency for external library support.
   // Verify consistency for external library support.
   opts.ExternalLib = Args.getLastArgValue(OPT_external_lib);
   opts.ExternalLib = Args.getLastArgValue(OPT_external_lib);
   opts.ExternalFn = Args.getLastArgValue(OPT_external_fn);
   opts.ExternalFn = Args.getLastArgValue(OPT_external_fn);
@@ -359,13 +376,10 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   }
   }
 
 
   if (opts.IsLibraryProfile()) {
   if (opts.IsLibraryProfile()) {
-    if (Args.getLastArg(OPT_entrypoint)) {
-      errors << "cannot specify entry point for a library";
-      return 1;
-    } else {
-      // Set entry point to impossible name.
-      opts.EntryPoint = "lib.no::entry";
-    }
+    // Don't bother erroring out when entry is specified.  We weren't always
+    // doing this before, so doing so will break existing code.
+    // Set entry point to impossible name.
+    opts.EntryPoint = "lib.no::entry";
   } else {
   } else {
     if (Args.getLastArg(OPT_exports)) {
     if (Args.getLastArg(OPT_exports)) {
       errors << "library profile required when using -exports option";
       errors << "library profile required when using -exports option";
@@ -427,6 +441,9 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.OutputObject = Args.getLastArgValue(OPT_Fo);
   opts.OutputObject = Args.getLastArgValue(OPT_Fo);
   opts.OutputHeader = Args.getLastArgValue(OPT_Fh);
   opts.OutputHeader = Args.getLastArgValue(OPT_Fh);
   opts.OutputWarningsFile = Args.getLastArgValue(OPT_Fe);
   opts.OutputWarningsFile = Args.getLastArgValue(OPT_Fe);
+  opts.OutputReflectionFile = Args.getLastArgValue(OPT_Fre);
+  opts.OutputRootSigFile = Args.getLastArgValue(OPT_Frs);
+  opts.OutputShaderHashFile = Args.getLastArgValue(OPT_Fsh);
   opts.UseColor = Args.hasFlag(OPT_Cc, OPT_INVALID, false);
   opts.UseColor = Args.hasFlag(OPT_Cc, OPT_INVALID, false);
   opts.UseInstructionNumbers = Args.hasFlag(OPT_Ni, OPT_INVALID, false);
   opts.UseInstructionNumbers = Args.hasFlag(OPT_Ni, OPT_INVALID, false);
   opts.UseInstructionByteOffsets = Args.hasFlag(OPT_No, OPT_INVALID, false);
   opts.UseInstructionByteOffsets = Args.hasFlag(OPT_No, OPT_INVALID, false);
@@ -440,6 +457,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.VariableName = Args.getLastArgValue(OPT_Vn);
   opts.VariableName = Args.getLastArgValue(OPT_Vn);
   opts.InputFile = Args.getLastArgValue(OPT_INPUT);
   opts.InputFile = Args.getLastArgValue(OPT_INPUT);
   opts.ForceRootSigVer = Args.getLastArgValue(OPT_force_rootsig_ver);
   opts.ForceRootSigVer = Args.getLastArgValue(OPT_force_rootsig_ver);
+  if (opts.ForceRootSigVer.empty())
+    opts.ForceRootSigVer = Args.getLastArgValue(OPT_force_rootsig_ver_);
   opts.PrivateSource = Args.getLastArgValue(OPT_setprivate);
   opts.PrivateSource = Args.getLastArgValue(OPT_setprivate);
   opts.RootSignatureSource = Args.getLastArgValue(OPT_setrootsignature);
   opts.RootSignatureSource = Args.getLastArgValue(OPT_setrootsignature);
   opts.VerifyRootSignatureSource = Args.getLastArgValue(OPT_verifyrootsignature);
   opts.VerifyRootSignatureSource = Args.getLastArgValue(OPT_verifyrootsignature);
@@ -536,13 +555,17 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.DisableValidation = Args.hasFlag(OPT_VD, OPT_INVALID, false);
   opts.DisableValidation = Args.hasFlag(OPT_VD, OPT_INVALID, false);
 
 
   opts.AllResourcesBound = Args.hasFlag(OPT_all_resources_bound, OPT_INVALID, false);
   opts.AllResourcesBound = Args.hasFlag(OPT_all_resources_bound, OPT_INVALID, false);
+  opts.AllResourcesBound = Args.hasFlag(OPT_all_resources_bound_, OPT_INVALID, opts.AllResourcesBound);
   opts.ColorCodeAssembly = Args.hasFlag(OPT_Cc, OPT_INVALID, false);
   opts.ColorCodeAssembly = Args.hasFlag(OPT_Cc, OPT_INVALID, false);
   opts.DefaultRowMajor = Args.hasFlag(OPT_Zpr, OPT_INVALID, false);
   opts.DefaultRowMajor = Args.hasFlag(OPT_Zpr, OPT_INVALID, false);
   opts.DefaultColMajor = Args.hasFlag(OPT_Zpc, OPT_INVALID, false);
   opts.DefaultColMajor = Args.hasFlag(OPT_Zpc, OPT_INVALID, false);
   opts.DumpBin = Args.hasFlag(OPT_dumpbin, OPT_INVALID, false);
   opts.DumpBin = Args.hasFlag(OPT_dumpbin, OPT_INVALID, false);
-  opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_not_use_legacy_cbuf_load, OPT_INVALID, false);
+  opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_no_legacy_cbuf_layout, OPT_INVALID, false);
+  opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_not_use_legacy_cbuf_load_, OPT_INVALID, opts.NotUseLegacyCBufLoad);
   opts.PackPrefixStable = Args.hasFlag(OPT_pack_prefix_stable, OPT_INVALID, false);
   opts.PackPrefixStable = Args.hasFlag(OPT_pack_prefix_stable, OPT_INVALID, false);
+  opts.PackPrefixStable = Args.hasFlag(OPT_pack_prefix_stable_, OPT_INVALID, opts.PackPrefixStable);
   opts.PackOptimized = Args.hasFlag(OPT_pack_optimized, OPT_INVALID, false);
   opts.PackOptimized = Args.hasFlag(OPT_pack_optimized, OPT_INVALID, false);
+  opts.PackOptimized = Args.hasFlag(OPT_pack_optimized_, OPT_INVALID, opts.PackOptimized);
   opts.DisplayIncludeProcess = Args.hasFlag(OPT_H, OPT_INVALID, false);
   opts.DisplayIncludeProcess = Args.hasFlag(OPT_H, OPT_INVALID, false);
   opts.WarningAsError = Args.hasFlag(OPT__SLASH_WX, OPT_INVALID, false);
   opts.WarningAsError = Args.hasFlag(OPT__SLASH_WX, OPT_INVALID, false);
   opts.AvoidFlowControl = Args.hasFlag(OPT_Gfa, OPT_INVALID, false);
   opts.AvoidFlowControl = Args.hasFlag(OPT_Gfa, OPT_INVALID, false);
@@ -564,6 +587,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.LegacyResourceReservation = Args.hasFlag(OPT_flegacy_resource_reservation, OPT_INVALID, false);
   opts.LegacyResourceReservation = Args.hasFlag(OPT_flegacy_resource_reservation, OPT_INVALID, false);
   opts.ExportShadersOnly = Args.hasFlag(OPT_export_shaders_only, OPT_INVALID, false);
   opts.ExportShadersOnly = Args.hasFlag(OPT_export_shaders_only, OPT_INVALID, false);
   opts.ResMayAlias = Args.hasFlag(OPT_res_may_alias, OPT_INVALID, false);
   opts.ResMayAlias = Args.hasFlag(OPT_res_may_alias, OPT_INVALID, false);
+  opts.ResMayAlias = Args.hasFlag(OPT_res_may_alias_, OPT_INVALID, opts.ResMayAlias);
 
 
   if (opts.DefaultColMajor && opts.DefaultRowMajor) {
   if (opts.DefaultColMajor && opts.DefaultRowMajor) {
     errors << "Cannot specify /Zpr and /Zpc together, use /? to get usage information";
     errors << "Cannot specify /Zpr and /Zpc together, use /? to get usage information";
@@ -599,9 +623,18 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 
 
   if (!opts.Preprocess.empty() &&
   if (!opts.Preprocess.empty() &&
       (!opts.OutputHeader.empty() || !opts.OutputObject.empty() ||
       (!opts.OutputHeader.empty() || !opts.OutputObject.empty() ||
-       !opts.OutputWarnings || !opts.OutputWarningsFile.empty())) {
-    errors << "Preprocess cannot be specified with other options.";
-    return 1;
+       !opts.OutputWarnings || !opts.OutputWarningsFile.empty() ||
+       !opts.OutputReflectionFile.empty() ||
+       !opts.OutputRootSigFile.empty() ||
+       !opts.OutputShaderHashFile.empty())) {
+    opts.OutputHeader = "";
+    opts.OutputObject = "";
+    opts.OutputWarnings = true;
+    opts.OutputWarningsFile = "";
+    opts.OutputReflectionFile = "";
+    opts.OutputRootSigFile = "";
+    opts.OutputShaderHashFile = "";
+    errors << "Warning: compiler options ignored with Preprocess.";
   }
   }
 
 
   if (opts.DumpBin) {
   if (opts.DumpBin) {
@@ -620,6 +653,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
     }
     }
   }
   }
 
 
+  // XXX TODO: Sort this out, since it's required for new API, but a separate argument for old APIs.
   if ((flagsToInclude & hlsl::options::DriverOption) &&
   if ((flagsToInclude & hlsl::options::DriverOption) &&
       opts.TargetProfile.empty() && !opts.DumpBin && opts.Preprocess.empty() && !opts.RecompileFromBinary) {
       opts.TargetProfile.empty() && !opts.DumpBin && opts.Preprocess.empty() && !opts.RecompileFromBinary) {
     // Target profile is required in arguments only for drivers when compiling;
     // Target profile is required in arguments only for drivers when compiling;
@@ -628,23 +662,6 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
     return 1;
     return 1;
   }
   }
 
 
-  if (opts.EmbedDebug && !opts.DebugInfo) {
-    errors << "Must enable debug info with /Zi for /Qembed_debug";
-    return 1;
-  }
-
-  if (opts.DebugInfo && !opts.DebugNameForBinary && !opts.DebugNameForSource) {
-    opts.DebugNameForBinary = true;
-  } else if (opts.DebugNameForBinary && opts.DebugNameForSource) {
-    errors << "Cannot specify both /Zss and /Zsb";
-    return 1;
-  }
-
-  if (opts.DebugNameForSource && !opts.DebugInfo) {
-    errors << "/Zss requires debug info (/Zi)";
-    return 1;
-  }
-
   llvm::StringRef valVersionStr = Args.getLastArgValue(OPT_validator_version);
   llvm::StringRef valVersionStr = Args.getLastArgValue(OPT_validator_version);
   if (!valVersionStr.empty()) {
   if (!valVersionStr.empty()) {
     // Parse "major.minor" version string
     // Parse "major.minor" version string
@@ -815,6 +832,26 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 #endif // ENABLE_SPIRV_CODEGEN
 #endif // ENABLE_SPIRV_CODEGEN
   // SPIRV Change Ends
   // SPIRV Change Ends
 
 
+  // Validation for DebugInfo here because spirv uses same DebugInfo opt,
+  // and legacy wrappers will add EmbedDebug in this case, leading to this
+  // failing if placed before spirv path sets DebugInfo to true.
+  if (opts.EmbedDebug && !opts.DebugInfo) {
+    errors << "Must enable debug info with /Zi for /Qembed_debug";
+    return 1;
+  }
+
+  if (opts.DebugInfo && !opts.DebugNameForBinary && !opts.DebugNameForSource) {
+    opts.DebugNameForBinary = true;
+  } else if (opts.DebugNameForBinary && opts.DebugNameForSource) {
+    errors << "Cannot specify both /Zss and /Zsb";
+    return 1;
+  }
+
+  if (opts.DebugNameForSource && !opts.DebugInfo) {
+    errors << "/Zss requires debug info (/Zi)";
+    return 1;
+  }
+
   opts.Args = std::move(Args);
   opts.Args = std::move(Args);
   return 0;
   return 0;
 }
 }

+ 26 - 10
lib/DxcSupport/Unicode.cpp

@@ -53,7 +53,7 @@ int MultiByteToWideChar(uint32_t CodePage, uint32_t /*dwFlags*/,
   size_t rv;
   size_t rv;
   const char *locale = CPToLocale(CodePage);
   const char *locale = CPToLocale(CodePage);
   locale = setlocale(LC_ALL, locale);
   locale = setlocale(LC_ALL, locale);
-  if (lpMultiByteStr[cbMultiByte] != '\0') {
+  if (lpMultiByteStr[cbMultiByte - 1] != '\0') {
     char *srcStr = (char *)malloc((cbMultiByte +1) * sizeof(char));
     char *srcStr = (char *)malloc((cbMultiByte +1) * sizeof(char));
     strncpy(srcStr, lpMultiByteStr, cbMultiByte);
     strncpy(srcStr, lpMultiByteStr, cbMultiByte);
     srcStr[cbMultiByte]='\0';
     srcStr[cbMultiByte]='\0';
@@ -102,7 +102,7 @@ int WideCharToMultiByte(uint32_t CodePage, uint32_t /*dwFlags*/,
   size_t rv;
   size_t rv;
   const char *locale = CPToLocale(CodePage);
   const char *locale = CPToLocale(CodePage);
   locale = setlocale(LC_ALL, locale);
   locale = setlocale(LC_ALL, locale);
-  if (lpWideCharStr[cchWideChar] != L'\0') {
+  if (lpWideCharStr[cchWideChar - 1] != L'\0') {
     wchar_t *srcStr = (wchar_t *)malloc((cchWideChar+1) * sizeof(wchar_t));
     wchar_t *srcStr = (wchar_t *)malloc((cchWideChar+1) * sizeof(wchar_t));
     wcsncpy(srcStr, lpWideCharStr, cchWideChar);
     wcsncpy(srcStr, lpWideCharStr, cchWideChar);
     srcStr[cchWideChar] = L'\0';
     srcStr[cchWideChar] = L'\0';
@@ -120,10 +120,9 @@ int WideCharToMultiByte(uint32_t CodePage, uint32_t /*dwFlags*/,
 namespace Unicode {
 namespace Unicode {
 
 
 _Success_(return != false)
 _Success_(return != false)
-bool UTF16ToEncodedString(_In_z_ const wchar_t* text, DWORD cp, DWORD flags, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
+bool UTF16ToEncodedString(_In_z_ const wchar_t* text, size_t cUTF16, DWORD cp, DWORD flags, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
   BOOL usedDefaultChar;
   BOOL usedDefaultChar;
   LPBOOL pUsedDefaultChar = (lossy == nullptr) ? nullptr : &usedDefaultChar;
   LPBOOL pUsedDefaultChar = (lossy == nullptr) ? nullptr : &usedDefaultChar;
-  size_t cUTF16 = wcslen(text);
   if (lossy != nullptr) *lossy = false;
   if (lossy != nullptr) *lossy = false;
 
 
   // Handle zero-length as a special case; it's a special value to indicate errors in WideCharToMultiByte.
   // Handle zero-length as a special case; it's a special value to indicate errors in WideCharToMultiByte.
@@ -188,30 +187,47 @@ std::wstring UTF8ToUTF16StringOrThrow(_In_z_ const char *pUTF8) {
 }
 }
 
 
 _Use_decl_annotations_
 _Use_decl_annotations_
-bool UTF8ToConsoleString(_In_z_ const char* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
+bool UTF8ToConsoleString(_In_z_ const char* text, _In_ size_t textLen, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
   DXASSERT_NOMSG(text != nullptr);
   DXASSERT_NOMSG(text != nullptr);
   DXASSERT_NOMSG(pValue != nullptr);
   DXASSERT_NOMSG(pValue != nullptr);
   std::wstring text16;
   std::wstring text16;
   if (lossy != nullptr) *lossy = false;
   if (lossy != nullptr) *lossy = false;
-  if (!UTF8ToUTF16String(text, &text16)) {
+  if (!UTF8ToUTF16String(text, textLen, &text16)) {
     return false;
     return false;
   }
   }
-  return UTF16ToConsoleString(text16.c_str(), pValue, lossy);
+  return UTF16ToConsoleString(text16.c_str(), text16.length(), pValue, lossy);
 }
 }
 
 
 _Use_decl_annotations_
 _Use_decl_annotations_
-bool UTF16ToConsoleString(const wchar_t* text, std::string* pValue, bool* lossy) {
+// Convenience overload for null-terminated input: measures `text` with
+// strlen and forwards to the explicit-length overload.
+//   text   - null-terminated UTF-8 string; must not be null.
+//   pValue - receives the converted string (forwarded unchanged).
+//   lossy  - optional; forwarded unchanged to the explicit-length overload.
+// Returns whatever the explicit-length overload returns.
+bool UTF8ToConsoleString(_In_z_ const char* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
+  // Check before strlen touches the pointer, matching the sibling overloads.
+  DXASSERT_NOMSG(text != nullptr);
+  return UTF8ToConsoleString(text, strlen(text), pValue, lossy);
+}
+
+_Use_decl_annotations_
+bool UTF16ToConsoleString(const wchar_t* text, _In_ size_t textLen, std::string* pValue, bool* lossy) {
   DXASSERT_NOMSG(text != nullptr);
   DXASSERT_NOMSG(text != nullptr);
   DXASSERT_NOMSG(pValue != nullptr);
   DXASSERT_NOMSG(pValue != nullptr);
   UINT cp = GetConsoleOutputCP();
   UINT cp = GetConsoleOutputCP();
-  return UTF16ToEncodedString(text, cp, 0, pValue, lossy);
+  return UTF16ToEncodedString(text, textLen, cp, 0, pValue, lossy);
+}
+
+// Convenience overload for null-terminated input: measures `text` with
+// wcslen and forwards to the explicit-length overload.
+//   text   - null-terminated wide string; must not be null.
+//   pValue - receives the converted string (forwarded unchanged).
+//   lossy  - optional; forwarded unchanged to the explicit-length overload.
+// Returns whatever the explicit-length overload returns.
+_Use_decl_annotations_
+bool UTF16ToConsoleString(const wchar_t* text, std::string* pValue, bool* lossy) {
+  // Check before wcslen touches the pointer, matching the sibling overloads.
+  DXASSERT_NOMSG(text != nullptr);
+  return UTF16ToConsoleString(text, wcslen(text), pValue, lossy);
+}
+
+_Use_decl_annotations_
+bool UTF16ToUTF8String(const wchar_t *pUTF16, size_t cUTF16, std::string *pUTF8) {
+  DXASSERT_NOMSG(pUTF16 != nullptr);
+  DXASSERT_NOMSG(pUTF8 != nullptr);
+  return UTF16ToEncodedString(pUTF16, cUTF16, CP_UTF8, 0, pUTF8, nullptr);
 }
 }
 
 
 _Use_decl_annotations_
 _Use_decl_annotations_
 bool UTF16ToUTF8String(const wchar_t *pUTF16, std::string *pUTF8) {
 bool UTF16ToUTF8String(const wchar_t *pUTF16, std::string *pUTF8) {
   DXASSERT_NOMSG(pUTF16 != nullptr);
   DXASSERT_NOMSG(pUTF16 != nullptr);
   DXASSERT_NOMSG(pUTF8 != nullptr);
   DXASSERT_NOMSG(pUTF8 != nullptr);
-  return UTF16ToEncodedString(pUTF16, CP_UTF8, 0, pUTF8, nullptr);
+  return UTF16ToEncodedString(pUTF16, wcslen(pUTF16), CP_UTF8, 0, pUTF8, nullptr);
 }
 }
 
 
 std::string UTF16ToUTF8StringOrThrow(_In_z_ const wchar_t *pUTF16) {
 std::string UTF16ToUTF8StringOrThrow(_In_z_ const wchar_t *pUTF16) {

+ 102 - 20
lib/DxcSupport/dxcapi.use.cpp

@@ -13,6 +13,7 @@
 #include "dxc/Support/dxcapi.use.h"
 #include "dxc/Support/dxcapi.use.h"
 #include "dxc/Support/Global.h"
 #include "dxc/Support/Global.h"
 #include "dxc/Support/Unicode.h"
 #include "dxc/Support/Unicode.h"
+#include "dxc/Support/FileIOHelper.h"
 #include "dxc/Support/WinFunctions.h"
 #include "dxc/Support/WinFunctions.h"
 
 
 namespace dxc {
 namespace dxc {
@@ -99,36 +100,128 @@ void WriteOperationResultToConsole(_In_ IDxcOperationResult *pRewriteResult,
   WriteBlobToConsole(pBlob, STD_OUTPUT_HANDLE);
   WriteBlobToConsole(pBlob, STD_OUTPUT_HANDLE);
 }
 }
 
 
+// Converts a null-terminated wide string with Unicode::UTF16ToConsoleString
+// and prints the result, followed by a newline, to stdout or stderr.
+//   pText      - null-terminated wide string; nullptr is a silent no-op.
+//   streamType - STD_OUTPUT_HANDLE selects stdout, STD_ERROR_HANDLE selects
+//                stderr; any other value throws hlsl::Exception(E_INVALIDARG).
+// The parameter is annotated as an optional null-terminated string: there is
+// no element-count parameter for a _count_ annotation to refer to.
+static void WriteUtf16NullTermToConsole(_In_opt_z_ const wchar_t *pText,
+                                        DWORD streamType) {
+  if (pText == nullptr) {
+    return;
+  }
+
+  // Note: even if there was loss, print anyway. The conversion's return
+  // value is ignored as well, so whatever ended up in consoleMessage is
+  // what gets printed.
+  bool lossy = false;
+  std::string consoleMessage;
+  Unicode::UTF16ToConsoleString(pText, &consoleMessage, &lossy);
+  if (streamType == STD_OUTPUT_HANDLE) {
+    fprintf(stdout, "%s\n", consoleMessage.c_str());
+  }
+  else if (streamType == STD_ERROR_HANDLE) {
+    fprintf(stderr, "%s\n", consoleMessage.c_str());
+  }
+  else {
+    throw hlsl::Exception(E_INVALIDARG);
+  }
+}
+
+// Produces a UTF-8 view of `pBlob` only when the blob reports a known text
+// encoding.
+//   pBlob      - blob to inspect; nullptr is accepted and treated as
+//                "not text".
+//   ppBlobUtf8 - written only by hlsl::DxcGetBlobAsUtf8; left untouched on
+//                every "not text" path, so callers should pass a pointer to
+//                an initially-null interface pointer and test it afterwards.
+// Returns S_OK without converting when the blob is null, does not expose
+// IDxcBlobEncoding, or reports an unknown encoding; otherwise returns the
+// HRESULT of hlsl::DxcGetBlobAsUtf8.
+// NOTE(review): a failing GetEncoding goes through the project's IFT macro,
+// which presumably throws instead of returning - confirm.
+static HRESULT BlobToUtf8IfText(_In_opt_ IDxcBlob *pBlob, IDxcBlobUtf8 **ppBlobUtf8) {
+  // The parameter is annotated optional, so honor that before dereferencing.
+  if (pBlob == nullptr) {
+    return S_OK;
+  }
+  CComPtr<IDxcBlobEncoding> pBlobEncoding;
+  if (SUCCEEDED(pBlob->QueryInterface(&pBlobEncoding))) {
+    BOOL known = FALSE;
+    UINT32 cp = 0;
+    IFT(pBlobEncoding->GetEncoding(&known, &cp));
+    if (known) {
+      return hlsl::DxcGetBlobAsUtf8(pBlob, nullptr, ppBlobUtf8);
+    }
+  }
+  return S_OK;
+}
+
+// Produces a UTF-16 view of `pBlob` only when the blob reports a known text
+// encoding.
+//   pBlob       - blob to inspect; nullptr is accepted and treated as
+//                 "not text".
+//   ppBlobUtf16 - written only by hlsl::DxcGetBlobAsUtf16; left untouched on
+//                 every "not text" path, so callers should pass a pointer to
+//                 an initially-null interface pointer and test it afterwards.
+// Returns S_OK without converting when the blob is null, does not expose
+// IDxcBlobEncoding, or reports an unknown encoding; otherwise returns the
+// HRESULT of hlsl::DxcGetBlobAsUtf16.
+// NOTE(review): a failing GetEncoding goes through the project's IFT macro,
+// which presumably throws instead of returning - confirm.
+static HRESULT BlobToUtf16IfText(_In_opt_ IDxcBlob *pBlob, IDxcBlobUtf16 **ppBlobUtf16) {
+  // The parameter is annotated optional, so honor that before dereferencing.
+  if (pBlob == nullptr) {
+    return S_OK;
+  }
+  CComPtr<IDxcBlobEncoding> pBlobEncoding;
+  if (SUCCEEDED(pBlob->QueryInterface(&pBlobEncoding))) {
+    BOOL known = FALSE;
+    UINT32 cp = 0;
+    IFT(pBlobEncoding->GetEncoding(&known, &cp));
+    if (known) {
+      return hlsl::DxcGetBlobAsUtf16(pBlob, nullptr, ppBlobUtf16);
+    }
+  }
+  return S_OK;
+}
+
 void WriteBlobToConsole(_In_opt_ IDxcBlob *pBlob, DWORD streamType) {
 void WriteBlobToConsole(_In_opt_ IDxcBlob *pBlob, DWORD streamType) {
   if (pBlob == nullptr) {
   if (pBlob == nullptr) {
     return;
     return;
   }
   }
 
 
-  // Assume UTF-8 for now, which is typically the case for dxcompiler ouput.
-  WriteUtf8ToConsoleSizeT((char *)pBlob->GetBufferPointer(), pBlob->GetBufferSize(), streamType);
+  // Try to get as UTF-16 or UTF-8
+  BOOL known;
+  UINT32 cp = 0;
+  CComPtr<IDxcBlobEncoding> pBlobEncoding;
+  IFT(pBlob->QueryInterface(&pBlobEncoding));
+  IFT(pBlobEncoding->GetEncoding(&known, &cp));
+
+  if (cp == DXC_CP_UTF16) {
+    CComPtr<IDxcBlobUtf16> pUtf16;
+    IFT(hlsl::DxcGetBlobAsUtf16(pBlob, nullptr, &pUtf16));
+    WriteUtf16NullTermToConsole(pUtf16->GetStringPointer(), streamType);
+  } else if (cp == CP_UTF8) {
+    CComPtr<IDxcBlobUtf8> pUtf8;
+    IFT(hlsl::DxcGetBlobAsUtf8(pBlob, nullptr, &pUtf8));
+    WriteUtf8ToConsoleSizeT(pUtf8->GetStringPointer(), pUtf8->GetStringLength(), streamType);
+  }
 }
 }
 
 
-void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, _In_ LPCWSTR pFileName) {
+void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, _In_ LPCWSTR pFileName, _In_ UINT32 textCodePage) {
   if (pBlob == nullptr) {
   if (pBlob == nullptr) {
     return;
     return;
   }
   }
 
 
   CHandle file(CreateFileW(pFileName, GENERIC_WRITE, FILE_SHARE_READ, nullptr,
   CHandle file(CreateFileW(pFileName, GENERIC_WRITE, FILE_SHARE_READ, nullptr,
-                           CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr));
+    CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr));
   if (file == INVALID_HANDLE_VALUE) {
   if (file == INVALID_HANDLE_VALUE) {
     IFT_Data(HRESULT_FROM_WIN32(GetLastError()), pFileName);
     IFT_Data(HRESULT_FROM_WIN32(GetLastError()), pFileName);
   }
   }
-  WriteBlobToHandle(pBlob, file, pFileName);
+
+  WriteBlobToHandle(pBlob, file, pFileName, textCodePage);
 }
 }
 
 
-void WriteBlobToHandle(_In_opt_ IDxcBlob *pBlob, _In_ HANDLE hFile, _In_opt_ LPCWSTR pFileName) {
+void WriteBlobToHandle(_In_opt_ IDxcBlob *pBlob, _In_ HANDLE hFile, _In_opt_ LPCWSTR pFileName, _In_ UINT32 textCodePage) {
   if (pBlob == nullptr) {
   if (pBlob == nullptr) {
     return;
     return;
   }
   }
 
 
+  LPCVOID pPtr = pBlob->GetBufferPointer();
+  SIZE_T size = pBlob->GetBufferSize();
+
+  std::string BOM;
+  CComPtr<IDxcBlobUtf8> pBlobUtf8;
+  CComPtr<IDxcBlobUtf16> pBlobUtf16;
+  if (textCodePage == DXC_CP_UTF8) {
+    IFT_Data(BlobToUtf8IfText(pBlob, &pBlobUtf8), pFileName);
+    if (pBlobUtf8) {
+      pPtr = pBlobUtf8->GetStringPointer();
+      size = pBlobUtf8->GetStringLength();
+      // TBD: Should we write UTF-8 BOM?
+      //BOM = "\xef\xbb\xbf"; // UTF-8
+    }
+  } else if (textCodePage == DXC_CP_UTF16) {
+    IFT_Data(BlobToUtf16IfText(pBlob, &pBlobUtf16), pFileName);
+    if (pBlobUtf16) {
+      pPtr = pBlobUtf16->GetStringPointer();
+      size = pBlobUtf16->GetStringLength() * sizeof(wchar_t);
+      BOM = "\xff\xfe"; // UTF-16 LE
+    }
+  }
+
+  IFT_Data(size > (SIZE_T)UINT32_MAX ? E_OUTOFMEMORY : S_OK , pFileName);
+
   DWORD written;
   DWORD written;
-  if (FALSE == WriteFile(hFile, pBlob->GetBufferPointer(),
-    pBlob->GetBufferSize(), &written, nullptr)) {
+
+  if (!BOM.empty()) {
+    if (FALSE == WriteFile(hFile, BOM.data(), BOM.length(), &written, nullptr)) {
+      IFT_Data(HRESULT_FROM_WIN32(GetLastError()), pFileName);
+    }
+  }
+
+  if (FALSE == WriteFile(hFile, pPtr, (DWORD)size, &written, nullptr)) {
     IFT_Data(HRESULT_FROM_WIN32(GetLastError()), pFileName);
     IFT_Data(HRESULT_FROM_WIN32(GetLastError()), pFileName);
   }
   }
 }
 }
@@ -142,21 +235,10 @@ void WriteUtf8ToConsole(_In_opt_count_(charCount) const char *pText,
   std::string resultToPrint;
   std::string resultToPrint;
   wchar_t *utf16Message = nullptr;
   wchar_t *utf16Message = nullptr;
   size_t utf16MessageLen;
   size_t utf16MessageLen;
-  bool lossy; // Note: even if there was loss,  print anyway
   Unicode::UTF8BufferToUTF16Buffer(pText, charCount, &utf16Message,
   Unicode::UTF8BufferToUTF16Buffer(pText, charCount, &utf16Message,
                                    &utf16MessageLen);
                                    &utf16MessageLen);
 
 
-  std::string consoleMessage;
-  Unicode::UTF16ToConsoleString(utf16Message, &consoleMessage, &lossy);
-  if (streamType == STD_OUTPUT_HANDLE) {
-    fprintf(stdout, "%s\n", consoleMessage.c_str());
-  }
-  else if (streamType == STD_ERROR_HANDLE) {
-    fprintf(stderr, "%s\n", consoleMessage.c_str());
-  }
-  else {
-    throw hlsl::Exception(E_INVALIDARG);
-  }
+  WriteUtf16NullTermToConsole(utf16Message, streamType);
 
 
   delete[] utf16Message;
   delete[] utf16Message;
 }
 }

+ 5 - 4
lib/DxcSupport/dxcmem.cpp

@@ -42,17 +42,18 @@ HRESULT DxcInitThreadMalloc() throw() {
 
 
 void DxcCleanupThreadMalloc() throw() {
 void DxcCleanupThreadMalloc() throw() {
   if (g_ThreadMallocTls) {
   if (g_ThreadMallocTls) {
+    DXASSERT(g_pDefaultMalloc, "else DxcInitThreadMalloc didn't work/fail atomically");
     g_ThreadMallocTls->llvm::sys::ThreadLocal<IMalloc>::~ThreadLocal();
     g_ThreadMallocTls->llvm::sys::ThreadLocal<IMalloc>::~ThreadLocal();
     g_pDefaultMalloc->Free(g_ThreadMallocTls);
     g_pDefaultMalloc->Free(g_ThreadMallocTls);
     g_ThreadMallocTls = nullptr;
     g_ThreadMallocTls = nullptr;
-    DXASSERT(g_pDefaultMalloc, "else DxcInitThreadMalloc didn't work/fail atomically");
-    g_pDefaultMalloc->Release();
-    g_pDefaultMalloc = nullptr;
   }
   }
 }
 }
 
 
 IMalloc *DxcGetThreadMallocNoRef() throw() {
 IMalloc *DxcGetThreadMallocNoRef() throw() {
-  DXASSERT(g_ThreadMallocTls != nullptr, "else prior to DxcInitThreadMalloc or after DxcCleanupThreadMalloc");
+  if (g_ThreadMallocTls == nullptr) {
+    return g_pDefaultMalloc;
+  }
+
   return g_ThreadMallocTls->get();
   return g_ThreadMallocTls->get();
 }
 }
 
 

+ 17 - 4
lib/DxilContainer/DxilContainerAssembler.cpp

@@ -1548,7 +1548,8 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
                                            llvm::StringRef DebugName,
                                            llvm::StringRef DebugName,
                                            SerializeDxilFlags Flags,
                                            SerializeDxilFlags Flags,
                                            DxilShaderHash *pShaderHashOut,
                                            DxilShaderHash *pShaderHashOut,
-                                           AbstractMemoryStream *pReflectionStreamOut) {
+                                           AbstractMemoryStream *pReflectionStreamOut,
+                                           AbstractMemoryStream *pRootSigStreamOut) {
   // TODO: add a flag to update the module and remove information that is not part
   // TODO: add a flag to update the module and remove information that is not part
   // of DXIL proper and is used only to assemble the container.
   // of DXIL proper and is used only to assemble the container.
 
 
@@ -1637,11 +1638,23 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
     writer.AddPart(
     writer.AddPart(
         DFCC_PipelineStateValidation, pPSVWriter->size(),
         DFCC_PipelineStateValidation, pPSVWriter->size(),
         [&](AbstractMemoryStream *pStream) { pPSVWriter->write(pStream); });
         [&](AbstractMemoryStream *pStream) { pPSVWriter->write(pStream); });
+
     // Write the root signature (RTS0) part.
     // Write the root signature (RTS0) part.
     if (rootSigWriter.size()) {
     if (rootSigWriter.size()) {
-      writer.AddPart(
-        DFCC_RootSignature, rootSigWriter.size(),
-        [&](AbstractMemoryStream *pStream) { rootSigWriter.write(pStream); });
+      if (pRootSigStreamOut) {
+        // Write root signature wrapped in container for separate output
+        DxilContainerWriter_impl rootSigContainerWriter;
+        rootSigContainerWriter.AddPart(
+          DFCC_RootSignature, rootSigWriter.size(),
+          [&](AbstractMemoryStream *pStream) { rootSigWriter.write(pStream); });
+        rootSigContainerWriter.write(pRootSigStreamOut);
+      }
+      if ((Flags & SerializeDxilFlags::StripRootSignature) == 0) {
+        // Write embedded root signature
+        writer.AddPart(
+          DFCC_RootSignature, rootSigWriter.size(),
+          [&](AbstractMemoryStream *pStream) { rootSigWriter.write(pStream); });
+      }
       bMetadataStripped |= pModule->StripRootSignatureFromMetadata();
       bMetadataStripped |= pModule->StripRootSignatureFromMetadata();
     }
     }
   }
   }

+ 5 - 0
lib/DxilDia/DxilDiaSymbolManager.cpp

@@ -1889,6 +1889,7 @@ HRESULT dxil_dia::hlsl_symbols::SymbolManagerInit::IsDbgDeclareCall(
 
 
   std::vector<dxil_dia::Session::RVA> usesRVAs;
   std::vector<dxil_dia::Session::RVA> usesRVAs;
 
 
+  bool HasRegister = false;
   if (auto *RegMV = llvm::dyn_cast<llvm::MetadataAsValue>(CI->getArgOperand(0))) {
   if (auto *RegMV = llvm::dyn_cast<llvm::MetadataAsValue>(CI->getArgOperand(0))) {
     if (auto *RegVM = llvm::dyn_cast<llvm::ValueAsMetadata>(RegMV->getMetadata())) {
     if (auto *RegVM = llvm::dyn_cast<llvm::ValueAsMetadata>(RegMV->getMetadata())) {
       if (auto *Reg = llvm::dyn_cast<llvm::Instruction>(RegVM->getValue())) {
       if (auto *Reg = llvm::dyn_cast<llvm::Instruction>(RegVM->getValue())) {
@@ -1897,6 +1898,7 @@ HRESULT dxil_dia::hlsl_symbols::SymbolManagerInit::IsDbgDeclareCall(
         if (hr != S_OK) {
         if (hr != S_OK) {
           return hr;
           return hr;
         }
         }
+        HasRegister = true;
         llvm::iterator_range<llvm::Value::user_iterator> users = Reg->users();
         llvm::iterator_range<llvm::Value::user_iterator> users = Reg->users();
         for (llvm::User *user : users) {
         for (llvm::User *user : users) {
           auto *inst = llvm::dyn_cast<llvm::Instruction>(user);
           auto *inst = llvm::dyn_cast<llvm::Instruction>(user);
@@ -1908,6 +1910,9 @@ HRESULT dxil_dia::hlsl_symbols::SymbolManagerInit::IsDbgDeclareCall(
       }
       }
     }
     }
   }
   }
+  if (!HasRegister) {
+    return E_FAIL;
+  }
 
 
   if (!usesRVAs.empty()) {
   if (!usesRVAs.empty()) {
     *pLowestUserRVA = *std::min_element(usesRVAs.begin(), usesRVAs.end());
     *pLowestUserRVA = *std::min_element(usesRVAs.begin(), usesRVAs.end());

+ 1 - 1
lib/DxilDia/DxilDiaSymbolManager.h

@@ -60,7 +60,7 @@ public:
 
 
 
 
   SymbolManager();
   SymbolManager();
-  SymbolManager(SymbolManager&&) = default;
+  SymbolManager(SymbolManager &&) = default;
   SymbolManager &operator =(SymbolManager &&) = default;
   SymbolManager &operator =(SymbolManager &&) = default;
   ~SymbolManager();
   ~SymbolManager();
 
 

+ 1 - 1
lib/DxrFallback/StateFunctionTransform.cpp

@@ -45,7 +45,7 @@ inline std::string stringf(const char* fmt, ...)
   {
   {
     ret.resize(size);
     ret.resize(size);
     va_start(args, fmt);
     va_start(args, fmt);
-    vsnprintf((char*)ret.data(), size + 1, fmt, args);
+    vsnprintf(const_cast<char*>(ret.data()), size + 1, fmt, args);
     va_end(args);
     va_end(args);
   }
   }
   return ret;
   return ret;

+ 2 - 0
lib/HLSL/CMakeLists.txt

@@ -18,6 +18,7 @@ add_llvm_library(LLVMHLSL
   DxilPromoteResourcePasses.cpp
   DxilPromoteResourcePasses.cpp
   DxilPackSignatureElement.cpp
   DxilPackSignatureElement.cpp
   DxilPatchShaderRecordBindings.cpp
   DxilPatchShaderRecordBindings.cpp
+  DxilNoops.cpp
   DxilPreserveAllOutputs.cpp
   DxilPreserveAllOutputs.cpp
   DxilSimpleGVNHoist.cpp
   DxilSimpleGVNHoist.cpp
   DxilSignatureValidation.cpp
   DxilSignatureValidation.cpp
@@ -26,6 +27,7 @@ add_llvm_library(LLVMHLSL
   DxilTranslateRawBuffer.cpp
   DxilTranslateRawBuffer.cpp
   DxilExportMap.cpp
   DxilExportMap.cpp
   DxilValidation.cpp
   DxilValidation.cpp
+  DxilValueCache.cpp
   DxcOptimizer.cpp
   DxcOptimizer.cpp
   HLDeadFunctionElimination.cpp
   HLDeadFunctionElimination.cpp
   HLExpandStoreIntrinsics.cpp
   HLExpandStoreIntrinsics.cpp

+ 1 - 1
lib/HLSL/ComputeViewIdStateBuilder.cpp

@@ -840,7 +840,7 @@ void DxilViewIdStateBuilder::CreateViewIdSets(const std::unordered_map<unsigned,
 unsigned DxilViewIdStateBuilder::GetLinearIndex(DxilSignatureElement &SigElem, int row, unsigned col) const {
 unsigned DxilViewIdStateBuilder::GetLinearIndex(DxilSignatureElement &SigElem, int row, unsigned col) const {
   DXASSERT_NOMSG(row >= 0 && col < kNumComps && SigElem.GetStartRow() != Semantic::kUndefinedRow);
   DXASSERT_NOMSG(row >= 0 && col < kNumComps && SigElem.GetStartRow() != Semantic::kUndefinedRow);
   unsigned idx = (((unsigned)row) + SigElem.GetStartRow())*kNumComps + col + SigElem.GetStartCol();
   unsigned idx = (((unsigned)row) + SigElem.GetStartRow())*kNumComps + col + SigElem.GetStartCol();
-  DXASSERT_NOMSG(idx < kMaxSigScalars);
+  DXASSERT_NOMSG(idx < kMaxSigScalars); (void)kMaxSigScalars;
   return idx;
   return idx;
 }
 }
 
 

+ 5 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -20,6 +20,7 @@
 #include "dxc/HLSL/HLMatrixLowerPass.h"
 #include "dxc/HLSL/HLMatrixLowerPass.h"
 #include "dxc/HLSL/DxilGenerationPass.h"
 #include "dxc/HLSL/DxilGenerationPass.h"
 #include "dxc/HLSL/ComputeViewIdState.h"
 #include "dxc/HLSL/ComputeViewIdState.h"
+#include "dxc/HLSL/DxilValueCache.h"
 #include "dxc/DXIL/DxilUtil.h"
 #include "dxc/DXIL/DxilUtil.h"
 #include "dxc/Support/dxcapi.impl.h"
 #include "dxc/Support/dxcapi.impl.h"
 
 
@@ -92,12 +93,15 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilConvergentMarkPass(Registry);
     initializeDxilConvergentMarkPass(Registry);
     initializeDxilDeadFunctionEliminationPass(Registry);
     initializeDxilDeadFunctionEliminationPass(Registry);
     initializeDxilEliminateOutputDynamicIndexingPass(Registry);
     initializeDxilEliminateOutputDynamicIndexingPass(Registry);
+    initializeDxilEliminateVectorPass(Registry);
     initializeDxilEmitMetadataPass(Registry);
     initializeDxilEmitMetadataPass(Registry);
     initializeDxilEraseDeadRegionPass(Registry);
     initializeDxilEraseDeadRegionPass(Registry);
     initializeDxilExpandTrigIntrinsicsPass(Registry);
     initializeDxilExpandTrigIntrinsicsPass(Registry);
     initializeDxilFinalizeModulePass(Registry);
     initializeDxilFinalizeModulePass(Registry);
+    initializeDxilFinalizeNoopsPass(Registry);
     initializeDxilFixConstArrayInitializerPass(Registry);
     initializeDxilFixConstArrayInitializerPass(Registry);
     initializeDxilGenerationPassPass(Registry);
     initializeDxilGenerationPassPass(Registry);
+    initializeDxilInsertNoopsPass(Registry);
     initializeDxilLegalizeEvalOperationsPass(Registry);
     initializeDxilLegalizeEvalOperationsPass(Registry);
     initializeDxilLegalizeResourcesPass(Registry);
     initializeDxilLegalizeResourcesPass(Registry);
     initializeDxilLegalizeSampleOffsetPassPass(Registry);
     initializeDxilLegalizeSampleOffsetPassPass(Registry);
@@ -110,6 +114,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilPromoteStaticResourcesPass(Registry);
     initializeDxilPromoteStaticResourcesPass(Registry);
     initializeDxilSimpleGVNHoistPass(Registry);
     initializeDxilSimpleGVNHoistPass(Registry);
     initializeDxilTranslateRawBufferPass(Registry);
     initializeDxilTranslateRawBufferPass(Registry);
+    initializeDxilValueCachePass(Registry);
     initializeDynamicIndexingVectorToArrayPass(Registry);
     initializeDynamicIndexingVectorToArrayPass(Registry);
     initializeEarlyCSELegacyPassPass(Registry);
     initializeEarlyCSELegacyPassPass(Registry);
     initializeEliminateAvailableExternallyPass(Registry);
     initializeEliminateAvailableExternallyPass(Registry);

+ 174 - 4
lib/HLSL/DxilCondenseResources.cpp

@@ -21,6 +21,7 @@
 #include "dxc/DXIL/DxilUtil.h"
 #include "dxc/DXIL/DxilUtil.h"
 #include "dxc/HLSL/HLMatrixType.h"
 #include "dxc/HLSL/HLMatrixType.h"
 #include "dxc/HLSL/HLModule.h"
 #include "dxc/HLSL/HLModule.h"
+#include "dxc/HLSL/DxilValueCache.h"
 
 
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -422,6 +423,168 @@ ModulePass *llvm::createDxilCondenseResourcesPass() {
 
 
 INITIALIZE_PASS(DxilCondenseResources, "hlsl-dxil-condense", "DXIL Condense Resources", false, false)
 INITIALIZE_PASS(DxilCondenseResources, "hlsl-dxil-condense", "DXIL Condense Resources", false, false)
 
 
+// Simplifies or removes PHI nodes of HLSL object type across the module.
+//
+// Steps, in order:
+//   1. Collect every leading PHI whose type satisfies IsHLSLObjectType.
+//   2. For incoming blocks that DVC reports as never reachable, fold the
+//      simple diamond they are part of and delete the dead block.
+//   3. Replace each remaining collected PHI for which DVC->GetValue returns
+//      a value.
+//   4. Erase instructions that are left without users by the steps above.
+//
+// Returns true if the module was modified.
+static
+bool LegalizeResourcesPHIs(Module &M, DxilValueCache *DVC) {
+
+  // Simple pass to collect resource PHI's
+  SmallVector<PHINode *, 8> PHIs;
+  for (Function &F : M) {
+    for (BasicBlock &BB : F) {
+      for (Instruction &I : BB) {
+        if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+          if (hlsl::dxilutil::IsHLSLObjectType(PN->getType())) {
+            PHIs.push_back(PN);
+          }
+        }
+        else {
+          // PHIs are grouped at the top of a block; stop at the first non-PHI.
+          break;
+        }
+
+      }
+    }
+  }
+
+  if (PHIs.empty())
+    return false;
+
+  // std::remove only shifts the kept elements forward; the erase is what
+  // actually shrinks the list. Without it a PHI that is about to be deleted
+  // could still be visited by the replacement loop further down.
+  auto ForgetPHI = [&PHIs](PHINode *PN) {
+    PHIs.erase(std::remove(PHIs.begin(), PHIs.end(), PN), PHIs.end());
+  };
+
+  // Do a very simple CFG simplification of removing diamond graphs.
+  std::vector<BasicBlock *> DeadBlocks;
+  std::unordered_set<BasicBlock *> DeadBlocksSet;
+  for (PHINode *PN : PHIs) {
+    for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+      BasicBlock *BB = PN->getIncomingBlock(i);
+      if (DeadBlocksSet.count(BB)) continue;
+      if (DVC->IsNeverReachable(BB)) {
+        DeadBlocksSet.insert(BB);
+        DeadBlocks.push_back(BB);
+      }
+    }
+  }
+
+  bool Changed = false;
+  // Values that may have become dead; the set keeps the list free of
+  // duplicates.
+  SmallVector<Value *, 3> CleanupValues;
+  SmallPtrSet<Value *, 3> CleanupValuesSet;
+  auto AddCleanupValues = [&CleanupValues, &CleanupValuesSet](Value *V) {
+    if (!CleanupValuesSet.count(V)) {
+      CleanupValuesSet.insert(V);
+      CleanupValues.push_back(V);
+    }
+  };
+
+  for (unsigned i = 0; i < DeadBlocks.size(); i++) {
+    BasicBlock *BB = DeadBlocks[i];
+    BasicBlock *Pred = BB->getSinglePredecessor();
+    BasicBlock *Succ = BB->getSingleSuccessor();
+
+    if (!Pred || !Succ)
+      continue;
+
+    // A very simple folding of diamond graph.
+    // Only a branch terminator can form the diamond handled here; skip any
+    // other kind of terminator instead of asserting on it.
+    BranchInst *Br = dyn_cast<BranchInst>(Pred->getTerminator());
+    if (!Br)
+      continue;
+
+    BasicBlock *Peer = nullptr;
+    if (Br->isConditional())
+      Peer = Br->getSuccessor(0) == BB ?
+          Br->getSuccessor(1) : Br->getSuccessor(0);
+
+    if (Peer && Peer->getSingleSuccessor() == Succ) {
+      Changed = true;
+
+      // Replace the conditional branch with an unconditional one to Peer.
+      BranchInst::Create(Peer, Pred);
+
+      Br->dropAllReferences();
+      Br->eraseFromParent();
+
+      for (Instruction &I : *Succ) {
+        if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+          if (Instruction *IncomingI = dyn_cast<Instruction>(PN->getIncomingValueForBlock(BB))) {
+            if (!DeadBlocksSet.count(IncomingI->getParent()))
+              AddCleanupValues(IncomingI); // Mark the incoming value for deletion
+          }
+          PN->removeIncomingValue(BB);
+
+          if (PN->getNumIncomingValues() == 1) {
+            PN->replaceAllUsesWith(PN->getIncomingValue(0));
+            ForgetPHI(PN);
+            AddCleanupValues(PN); // Mark for deletion
+          }
+        }
+        else {
+          break;
+        }
+      }
+
+      // Delete the dead block, back to front, one instruction at a time.
+      BB->dropAllReferences();
+      while (!BB->empty()){
+        Instruction *ChildI = &*BB->rbegin();
+        if (PHINode *PN = dyn_cast<PHINode>(ChildI))
+          ForgetPHI(PN);
+        ChildI->eraseFromParent();
+      }
+      BB->eraseFromParent();
+    }
+  }
+
+  // Replacing one PHI can let the cache resolve another, so repeat until a
+  // whole sweep changes nothing, at most once per remaining PHI.
+  unsigned Attempts = PHIs.size();
+  for (unsigned AttemptIdx = 0; AttemptIdx < Attempts; AttemptIdx++) {
+    bool LocalChanged = false;
+    for (auto It = PHIs.begin(); It != PHIs.end();) {
+      PHINode *PN = *It;
+      if (Value *V = DVC->GetValue(PN)) {
+
+        It = PHIs.erase(It); // Continue from the element that followed PN.
+        AddCleanupValues(PN); // Mark for deletion later
+        PN->replaceAllUsesWith(V);
+        Changed = true;
+        LocalChanged = true;
+
+        for (unsigned i = 0, C = PN->getNumIncomingValues(); i < C; i++) {
+          Value *IncomingV = PN->getIncomingValue(i);
+          if (IncomingV != V)
+            AddCleanupValues(IncomingV); // Mark the incoming value for deletion later
+        }
+      }
+      else {
+        It++;
+      }
+    }
+
+    if (!LocalChanged)
+      break;
+  }
+
+  // Simple DCE to remove all dependencies of the resource PHI nodes we removed.
+  // This may be a little too aggressive
+  for (;;) {
+    bool LocalChanged = false;
+    // Must use a numeric idx instead of an iterator, because
+    // we're modifying the array as we go. Iterator gets invalidated
+    // because they're just pointers.
+    for (unsigned Idx = 0; Idx < CleanupValues.size();) {
+      Value *V = CleanupValues[Idx];
+      if (Instruction *I = dyn_cast<Instruction>(V)) {
+        if (I->user_empty()) {
+          // Add dependencies to process
+          for (Value *Op : I->operands()) {
+            AddCleanupValues(Op);
+          }
+          LocalChanged = true;
+          I->eraseFromParent();
+          CleanupValues.erase(CleanupValues.begin() + Idx);
+        }
+        else {
+          Idx++;
+        }
+      }
+      else {
+        // Non-instructions are never erased; just drop them from the list.
+        CleanupValues.erase(CleanupValues.begin() + Idx);
+      }
+    }
+
+    Changed |= LocalChanged;
+    if (!LocalChanged)
+      break;
+  }
+  return Changed;
+}
+
 namespace {
 namespace {
 class DxilLowerCreateHandleForLib : public ModulePass {
 class DxilLowerCreateHandleForLib : public ModulePass {
 private:
 private:
@@ -434,6 +597,10 @@ public:
   static char ID; // Pass identification, replacement for typeid
   static char ID; // Pass identification, replacement for typeid
   explicit DxilLowerCreateHandleForLib() : ModulePass(ID) {}
   explicit DxilLowerCreateHandleForLib() : ModulePass(ID) {}
 
 
+  // Declares DxilValueCache as a required analysis of this pass.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DxilValueCache>();
+  }
+
   const char *getPassName() const override {
   const char *getPassName() const override {
     return "DXIL Lower createHandleForLib";
     return "DXIL Lower createHandleForLib";
   }
   }
@@ -483,6 +650,9 @@ public:
     if (m_bIsLib && DM.GetShaderModel()->GetMinor() == ShaderModel::kOfflineMinor)
     if (m_bIsLib && DM.GetShaderModel()->GetMinor() == ShaderModel::kOfflineMinor)
       return bChanged;
       return bChanged;
 
 
+    DxilValueCache *DVC = &getAnalysis<DxilValueCache>();
+    bChanged |= LegalizeResourcesPHIs(M, DVC);
+
     // Make sure no select on resource.
     // Make sure no select on resource.
     bChanged |= RemovePhiOnResource();
     bChanged |= RemovePhiOnResource();
 
 
@@ -1731,9 +1901,7 @@ void DxilLowerCreateHandleForLib::UpdateStructTypeForLegacyLayout() {
 
 
 // Change ResourceSymbol to undef if don't need.
 // Change ResourceSymbol to undef if don't need.
 void DxilLowerCreateHandleForLib::UpdateResourceSymbols() {
 void DxilLowerCreateHandleForLib::UpdateResourceSymbols() {
-  std::vector<GlobalVariable *> &LLVMUsed = m_DM->GetLLVMUsed();
-
-  auto UpdateResourceSymbol = [&LLVMUsed, this](DxilResourceBase *res) {
+  auto UpdateResourceSymbol = [](DxilResourceBase *res) {
     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(res->GetGlobalSymbol())) {
     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(res->GetGlobalSymbol())) {
       GV->removeDeadConstantUsers();
       GV->removeDeadConstantUsers();
       DXASSERT(GV->user_empty(), "else resource not lowered");
       DXASSERT(GV->user_empty(), "else resource not lowered");
@@ -2254,7 +2422,9 @@ ModulePass *llvm::createDxilLowerCreateHandleForLibPass() {
   return new DxilLowerCreateHandleForLib();
   return new DxilLowerCreateHandleForLib();
 }
 }
 
 
-INITIALIZE_PASS(DxilLowerCreateHandleForLib, "hlsl-dxil-lower-handle-for-lib", "DXIL Lower createHandleForLib", false, false)
+INITIALIZE_PASS_BEGIN(DxilLowerCreateHandleForLib, "hlsl-dxil-lower-handle-for-lib", "DXIL Lower createHandleForLib", false, false)
+INITIALIZE_PASS_DEPENDENCY(DxilValueCache)
+INITIALIZE_PASS_END(DxilLowerCreateHandleForLib, "hlsl-dxil-lower-handle-for-lib", "DXIL Lower createHandleForLib", false, false)
 
 
 
 
 class DxilAllocateResourcesForLib : public ModulePass {
 class DxilAllocateResourcesForLib : public ModulePass {

+ 27 - 5
lib/HLSL/DxilContainerReflection.cpp

@@ -415,6 +415,7 @@ protected:
 
 
 public:
 public:
   // Internal
   // Internal
+  HRESULT InitializeEmpty();
   HRESULT Initialize(
   HRESULT Initialize(
     DxilModule              &M,
     DxilModule              &M,
     llvm::Type              *type,
     llvm::Type              *type,
@@ -795,6 +796,12 @@ static bool IsObjectType(
   return TryToDetectObjectType(structType, &ignored);
   return TryToDetectObjectType(structType, &ignored);
 }
 }
 
 
+// Initializes this object as an empty type by zero-filling m_Desc.
+// Always returns S_OK.
+HRESULT CShaderReflectionType::InitializeEmpty()
+{
+  ZeroMemory(&m_Desc, sizeof(m_Desc));
+  return S_OK;
+}
+
 // Main logic for translating an LLVM type and associated
 // Main logic for translating an LLVM type and associated
 // annotations into a D3D shader reflection type.
 // annotations into a D3D shader reflection type.
 HRESULT CShaderReflectionType::Initialize(
 HRESULT CShaderReflectionType::Initialize(
@@ -1282,7 +1289,9 @@ void CShaderReflectionConstantBuffer::InitializeStructuredBuffer(
   VarDesc.StartSampler = UINT_MAX;
   VarDesc.StartSampler = UINT_MAX;
   VarDesc.uFlags |= D3D_SVF_USED;
   VarDesc.uFlags |= D3D_SVF_USED;
   CShaderReflectionVariable Var;
   CShaderReflectionVariable Var;
-  CShaderReflectionType *pVarType = nullptr;
+
+  // First type is an empty type: returned if no annotation available.
+  CShaderReflectionType *pVarType = allTypes[0].get();
 
 
   // Create reflection type, if we have the necessary annotation info
   // Create reflection type, if we have the necessary annotation info
 
 
@@ -1685,6 +1694,14 @@ void DxilShaderReflection::SetCBufferUsage() {
 void DxilModuleReflection::CreateReflectionObjects() {
 void DxilModuleReflection::CreateReflectionObjects() {
   DXASSERT_NOMSG(m_pDxilModule != nullptr);
   DXASSERT_NOMSG(m_pDxilModule != nullptr);
 
 
+  {
+    // Add empty type for when no type info is available, instead of returning nullptr.
+    DXASSERT_NOMSG(m_Types.empty());
+    CShaderReflectionType *pEmptyType = new CShaderReflectionType();
+    m_Types.push_back(std::unique_ptr<CShaderReflectionType>(pEmptyType));
+    pEmptyType->InitializeEmpty();
+  }
+
   // Create constant buffers, resources and signatures.
   // Create constant buffers, resources and signatures.
   for (auto && cb : m_pDxilModule->GetCBuffers()) {
   for (auto && cb : m_pDxilModule->GetCBuffers()) {
     std::unique_ptr<CShaderReflectionConstantBuffer> rcb(new CShaderReflectionConstantBuffer());
     std::unique_ptr<CShaderReflectionConstantBuffer> rcb(new CShaderReflectionConstantBuffer());
@@ -1908,7 +1925,8 @@ HRESULT DxilModuleReflection::LoadRDAT(const DxilPartHeader *pPart) {
 }
 }
 
 
 HRESULT DxilModuleReflection::LoadModule(const DxilPartHeader *pShaderPart) {
 HRESULT DxilModuleReflection::LoadModule(const DxilPartHeader *pShaderPart) {
-  DXASSERT_NOMSG(pShaderPart != nullptr);
+  if (pShaderPart == nullptr)
+    return E_INVALIDARG;
   const char *pData = GetDxilPartData(pShaderPart);
   const char *pData = GetDxilPartData(pShaderPart);
   try {
   try {
     const char *pBitcode;
     const char *pBitcode;
@@ -1916,14 +1934,18 @@ HRESULT DxilModuleReflection::LoadModule(const DxilPartHeader *pShaderPart) {
     GetDxilProgramBitcode((DxilProgramHeader *)pData, &pBitcode, &bitcodeLength);
     GetDxilProgramBitcode((DxilProgramHeader *)pData, &pBitcode, &bitcodeLength);
     std::unique_ptr<MemoryBuffer> pMemBuffer =
     std::unique_ptr<MemoryBuffer> pMemBuffer =
         MemoryBuffer::getMemBufferCopy(StringRef(pBitcode, bitcodeLength));
         MemoryBuffer::getMemBufferCopy(StringRef(pBitcode, bitcodeLength));
+    bool bBitcodeLoadError = false;
+    auto errorHandler = [&bBitcodeLoadError](const DiagnosticInfo &diagInfo) {
+        bBitcodeLoadError |= diagInfo.getSeverity() == DS_Error;
+      };
 #if 0 // We materialize eagerly, because we'll need to walk instructions to look for usage information.
 #if 0 // We materialize eagerly, because we'll need to walk instructions to look for usage information.
     ErrorOr<std::unique_ptr<Module>> module =
     ErrorOr<std::unique_ptr<Module>> module =
-        getLazyBitcodeModule(std::move(pMemBuffer), Context);
+        getLazyBitcodeModule(std::move(pMemBuffer), Context, errorHandler);
 #else
 #else
     ErrorOr<std::unique_ptr<Module>> module =
     ErrorOr<std::unique_ptr<Module>> module =
-      parseBitcodeFile(pMemBuffer->getMemBufferRef(), Context, nullptr);
+      parseBitcodeFile(pMemBuffer->getMemBufferRef(), Context, errorHandler);
 #endif
 #endif
-    if (!module) {
+    if (!module || bBitcodeLoadError) {
       return E_INVALIDARG;
       return E_INVALIDARG;
     }
     }
     std::swap(m_pModule, module.get());
     std::swap(m_pModule, module.get());

+ 164 - 0
lib/HLSL/DxilNoops.cpp

@@ -0,0 +1,164 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilNoops.cpp                                                             //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Passes to insert dx.noops() and replace them with llvm.donothing()        //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "llvm/Pass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+namespace {
+// Name of the placeholder function used by both passes in this file.
+StringRef kNoopName = "dx.noop";
+}
+
+//==========================================================
+// Insertion pass
+//
+
+namespace {
+
+// Returns the module's dx.noop function, inserting a `void ()` declaration
+// for it when the module does not have one yet. The function is marked
+// convergent on every call.
+Function *GetOrCreateNoopF(Module &M) {
+  FunctionType *NoopTy =
+      FunctionType::get(Type::getVoidTy(M.getContext()), false);
+  Function *Result = cast<Function>(M.getOrInsertFunction(::kNoopName, NoopTy));
+  Result->addFnAttr(Attribute::AttrKind::Convergent);
+  return Result;
+}
+
+// Function pass that inserts calls to dx.noop (see runOnFunction).
+class DxilInsertNoops : public FunctionPass {
+public:
+  static char ID; // Pass identification
+  // Registers the pass with the global pass registry on construction.
+  DxilInsertNoops() : FunctionPass(ID) {
+    initializeDxilInsertNoopsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+  const char *getPassName() const override { return "Dxil Insert Noops"; }
+};
+
+char DxilInsertNoops::ID;
+}
+
+// Inserts a call to dx.noop immediately before selected instructions of F:
+// memcpy intrinsics, calls to functions that have a body, stores whose value
+// operand is a load or a constant, and returns. Each inserted call takes the
+// debug location of the instruction it precedes.
+// Returns true if at least one call was inserted.
+bool DxilInsertNoops::runOnFunction(Function &F) {
+  Module &M = *F.getParent();
+  Function *NoopF = nullptr; // Looked up or created on first insertion.
+  bool Changed = false;
+
+  // Find instructions where we want to insert nops
+  for (BasicBlock &BB : F) {
+    for (BasicBlock::iterator It = BB.begin(), E = BB.end(); It != E;) {
+      bool InsertNop = false;
+      // It already points past I here, so the call inserted in front of I
+      // below is never visited by this loop.
+      Instruction &I = *(It++);
+      // MemCpyInst is itself a CallInst whose callee is an intrinsic
+      // declaration, so it must be tested before the generic call case;
+      // otherwise this branch can never be reached.
+      if (isa<MemCpyInst>(&I)) {
+        InsertNop = true;
+      }
+      // If we are calling a real function, insert one
+      // at the callsite.
+      else if (CallInst *Call = dyn_cast<CallInst>(&I)) {
+        if (Function *Callee = Call->getCalledFunction()) {
+          if (!Callee->isDeclaration())
+            InsertNop = true;
+        }
+      }
+      // If we have a copy, e.g:
+      //     float x = 0;
+      //     float y = x;    <---- copy
+      // insert a nop there.
+      else if (StoreInst *Store = dyn_cast<StoreInst>(&I)) {
+        Value *V = Store->getValueOperand();
+        if (isa<LoadInst>(V) || isa<Constant>(V))
+          InsertNop = true;
+      }
+      // If we have a return, just to be safe.
+      else if (isa<ReturnInst>(&I)) {
+        InsertNop = true;
+      }
+
+      // Do the insertion
+      if (InsertNop) {
+        if (!NoopF)
+          NoopF = GetOrCreateNoopF(M);
+        CallInst *Noop = CallInst::Create(NoopF, {}, &I);
+        Noop->setDebugLoc(I.getDebugLoc());
+        Changed = true;
+      }
+    }
+  }
+
+  return Changed;
+}
+
+// Factory: returns a newly allocated DxilInsertNoops pass.
+Pass *llvm::createDxilInsertNoopsPass() {
+  return new DxilInsertNoops();
+}
+
+INITIALIZE_PASS(DxilInsertNoops, "dxil-insert-noops", "Dxil Insert Noops", false, false)
+
+
+//==========================================================
+// Finalize pass
+//
+
+namespace {
+
+// Module pass that retargets dx.noop calls (see runOnModule).
+class DxilFinalizeNoops : public ModulePass {
+public:
+  static char ID; // Pass identification
+  // Registers the pass with the global pass registry on construction.
+  DxilFinalizeNoops() : ModulePass(ID) {
+    initializeDxilFinalizeNoopsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+  const char *getPassName() const override { return "Dxil Finalize Noops"; }
+};
+
+char DxilFinalizeNoops::ID;
+}
+
+// Retargets every call of the @dx.noop declaration to @llvm.donothing and
+// then erases @dx.noop from the module.
+// Returns false when the module has no @dx.noop declaration, true otherwise.
+bool DxilFinalizeNoops::runOnModule(Module &M) {
+  // Only a body-less function with the expected name qualifies.
+  Function *NoopDecl = nullptr;
+  for (Function &Fn : M) {
+    if (Fn.isDeclaration() && Fn.getName() == ::kNoopName) {
+      NoopDecl = &Fn;
+      break;
+    }
+  }
+  if (NoopDecl == nullptr)
+    return false;
+
+  if (!NoopDecl->user_empty()) {
+    Function *DoNothingDecl = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
+    auto UserIt = NoopDecl->user_begin();
+    auto UserEnd = NoopDecl->user_end();
+    while (UserIt != UserEnd) {
+      // Step past the user first: retargeting the call drops it from the
+      // user list being walked.
+      User *Current = *UserIt;
+      UserIt++;
+      CallInst *NoopCall = cast<CallInst>(Current);
+      NoopCall->setCalledFunction(DoNothingDecl);
+    }
+  }
+
+  assert(NoopDecl->user_empty() && "dx.noop calls must be all removed now");
+  NoopDecl->eraseFromParent();
+
+  return true;
+}
+
+// Factory: returns a newly allocated DxilFinalizeNoops pass.
+Pass *llvm::createDxilFinalizeNoopsPass() {
+  return new DxilFinalizeNoops();
+}
+
+INITIALIZE_PASS(DxilFinalizeNoops, "dxil-finalize-noops", "Dxil Finalize Noops", false, false)
+

+ 7 - 14
lib/HLSL/DxilPatchShaderRecordBindings.cpp

@@ -91,7 +91,7 @@ struct ShaderRecordEntry {
   unsigned int RecordOffsetInBytes;
   unsigned int RecordOffsetInBytes;
   unsigned int OffsetInDescriptors; // Only valid for descriptor tables
   unsigned int OffsetInDescriptors; // Only valid for descriptor tables
 
 
-  static ShaderRecordEntry InvalidEntry() { return { (DxilRootParameterType)-1, (unsigned int)-1 }; }
+  static ShaderRecordEntry InvalidEntry() { return { (DxilRootParameterType)-1, (unsigned int)-1, 0 }; }
   bool IsInvalid() { return (unsigned int)ParameterType == (unsigned int)-1; }
   bool IsInvalid() { return (unsigned int)ParameterType == (unsigned int)-1; }
 };
 };
 
 
@@ -176,7 +176,7 @@ private:
   llvm::Function *EntryPointFunction;
   llvm::Function *EntryPointFunction;
 
 
   ShaderInfo *pInputShaderInfo;
   ShaderInfo *pInputShaderInfo;
-  DxilVersionedRootSignatureDesc *pRootSignatureDesc;
+  const DxilVersionedRootSignatureDesc *pRootSignatureDesc;
   DXIL::ShaderKind ShaderKind;
   DXIL::ShaderKind ShaderKind;
 };
 };
 
 
@@ -222,7 +222,7 @@ void DxilPatchShaderRecordBindings::applyOptions(PassOptions O) {
     if (0 == option.first.compare("root-signature")) {
     if (0 == option.first.compare("root-signature")) {
       unsigned int cHexRadix = 16;
       unsigned int cHexRadix = 16;
       pInputShaderInfo = (ShaderInfo*)strtoull(option.second.data(), nullptr, cHexRadix);
       pInputShaderInfo = (ShaderInfo*)strtoull(option.second.data(), nullptr, cHexRadix);
-      pRootSignatureDesc = (DxilVersionedRootSignatureDesc*)pInputShaderInfo->pRootSignatureDesc;
+      pRootSignatureDesc = (const DxilVersionedRootSignatureDesc*)pInputShaderInfo->pRootSignatureDesc;
     }
     }
   }
   }
 }
 }
@@ -386,11 +386,6 @@ DXIL::ShaderKind GetRayShaderKindCopy(Function* F)
     return DXIL::ShaderKind::Invalid;
     return DXIL::ShaderKind::Invalid;
 }
 }
 
 
-static std::string ws2s(const std::wstring& wide)
-{
-    return std::string(wide.begin(), wide.end());
-}
-
 bool DxilPatchShaderRecordBindings::runOnModule(Module &M) {
 bool DxilPatchShaderRecordBindings::runOnModule(Module &M) {
   DxilModule &DM = M.GetOrCreateDxilModule();
   DxilModule &DM = M.GetOrCreateDxilModule();
   EntryPointFunction = pInputShaderInfo->ExportName ? getFunctionFromName(M, pInputShaderInfo->ExportName) : DM.GetEntryFunction();
   EntryPointFunction = pInputShaderInfo->ExportName ? getFunctionFromName(M, pInputShaderInfo->ExportName) : DM.GetEntryFunction();
@@ -524,7 +519,6 @@ llvm::Value *DxilPatchShaderRecordBindings::GetAliasedDescriptorHeapHandle(Modul
 
 
 
 
         llvm::ArrayType *descriptorHeapType = ArrayType::get(type, 0);
         llvm::ArrayType *descriptorHeapType = ArrayType::get(type, 0);
-        static unsigned int i = 0;
         unsigned int id = AddAliasedHandle(M, FallbackLayerDescriptorHeapTable, FallbackLayerRegisterSpace + FallbackLayerDescriptorHeapSpaceOffset + registerSpaceOffset, resClass, resKind, HandleName, descriptorHeapType);
         unsigned int id = AddAliasedHandle(M, FallbackLayerDescriptorHeapTable, FallbackLayerRegisterSpace + FallbackLayerDescriptorHeapSpaceOffset + registerSpaceOffset, resClass, resKind, HandleName, descriptorHeapType);
         
         
         TypeToAliasedDescriptorHeap[resClassIndex][key] = GetResourceFromID(DM, resClass, id).GetGlobalSymbol();
         TypeToAliasedDescriptorHeap[resClassIndex][key] = GetResourceFromID(DM, resClass, id).GetGlobalSymbol();
@@ -695,7 +689,7 @@ bool DxilPatchShaderRecordBindings::IsCBufferLoad(llvm::Instruction *instruction
   return cbufferLoad || cbufferLoadLegacy;
   return cbufferLoad || cbufferLoadLegacy;
 }
 }
 
 
-const unsigned int GetResolvedRangeID(DXIL::ResourceClass resClass, Value *rangeIdVal)
+unsigned int GetResolvedRangeID(DXIL::ResourceClass resClass, Value *rangeIdVal)
 {
 {
   if (auto CI = dyn_cast<ConstantInt>(rangeIdVal))
   if (auto CI = dyn_cast<ConstantInt>(rangeIdVal))
   {
   {
@@ -854,7 +848,6 @@ void DxilPatchShaderRecordBindings::PatchShaderBindings(Module &M) {
   std::vector<llvm::Instruction *> instructionsToRemove;
   std::vector<llvm::Instruction *> instructionsToRemove;
   for (BasicBlock &block : EntryPointFunction->getBasicBlockList()) {
   for (BasicBlock &block : EntryPointFunction->getBasicBlockList()) {
     auto & Instructions = block.getInstList();
     auto & Instructions = block.getInstList();
-    auto It = Instructions.begin();
 
 
     for (auto &instr : Instructions) {
     for (auto &instr : Instructions) {
       DxilInst_CreateHandleForLib createHandleForLib(&instr);
       DxilInst_CreateHandleForLib createHandleForLib(&instr);
@@ -1083,7 +1076,7 @@ ShaderRecordEntry FindRootSignatureDescriptorHelper(
                                                        dxilParamType) &&
                                                        dxilParamType) &&
             baseRegisterIndex == rootParam.Constants.ShaderRegister &&
             baseRegisterIndex == rootParam.Constants.ShaderRegister &&
             registerSpace == rootParam.Constants.RegisterSpace) {
             registerSpace == rootParam.Constants.RegisterSpace) {
-          return {dxilParamType, recordOffset};
+          return {dxilParamType, recordOffset, 0};
         }
         }
         recordOffset += rootParam.Constants.Num32BitValues * sizeof(uint32_t);
         recordOffset += rootParam.Constants.Num32BitValues * sizeof(uint32_t);
         break;
         break;
@@ -1094,7 +1087,7 @@ ShaderRecordEntry FindRootSignatureDescriptorHelper(
         for (unsigned int rangeIndex = 0;
         for (unsigned int rangeIndex = 0;
              rangeIndex < descriptorTable.NumDescriptorRanges; rangeIndex++) {
              rangeIndex < descriptorTable.NumDescriptorRanges; rangeIndex++) {
           auto &range = descriptorTable.pDescriptorRanges[rangeIndex];
           auto &range = descriptorTable.pDescriptorRanges[rangeIndex];
-          if (range.OffsetInDescriptorsFromTableStart != -1) {
+          if (range.OffsetInDescriptorsFromTableStart != (unsigned)-1) {
             rangeOffsetInDescriptors = range.OffsetInDescriptorsFromTableStart;
             rangeOffsetInDescriptors = range.OffsetInDescriptorsFromTableStart;
           }
           }
 
 
@@ -1121,7 +1114,7 @@ ShaderRecordEntry FindRootSignatureDescriptorHelper(
                                                        dxilParamType) &&
                                                        dxilParamType) &&
             baseRegisterIndex == rootParam.Descriptor.ShaderRegister &&
             baseRegisterIndex == rootParam.Descriptor.ShaderRegister &&
             registerSpace == rootParam.Descriptor.RegisterSpace) {
             registerSpace == rootParam.Descriptor.RegisterSpace) {
-          return {dxilParamType, recordOffset};
+          return {dxilParamType, recordOffset, 0};
         }
         }
 
 
         recordOffset += SizeofD3D12GpuVA;
         recordOffset += SizeofD3D12GpuVA;

+ 25 - 5
lib/HLSL/DxilPreparePasses.cpp

@@ -256,11 +256,6 @@ static bool GetUnsignedVal(Value *V, uint32_t *pValue) {
   return true;
   return true;
 }
 }
 
 
-static uint8_t NegMask(uint8_t V) {
-  V ^= 0xF;
-  return V & 0xF;
-}
-
 static void MarkUsedSignatureElements(Function *F, DxilModule &DM) {
 static void MarkUsedSignatureElements(Function *F, DxilModule &DM) {
   DXASSERT_NOMSG(F != nullptr);
   DXASSERT_NOMSG(F != nullptr);
   // For every loadInput/storeOutput, update the corresponding ReadWriteMask.
   // For every loadInput/storeOutput, update the corresponding ReadWriteMask.
@@ -336,6 +331,27 @@ public:
 
 
   const char *getPassName() const override { return "HLSL DXIL Finalize Module"; }
   const char *getPassName() const override { return "HLSL DXIL Finalize Module"; }
 
 
+  // Erases every instruction that uses the llvm.donothing intrinsic and then
+  // erases the intrinsic function itself. Does nothing when the module
+  // contains no such function.
+  void patchValidation_1_5(Module &M) {
+    Function *DoNothingDecl = nullptr;
+    for (Function &Fn : M) {
+      if (!Fn.isIntrinsic() || Fn.getIntrinsicID() != Intrinsic::donothing)
+        continue;
+      DoNothingDecl = &Fn;
+      break;
+    }
+    if (DoNothingDecl == nullptr)
+      return;
+
+    auto UserIt = DoNothingDecl->user_begin();
+    auto UserEnd = DoNothingDecl->user_end();
+    while (UserIt != UserEnd) {
+      // Step past the user before erasing it so the walk stays valid.
+      User *Current = *UserIt;
+      UserIt++;
+      cast<Instruction>(Current)->eraseFromParent();
+    }
+
+    assert(DoNothingDecl->user_empty() && "Not all users removed from @llvm.donothing");
+    DoNothingDecl->eraseFromParent();
+  }
+
   void patchValidation_1_1(Module &M) {
   void patchValidation_1_1(Module &M) {
     for (iplist<Function>::iterator F : M.getFunctionList()) {
     for (iplist<Function>::iterator F : M.getFunctionList()) {
       for (Function::iterator BBI = F->begin(), BBE = F->end(); BBI != BBE;
       for (Function::iterator BBI = F->begin(), BBE = F->end(); BBI != BBE;
@@ -383,6 +399,10 @@ public:
           MarkUsedSignatureElements(DM.GetPatchConstantFunction(), DM);
           MarkUsedSignatureElements(DM.GetPatchConstantFunction(), DM);
       }
       }
 
 
+      if (ValMajor == 1 && ValMinor <= 5) {
+        patchValidation_1_5(M);
+      }
+
       // Remove store undef output.
       // Remove store undef output.
       hlsl::OP *hlslOP = M.GetDxilModule().GetOP();
       hlsl::OP *hlslOP = M.GetDxilModule().GetOP();
       RemoveStoreUndefOutput(M, hlslOP);
       RemoveStoreUndefOutput(M, hlslOP);

+ 451 - 0
lib/HLSL/DxilValueCache.cpp

@@ -0,0 +1,451 @@
+//===---------- DxilValueCache.cpp - Dxil Constant Value Cache ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility to compute and cache constant values for instructions.
+//
+
+
+#include "llvm/Pass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/ADT/Statistic.h"
+
+#include "dxc/HLSL/DxilValueCache.h"
+#include <unordered_set>
+
+#define DEBUG_TYPE "dxil-value-cache"
+
+using namespace llvm;
+
+// Returns true when V is a ConstantInt whose value is non-zero.
+static bool IsConstantTrue(const Value *V) {
+  const ConstantInt *CI = dyn_cast<ConstantInt>(V);
+  if (!CI)
+    return false;
+  return CI->getLimitedValue() != 0;
+}
+// Returns true when V is a ConstantInt whose value is zero.
+static bool IsConstantFalse(const Value *V) {
+  const ConstantInt *CI = dyn_cast<ConstantInt>(V);
+  if (!CI)
+    return false;
+  return CI->getLimitedValue() == 0;
+}
+
+// Returns true when BB is the entry block of the function that contains it.
+static bool IsEntryBlock(const BasicBlock *BB) {
+  const Function *Parent = BB->getParent();
+  const BasicBlock &Entry = Parent->getEntryBlock();
+  return &Entry == BB;
+}
+
+// Records BB as always reachable by caching an i1 constant 1 for it.
+void DxilValueCache::MarkAlwaysReachable(BasicBlock *BB) {
+  auto *BoolTy = Type::getInt1Ty(BB->getContext());
+  auto *Flag = ConstantInt::get(BoolTy, 1);
+  ValueMap.Set(BB, Flag);
+}
+// Records BB as never reachable by caching an i1 constant 0 for it.
+void DxilValueCache::MarkNeverReachable(BasicBlock *BB) {
+  auto *BoolTy = Type::getInt1Ty(BB->getContext());
+  auto *Flag = ConstantInt::get(BoolTy, 0);
+  ValueMap.Set(BB, Flag);
+}
+
+// Cache-only query: true when BB has a cached value that is a non-zero
+// constant integer. Does not trigger any processing.
+bool DxilValueCache::IsAlwaysReachable_(BasicBlock *BB) {
+  Value *Cached = ValueMap.Get(BB);
+  return Cached != nullptr && IsConstantTrue(Cached);
+}
+
+// Cache-only query: true when BB has a cached value that is a zero
+// constant integer. Does not trigger any processing.
+bool DxilValueCache::IsNeverReachable_(BasicBlock *BB) {
+  Value *Cached = ValueMap.Get(BB);
+  return Cached != nullptr && IsConstantFalse(Cached);
+}
+
+// Tries to simplify the PHI node I using cached block reachability.
+// Returns the simplified value (after one more cache lookup), or nullptr
+// when nothing could be deduced. DT may be null; it is only consulted to
+// prove that an incoming instruction dominates the PHI.
+Value *DxilValueCache::ProcessAndSimplify_PHI(Instruction *I, DominatorTree *DT) {
+  PHINode *PN = cast<PHINode>(I);
+  BasicBlock *SoleIncoming = nullptr;
+
+  Value *Simplified = nullptr;
+  // Pick a single incoming block: the first predecessor cached as always
+  // reachable, or else the only predecessor not cached as never reachable.
+  // Two or more such candidates leave SoleIncoming null.
+  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+    BasicBlock *PredBB = PN->getIncomingBlock(i);
+    if (IsAlwaysReachable_(PredBB)) {
+      SoleIncoming = PredBB;
+      break;
+    }
+    else if (!IsNeverReachable_(PredBB)) {
+      if (SoleIncoming) {
+        SoleIncoming = nullptr;
+        break;
+      }
+      SoleIncoming = PredBB;
+    }
+  }
+
+  if (SoleIncoming) {
+    Value *V = OptionallyGetValue(PN->getIncomingValueForBlock(SoleIncoming));
+    if (isa<Constant>(V))
+      Simplified = V;
+    // Note: this inner 'I' shadows the function parameter within this branch.
+    else if (Instruction *I = dyn_cast<Instruction>(V)) {
+      // If this is an instruction, we have to make sure it
+      // dominates this PHI.
+      // There are several conditions that qualify:
+      //   1. There's only one predecessor
+      //   2. If the instruction is in the entry block, then it must dominate
+      //   3. If we are provided with a Dominator tree, and it decides that
+      //      it dominates.
+      if (PN->getNumIncomingValues() == 1 ||
+        IsEntryBlock(I->getParent()) ||
+        (DT && DT->dominates(I, PN)))
+      {
+        Simplified = I;
+      }
+    }
+  }
+
+  // If we couldn't deduce it, run the LLVM stock simplification to see
+  // if we could do anything.
+  if (!Simplified)
+    Simplified = llvm::SimplifyInstruction(I, I->getModule()->getDataLayout());
+
+  // One last step, to check if we have anything cached for whatever we
+  // simplified to.
+  if (Simplified)
+    Simplified = OptionallyGetValue(Simplified);
+
+  return Simplified;
+}
+
+// Propagates cached block reachability across the branch instruction I.
+// A branch never yields a simplified value, so this always returns nullptr;
+// its only effect is marking successor blocks as always/never reachable.
+// DT is not used.
+Value *DxilValueCache::ProcessAndSimpilfy_Br(Instruction *I, DominatorTree *DT) {
+
+  // The *only* reason we're paying special attention to the
+  // branch inst, is to mark certain Basic Blocks as always
+  // reachable or unreachable.
+
+  BranchInst *Br = cast<BranchInst>(I);
+
+  BasicBlock *BB = Br->getParent();
+  if (Br->isConditional()) {
+
+    BasicBlock *TrueSucc = Br->getSuccessor(0);
+    BasicBlock *FalseSucc = Br->getSuccessor(1);
+
+    Value *Cond = OptionallyGetValue(Br->getCondition());
+
+    if (IsNeverReachable_(BB)) {
+      // A successor can only be declared never reachable from here when this
+      // dead block is its single predecessor; otherwise some other
+      // predecessor may still reach it. This matches the guards used by the
+      // constant-condition and unconditional cases below.
+      if (FalseSucc->getSinglePredecessor())
+        MarkNeverReachable(FalseSucc);
+      if (TrueSucc->getSinglePredecessor())
+        MarkNeverReachable(TrueSucc);
+    }
+    else if (IsConstantTrue(Cond)) {
+      // Only the true edge is taken.
+      if (IsAlwaysReachable_(BB)) {
+        MarkAlwaysReachable(TrueSucc);
+      }
+      if (FalseSucc->getSinglePredecessor())
+        MarkNeverReachable(FalseSucc);
+    }
+    else if (IsConstantFalse(Cond)) {
+      // Only the false edge is taken.
+      if (IsAlwaysReachable_(BB)) {
+        MarkAlwaysReachable(FalseSucc);
+      }
+      if (TrueSucc->getSinglePredecessor())
+        MarkNeverReachable(TrueSucc);
+    }
+  }
+  else {
+    // Unconditional branch: the successor inherits this block's status.
+    BasicBlock *Succ = Br->getSuccessor(0);
+    if (IsAlwaysReachable_(BB))
+      MarkAlwaysReachable(Succ);
+    else if (Succ->getSinglePredecessor() && IsNeverReachable_(BB))
+      MarkNeverReachable(Succ);
+  }
+
+  return nullptr;
+}
+
+
+
+// Attempts to simplify instruction I, substituting any cached values for its
+// operands first. Branches and PHIs get dedicated handling; binary ops,
+// compares, selects, extractelements and casts go through the stock LLVM
+// simplifiers. Other instruction kinds are not simplified.
+// Returns the simplified value or nullptr. Only constant results are stored
+// in the cache; a non-constant result is returned without being cached.
+Value *DxilValueCache::SimplifyAndCacheResult(Instruction *I, DominatorTree *DT) {
+
+  const DataLayout &DL = I->getModule()->getDataLayout();
+
+  Value *Simplified = nullptr;
+  if (Instruction::Br == I->getOpcode()) {
+    Simplified = ProcessAndSimpilfy_Br(I, DT);
+  }
+  else if (Instruction::PHI == I->getOpcode()) {
+    Simplified = ProcessAndSimplify_PHI(I, DT);
+  }
+  // The rest of the checks use LLVM stock simplifications
+  else if (I->isBinaryOp()) {
+    Simplified =
+      llvm::SimplifyBinOp(
+        I->getOpcode(),
+        OptionallyGetValue(I->getOperand(0)),
+        OptionallyGetValue(I->getOperand(1)),
+        DL);
+  }
+  else if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+    Simplified =
+      llvm::SimplifyCmpInst(Cmp->getPredicate(),
+        OptionallyGetValue(I->getOperand(0)),
+        OptionallyGetValue(I->getOperand(1)),
+        DL);
+  }
+  else if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+    Simplified = 
+      llvm::SimplifySelectInst(
+        OptionallyGetValue(Select->getCondition()),
+        OptionallyGetValue(Select->getTrueValue()),
+        OptionallyGetValue(Select->getFalseValue()),
+        DL
+      );
+  }
+  else if (ExtractElementInst *IE = dyn_cast<ExtractElementInst>(I)) {
+    Simplified =
+      llvm::SimplifyExtractElementInst(
+        OptionallyGetValue(IE->getVectorOperand()),
+        OptionallyGetValue(IE->getIndexOperand()),
+        DL, nullptr, DT);
+  }
+  else if (CastInst *Cast = dyn_cast<CastInst>(I)) {
+    Simplified =
+      llvm::SimplifyCastInst(
+        Cast->getOpcode(),
+        OptionallyGetValue(Cast->getOperand(0)),
+        Cast->getType(), DL);
+  }
+
+  // Cache constants only.
+  if (Simplified && isa<Constant>(Simplified))
+    ValueMap.Set(I, Simplified);
+
+  return Simplified;
+}
+
+STATISTIC(StaleValuesEncountered, "Stale Values Encountered");
+
+// Returns true when V has a non-stale map entry holding a non-null value.
+// An entry holding the sentinel placeholder also counts as seen.
+bool DxilValueCache::WeakValueMap::Seen(Value *V) {
+  auto Found = Map.find(V);
+  if (Found != Map.end()) {
+    auto &Slot = Found->second;
+    if (!Slot.IsStale())
+      return Slot.Value;
+  }
+  return false;
+}
+
+// Returns the value cached for V, or nullptr when there is no entry, the
+// entry is stale, or the entry only holds the sentinel placeholder.
+Value *DxilValueCache::WeakValueMap::Get(Value *V) {
+  auto Found = Map.find(V);
+  if (Found == Map.end() || Found->second.IsStale())
+    return nullptr;
+
+  Value *Cached = Found->second.Value;
+  Value *Placeholder = GetSentinel(V->getContext());
+  return Cached == Placeholder ? nullptr : Cached;
+}
+
+// Stores the sentinel placeholder as the entry for Key, creating the entry
+// if it does not exist yet.
+void DxilValueCache::WeakValueMap::SetSentinel(Value *Key) {
+  Value *Placeholder = GetSentinel(Key->getContext());
+  Map[Key].Set(Key, Placeholder);
+}
+
+// Returns the sentinel value, creating it on first use as an i1 PHI node
+// with no reserved incoming values.
+Value *DxilValueCache::WeakValueMap::GetSentinel(LLVMContext &Ctx) {
+  if (Sentinel)
+    return Sentinel.get();
+
+  Sentinel.reset(PHINode::Create(Type::getInt1Ty(Ctx), 0));
+  return Sentinel.get();
+}
+
+// Prints every non-stale entry to dbgs(), one per line. Basic-block keys are
+// tagged "[BB]" and shown with their reachability; other keys are shown with
+// their cached value. Sentinel entries print as NO_VALUE.
+LLVM_DUMP_METHOD
+void DxilValueCache::WeakValueMap::dump() const {
+  raw_ostream &OS = dbgs();
+  for (auto It = Map.begin(), End = Map.end(); It != End; ++It) {
+    if (It->second.IsStale())
+      continue;
+
+    const Value *Key = It->first;
+    const Value *Cached = It->second.Value;
+    const bool IsPlaceholder = Sentinel && Cached == Sentinel.get();
+    const BasicBlock *KeyBB = dyn_cast<BasicBlock>(Key);
+
+    // Left-hand side: the key.
+    if (KeyBB)
+      OS << "[BB]" << KeyBB->getName() << " -> ";
+    else
+      OS << Key->getName() << " -> ";
+
+    // Right-hand side: the cached state.
+    if (IsPlaceholder)
+      OS << "NO_VALUE";
+    else if (!KeyBB)
+      OS << *Cached;
+    else if (IsConstantTrue(Cached))
+      OS << "Always Reachable!";
+    else if (IsConstantFalse(Cached))
+      OS << "Never Reachable!";
+
+    OS << "\n";
+  }
+}
+
+// Stores V as the cached value for Key, creating the entry if needed.
+void DxilValueCache::WeakValueMap::Set(Value *Key, Value *V) {
+  auto &Slot = Map[Key];
+  Slot.Set(Key, V);
+}
+
+// Returns the cached replacement for V when one exists; otherwise returns
+// V unchanged.
+Value *DxilValueCache::OptionallyGetValue(Value *V) {
+  Value *Cached = ValueMap.Get(V);
+  return Cached ? Cached : V;
+}
+
+// Constructs the pass and registers it with the global pass registry.
+DxilValueCache::DxilValueCache() : ModulePass(ID) {
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeDxilValueCachePass(Registry);
+}
+
+// Human-readable name of this pass.
+const char *DxilValueCache::getPassName() const {
+  const char *Name = "Dxil Value Cache";
+  return Name;
+}
+
+// Returns the cached value for V when present; otherwise processes V (and
+// what it depends on) and returns that result, which may be nullptr.
+Value *DxilValueCache::GetValue(Value *V, DominatorTree *DT) {
+  Value *Cached = ValueMap.Get(V);
+  if (Cached == nullptr)
+    return ProcessValue(V, DT);
+  return Cached;
+}
+
+// Processes BB first so the cache is up to date, then reports whether BB is
+// cached as always reachable.
+bool DxilValueCache::IsAlwaysReachable(BasicBlock *BB, DominatorTree *DT) {
+  (void)ProcessValue(BB, DT);
+  const bool Reachable = IsAlwaysReachable_(BB);
+  return Reachable;
+}
+
+// Processes BB first so the cache is up to date, then reports whether BB is
+// cached as never reachable.
+bool DxilValueCache::IsNeverReachable(BasicBlock *BB, DominatorTree *DT) {
+  (void)ProcessValue(BB, DT);
+  const bool Dead = IsNeverReachable_(BB);
+  return Dead;
+}
+
+// Debug helper: prints the contents of the value map via its own dump().
+LLVM_DUMP_METHOD
+void DxilValueCache::dump() const {
+  ValueMap.dump();
+}
+
+// Processes NewV and everything it transitively depends on, caching constant
+// simplifications and block reachability along the way.
+//
+// NewV may be an Instruction or a BasicBlock; any other kind of value yields
+// nullptr immediately. For an Instruction, the return value is whatever
+// SimplifyAndCacheResult produced for it (possibly nullptr, possibly a
+// non-constant value that was not cached). For a BasicBlock the return value
+// is always nullptr; query reachability through the cache afterwards.
+// DT may be null and is forwarded to the simplification helpers.
+Value *DxilValueCache::ProcessValue(Value *NewV, DominatorTree *DT) {
+
+  Value *Result = nullptr;
+
+  SmallVector<Value *, 16> WorkList;
+
+  // Although we accept all values for convenience, we only process
+  // Instructions and BasicBlocks.
+  if (Instruction *I = dyn_cast<Instruction>(NewV)) {
+    WorkList.push_back(I);
+  }
+  else if (BasicBlock *BB = dyn_cast<BasicBlock>(NewV)) {
+    WorkList.push_back(BB->getTerminator());
+    WorkList.push_back(BB);
+  }
+  else {
+    return nullptr;
+  }
+
+  // Unconditionally process this one instruction, whether we've seen
+  // it or not. The simplification might be able to do something to
+  // simplify it even when we don't have its value cached.
+
+
+  // This is a basic DFS setup.
+  while (WorkList.size()) {
+    Value *V = WorkList.back();
+
+    // If we haven't seen this value, go in and push things it depends on
+    // into the worklist.
+    if (!ValueMap.Seen(V)) {
+      // The sentinel marks V as in progress so cycles terminate.
+      ValueMap.SetSentinel(V);
+      if (Instruction *I = dyn_cast<Instruction>(V)) {
+
+        for (Use &U : I->operands()) {
+          Instruction *UseI = dyn_cast<Instruction>(U.get());
+          if (!UseI)
+            continue;
+          if (!ValueMap.Seen(UseI))
+            WorkList.push_back(UseI);
+        }
+
+        // A PHI also depends on the reachability of its incoming blocks,
+        // which is decided by those blocks and their terminators.
+        if (PHINode *PN = dyn_cast<PHINode>(I)) {
+          for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+            BasicBlock *BB = PN->getIncomingBlock(i);
+            TerminatorInst *Term = BB->getTerminator();
+            if (!ValueMap.Seen(Term))
+              WorkList.push_back(Term);
+            if (!ValueMap.Seen(BB))
+              WorkList.push_back(BB);
+          }
+        }
+      }
+      else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+        if (IsEntryBlock(BB)) {
+          MarkAlwaysReachable(BB);
+        }
+        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; PI++) {
+          BasicBlock *PredBB = *PI;
+          TerminatorInst *Term = PredBB->getTerminator();
+          if (!ValueMap.Seen(Term))
+            WorkList.push_back(Term);
+          if (!ValueMap.Seen(PredBB))
+            WorkList.push_back(PredBB);
+        }
+      }
+    }
+    // If we've seen this value, all its dependencies must have been processed
+    // as well.
+    else {
+      WorkList.pop_back();
+      if (Instruction *I = dyn_cast<Instruction>(V)) {
+        Value *SimplifiedValue = SimplifyAndCacheResult(I, DT);
+        // Set the result if this is the input inst.
+        // SimplifyInst may not have cached the value
+        // so we return it directly.
+        if (I == NewV)
+          Result = SimplifiedValue;
+      }
+      else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+        // Deduce the basic block's reachability based on
+        // other analysis.
+        if (!IsEntryBlock(BB)) {
+          // BB is never reachable when every predecessor is. The check must
+          // look at each predecessor (*PI), not at BB itself. A predecessor
+          // that is still in progress has no cached status and therefore
+          // conservatively counts as possibly reachable.
+          bool AllNeverReachable = true;
+          for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; PI++) {
+            if (!IsNeverReachable_(*PI)) {
+              AllNeverReachable = false;
+              break;
+            }
+          }
+          if (AllNeverReachable)
+            MarkNeverReachable(BB);
+        }
+
+      }
+    }
+  }
+
+  return Result;
+}
+
+char DxilValueCache::ID;
+
+// Factory: returns a newly allocated DxilValueCache pass.
+ModulePass *llvm::createDxilValueCachePass() {
+  DxilValueCache *NewPass = new DxilValueCache();
+  return NewPass;
+}
+
+INITIALIZE_PASS(DxilValueCache, DEBUG_TYPE, "Dxil Value Cache", false, false)
+

+ 1 - 1
lib/HLSL/HLExpandStoreIntrinsics.cpp

@@ -99,7 +99,7 @@ void HLExpandStoreIntrinsics::emitElementStores(CallInst &OriginalCall,
   }
   }
   else if (ArrayType *ArrayTy = dyn_cast<ArrayType>(StackTopTy)) {
   else if (ArrayType *ArrayTy = dyn_cast<ArrayType>(StackTopTy)) {
     unsigned ElemSize = (unsigned)Module.getDataLayout().getTypeAllocSize(ArrayTy->getElementType());
     unsigned ElemSize = (unsigned)Module.getDataLayout().getTypeAllocSize(ArrayTy->getElementType());
-    for (int i = 0; i < ArrayTy->getNumElements(); ++i) {
+    for (int i = 0; i < (int)ArrayTy->getNumElements(); ++i) {
       unsigned ElemOffsetFromBase = OffsetFromBase + ElemSize * i;
       unsigned ElemOffsetFromBase = OffsetFromBase + ElemSize * i;
       GEPIndicesStack.emplace_back(Builder.getInt32(i));
       GEPIndicesStack.emplace_back(Builder.getInt32(i));
       emitElementStores(OriginalCall, GEPIndicesStack, ArrayTy->getElementType(), ElemOffsetFromBase);
       emitElementStores(OriginalCall, GEPIndicesStack, ArrayTy->getElementType(), ElemOffsetFromBase);

+ 4 - 3
lib/HLSL/HLModule.cpp

@@ -945,11 +945,12 @@ unsigned HLModule::GetBindingForResourceInCB(GetElementPtrInst *CbPtr,
 // TODO: Don't check names.
 // TODO: Don't check names.
 bool HLModule::IsStreamOutputType(llvm::Type *Ty) {
 bool HLModule::IsStreamOutputType(llvm::Type *Ty) {
   if (StructType *ST = dyn_cast<StructType>(Ty)) {
   if (StructType *ST = dyn_cast<StructType>(Ty)) {
-    if (ST->getName().startswith("class.PointStream"))
+    StringRef name = ST->getName();
+    if (name.startswith("class.PointStream"))
       return true;
       return true;
-    if (ST->getName().startswith("class.LineStream"))
+    if (name.startswith("class.LineStream"))
       return true;
       return true;
-    if (ST->getName().startswith("class.TriangleStream"))
+    if (name.startswith("class.TriangleStream"))
       return true;
       return true;
   }
   }
   return false;
   return false;

+ 1 - 21
lib/HLSL/HLOperationLower.cpp

@@ -827,24 +827,6 @@ bool IsValidLoadInput(Value *V) {
   return true;
   return true;
 }
 }
 
 
-// Apply current shuffle vector mask on top of previous shuffle mask.
-// For example, if previous mask is (12,11,10,13) and current mask is (3,1,0,2)
-// new mask would be (13,11,12,10)
-Constant *AccumulateMask(Constant *curMask, Constant *prevMask) {
-  if (curMask == nullptr) {
-    return prevMask;
-  }
-  unsigned size = cast<VectorType>(curMask->getType())->getNumElements();
-  SmallVector<uint32_t, 16> Elts;
-  for (unsigned i = 0; i != size; ++i) {
-    ConstantInt *Index = cast<ConstantInt>(curMask->getAggregateElement(i));
-    ConstantInt *IVal =
-        cast<ConstantInt>(prevMask->getAggregateElement(Index->getSExtValue()));
-    Elts.emplace_back(IVal->getSExtValue());
-  }
-  return ConstantDataVector::get(curMask->getContext(), Elts);
-}
-
 // Tunnel through insert/extract element and shuffle to find original source
 // Tunnel through insert/extract element and shuffle to find original source
 // of scalar value, or specified element (vecIdx) of vector value.
 // of scalar value, or specified element (vecIdx) of vector value.
 Value *FindScalarSource(Value *src, unsigned vecIdx = 0) {
 Value *FindScalarSource(Value *src, unsigned vecIdx = 0) {
@@ -2745,7 +2727,7 @@ struct SampleHelper {
     DXASSERT_NOMSG(compareValue);
     DXASSERT_NOMSG(compareValue);
   }
   }
   void SetClamp(CallInst *CI, unsigned clampIdx) {
   void SetClamp(CallInst *CI, unsigned clampIdx) {
-    if (clamp = ReadHLOperand(CI, clampIdx)) {
+    if ((clamp = ReadHLOperand(CI, clampIdx))) {
       if (clamp->getType()->isVectorTy()) {
       if (clamp->getType()->isVectorTy()) {
         IRBuilder<> Builder(CI);
         IRBuilder<> Builder(CI);
         clamp = Builder.CreateExtractElement(clamp, (uint64_t)0);
         clamp = Builder.CreateExtractElement(clamp, (uint64_t)0);
@@ -4915,8 +4897,6 @@ Value *TranslateGenericRayQueryMethod(CallInst *CI, IntrinsicOp IOP, OP::OpCode
   Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
   Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
   Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
   Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
 
 
-  Value *Args[] = {opArg, handle};
-
   IRBuilder<> Builder(CI);
   IRBuilder<> Builder(CI);
   Function *F = hlslOP->GetOpFunc(opcode, CI->getType());
   Function *F = hlslOP->GetOpFunc(opcode, CI->getType());
 
 

+ 2 - 2
lib/HLSL/HLSignatureLower.cpp

@@ -1064,7 +1064,7 @@ void HLSignatureLower::GenerateDxilInputsOutputs(DXIL::SignatureKind SK) {
   DxilFunctionProps &props = HLM.GetDxilFunctionProps(Entry);
   DxilFunctionProps &props = HLM.GetDxilFunctionProps(Entry);
   Module &M = *(HLM.GetModule());
   Module &M = *(HLM.GetModule());
 
 
-  OP::OpCode opcode;
+  OP::OpCode opcode = (OP::OpCode)-1;
   switch (SK) {
   switch (SK) {
   case DXIL::SignatureKind::Input:
   case DXIL::SignatureKind::Input:
     opcode = OP::OpCode::LoadInput;
     opcode = OP::OpCode::LoadInput;
@@ -1614,7 +1614,7 @@ void HLSignatureLower::GenerateEmitIndicesOperation(Value *indicesOutput) {
     // Skip first pointer idx which must be 0.
     // Skip first pointer idx which must be 0.
     GEPIt++;
     GEPIt++;
     Value *primIdx = GEPIt.getOperand();
     Value *primIdx = GEPIt.getOperand();
-    DXASSERT(++GEPIt == E, "invalid GEP here");
+    DXASSERT(++GEPIt == E, "invalid GEP here"); (void)E;
 
 
     auto GepUser = GEP->user_begin();
     auto GepUser = GEP->user_begin();
     auto GepUserE = GEP->user_end();
     auto GepUserE = GEP->user_end();

+ 28 - 17
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -29,6 +29,7 @@
 #include "dxc/HLSL/DxilGenerationPass.h" // HLSL Change
 #include "dxc/HLSL/DxilGenerationPass.h" // HLSL Change
 #include "dxc/HLSL/HLMatrixLowerPass.h" // HLSL Change
 #include "dxc/HLSL/HLMatrixLowerPass.h" // HLSL Change
 #include "dxc/HLSL/ComputeViewIdState.h" // HLSL Change
 #include "dxc/HLSL/ComputeViewIdState.h" // HLSL Change
+#include "dxc/HLSL/DxilValueCache.h" // HLSL Change
 
 
 using namespace llvm;
 using namespace llvm;
 
 
@@ -207,6 +208,7 @@ void PassManagerBuilder::populateFunctionPassManager(
 
 
 // HLSL Change Starts
 // HLSL Change Starts
 static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExtensionsCodegenHelper *ExtHelper, legacy::PassManagerBase &MPM) {
 static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExtensionsCodegenHelper *ExtHelper, legacy::PassManagerBase &MPM) {
+
   // Don't do any lowering if we're targeting high-level.
   // Don't do any lowering if we're targeting high-level.
   if (HLSLHighLevel) {
   if (HLSLHighLevel) {
     MPM.add(createHLEmitMetadataPass());
     MPM.add(createHLEmitMetadataPass());
@@ -241,30 +243,31 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
     // Do this before change vector to array.
     // Do this before change vector to array.
     MPM.add(createDxilLegalizeEvalOperationsPass());
     MPM.add(createDxilLegalizeEvalOperationsPass());
   }
   }
-  else {
-    // This should go between matrix lower and dynamic indexing vector to array,
-    // because matrix lower may create dynamically indexed global vectors,
-    // which should become locals. If they are turned into arrays first,
-    // this pass will ignore them as it only works on scalars and vectors.
-    MPM.add(createLowerStaticGlobalIntoAlloca());
-  }
+  // This should go between matrix lower and dynamic indexing vector to array,
+  // because matrix lower may create dynamically indexed global vectors,
+  // which should become locals. If they are turned into arrays first,
+  // this pass will ignore them as it only works on scalars and vectors.
+  MPM.add(createLowerStaticGlobalIntoAlloca());
 
 
   // Change dynamic indexing vector to array.
   // Change dynamic indexing vector to array.
-  MPM.add(createDynamicIndexingVectorToArrayPass(NoOpt));
+  MPM.add(createDynamicIndexingVectorToArrayPass(false /* ReplaceAllVector */));
+
+  // Rotate the loops before, mem2reg, since it messes up dbg.value's
+  MPM.add(createLoopRotatePass());
 
 
   // mem2reg
   // mem2reg
-  // Special Mem2Reg pass that only happens if optimization is
-  // enabled or loop unroll is needed.
-  MPM.add(createLoopRotatePass()); // Rotate the loops before, mem2reg, since it messes up dbg.value's
+  // Special Mem2Reg pass that skips precise marker.
   MPM.add(createDxilConditionalMem2RegPass(NoOpt));
   MPM.add(createDxilConditionalMem2RegPass(NoOpt));
 
 
   if (!NoOpt) {
   if (!NoOpt) {
     MPM.add(createDxilConvergentMarkPass());
     MPM.add(createDxilConvergentMarkPass());
   }
   }
 
 
-  MPM.add(createSimplifyInstPass());
+  if (!NoOpt)
+    MPM.add(createSimplifyInstPass());
 
 
-  MPM.add(createCFGSimplificationPass());
+  if (!NoOpt)
+    MPM.add(createCFGSimplificationPass());
 
 
   // Passes to handle [unroll]
   // Passes to handle [unroll]
   // Needs to happen after SROA since loop count may depend on
   // Needs to happen after SROA since loop count may depend on
@@ -289,14 +292,20 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   // Propagate precise attribute.
   // Propagate precise attribute.
   MPM.add(createDxilPrecisePropagatePass());
   MPM.add(createDxilPrecisePropagatePass());
 
 
-  MPM.add(createSimplifyInstPass());
+  if (!NoOpt)
+    MPM.add(createSimplifyInstPass());
 
 
   // scalarize vector to scalar
   // scalarize vector to scalar
-  MPM.add(createScalarizerPass());
+  MPM.add(createScalarizerPass(!NoOpt /* AllowFolding */));
 
 
-  MPM.add(createSimplifyInstPass());
+  if (!NoOpt)
+    MPM.add(createSimplifyInstPass());
 
 
-  MPM.add(createCFGSimplificationPass());
+  if (!NoOpt)
+    MPM.add(createCFGSimplificationPass());
+
+  // Remove vector instructions
+  MPM.add(createDxilEliminateVectorPass());
 
 
   MPM.add(createDeadCodeEliminationPass());
   MPM.add(createDeadCodeEliminationPass());
 
 
@@ -313,6 +322,7 @@ void PassManagerBuilder::populateModulePassManager(
   if (OptLevel == 0) {
   if (OptLevel == 0) {
     if (!HLSLHighLevel) {
     if (!HLSLHighLevel) {
       MPM.add(createHLEnsureMetadataPass()); // HLSL Change - rehydrate metadata from high-level codegen
       MPM.add(createHLEnsureMetadataPass()); // HLSL Change - rehydrate metadata from high-level codegen
+      MPM.add(createDxilInsertNoopsPass()); // HLSL Change - insert noop instructions
     }
     }
 
 
     if (Inliner) {
     if (Inliner) {
@@ -339,6 +349,7 @@ void PassManagerBuilder::populateModulePassManager(
       MPM.add(createDxilLowerCreateHandleForLibPass());
       MPM.add(createDxilLowerCreateHandleForLibPass());
       MPM.add(createDxilTranslateRawBuffer());
       MPM.add(createDxilTranslateRawBuffer());
       MPM.add(createDxilLegalizeSampleOffsetPass());
       MPM.add(createDxilLegalizeSampleOffsetPass());
+      MPM.add(createDxilFinalizeNoopsPass());
       MPM.add(createDxilFinalizeModulePass());
       MPM.add(createDxilFinalizeModulePass());
       MPM.add(createComputeViewIdStatePass());
       MPM.add(createComputeViewIdStatePass());
       MPM.add(createDxilDeadFunctionEliminationPass());
       MPM.add(createDxilDeadFunctionEliminationPass());

+ 1 - 0
lib/Transforms/Scalar/CMakeLists.txt

@@ -48,6 +48,7 @@ add_llvm_library(LLVMScalarOpts
   DxilLoopUnroll.cpp # HLSL Change
   DxilLoopUnroll.cpp # HLSL Change
   DxilEraseDeadRegion.cpp # HLSL Change
   DxilEraseDeadRegion.cpp # HLSL Change
   DxilFixConstArrayInitializer.cpp # HLSL Change
   DxilFixConstArrayInitializer.cpp # HLSL Change
+  DxilEliminateVector.cpp # HLSL Change
   Scalarizer.cpp
   Scalarizer.cpp
   SeparateConstOffsetFromGEP.cpp
   SeparateConstOffsetFromGEP.cpp
   SimplifyCFGPass.cpp
   SimplifyCFGPass.cpp

+ 239 - 0
lib/Transforms/Scalar/DxilEliminateVector.cpp

@@ -0,0 +1,239 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilEliminateVector.cpp                                                   //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// A pass to remove vector instructions, especially in situations where      //
+// optimizations are turned off.                                             //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "llvm/Pass.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/DIBuilder.h"
+
+#include "dxc/HLSL/DxilValueCache.h"
+
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+
+// Function pass that removes insertelement/extractelement instructions,
+// using DxilValueCache to find replacement values.
+class DxilEliminateVector : public FunctionPass {
+public:
+  static char ID;
+  // Registers the pass with the global pass registry on construction.
+  DxilEliminateVector() : FunctionPass(ID) {
+    initializeDxilEliminateVectorPass(*PassRegistry::getPassRegistry());
+  }
+
+  // Requires the value cache and the dominator tree; declares that all
+  // analyses are preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DxilValueCache>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.setPreservesAll(); // DxilValueCache is safe. CFG is not changed, so DT is okay.
+  }
+
+  // Rewrites @dbg.value calls that refer to the vector built by IE into
+  // per-element @dbg.value calls.
+  bool TryRewriteDebugInfoForVector(InsertElementInst *IE);
+  bool runOnFunction(Function &F) override;
+  const char *getPassName() const override { return "Dxil Eliminate Vector"; }
+};
+
+char DxilEliminateVector::ID;
+}
+
+// Returns the existing MetadataAsValue wrapper for I, or nullptr when I has
+// never been wrapped as metadata. Never creates new metadata.
+static MetadataAsValue *GetAsMetadata(Instruction *I) {
+  auto *Local = LocalAsMetadata::getIfExists(I);
+  if (!Local)
+    return nullptr;
+  return MetadataAsValue::getIfExists(I->getContext(), Local);
+}
+
+// Walks a chain of insertelement instructions ending at V and records, per
+// lane, the scalar most recently inserted. Elements is grown as needed;
+// lanes never written stay null. Indices of 4 or more are ignored.
+// Returns false when V is not an insertelement, when an index is not a
+// constant, or when the chain's base is neither undef nor another
+// collectible insertelement.
+static bool CollectVectorElements(Value *V, SmallVector<Value *, 4> &Elements) {
+  InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
+  if (!Insert)
+    return false;
+
+  Value *BaseVec = Insert->getOperand(0);
+  Value *Scalar = Insert->getOperand(1);
+  Value *Lane = Insert->getOperand(2);
+
+  // Gather the older inserts first so that this one overrides them.
+  if (!isa<UndefValue>(BaseVec) && !CollectVectorElements(BaseVec, Elements))
+    return false;
+
+  ConstantInt *ConstLane = dyn_cast<ConstantInt>(Lane);
+  if (!ConstLane)
+    return false;
+
+  const uint64_t LaneIdx = ConstLane->getLimitedValue();
+  if (LaneIdx >= 4)
+    return true;
+
+  if (Elements.size() <= LaneIdx)
+    Elements.resize(LaneIdx + 1);
+  Elements[LaneIdx] = Scalar;
+  return true;
+}
+
+// Returns true when V is an instruction that is referenced by at least one
+// @dbg.value call.
+static bool HasDebugValue(Value *V) {
+  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+    if (MetadataAsValue *AsMD = GetAsMetadata(Inst)) {
+      for (User *MDUser : AsMD->users())
+        if (isa<DbgValueInst>(MDUser))
+          return true;
+    }
+  }
+  return false;
+}
+
+// Replaces every @dbg.value that describes the vector produced by IE with
+// per-element @dbg.value calls carrying bit-piece expressions, then erases
+// the original vector-level @dbg.value.
+//
+// Elements that already have a @dbg.value of their own, or lanes that were
+// never written, get no new @dbg.value.
+// Returns true when the function was modified (a @dbg.value was inserted or
+// erased); false when IE has no debug users or its elements could not be
+// collected.
+bool DxilEliminateVector::TryRewriteDebugInfoForVector(InsertElementInst *IE) {
+
+  // If this is not ever used as meta-data, there's no debug
+  MetadataAsValue *DebugI = GetAsMetadata(IE);
+  if (!DebugI)
+    return false;
+
+  // Collect @dbg.value instructions
+  SmallVector<DbgValueInst *, 4> DbgValueInsts;
+  for (User *U : DebugI->users()) {
+    if (DbgValueInst *DbgValueI = dyn_cast<DbgValueInst>(U)) {
+      DbgValueInsts.push_back(DbgValueI);
+    }
+  }
+
+  if (!DbgValueInsts.size())
+    return false;
+
+  SmallVector<Value *, 4> Elements;
+  if (!CollectVectorElements(IE, Elements))
+    return false;
+
+  DIBuilder DIB(*IE->getModule());
+  const DataLayout &DL = IE->getModule()->getDataLayout();
+
+  // Go through the elements and create @dbg.value with bit-piece
+  // expressions for them.
+  bool Changed = false;
+  for (DbgValueInst *DVI : DbgValueInsts) {
+
+    // If the vector itself is only a piece of the variable, offset every
+    // element piece by where the vector starts.
+    DIExpression *ParentExpr = DVI->getExpression();
+    unsigned BitpieceOffset = 0;
+    if (ParentExpr->isBitPiece())
+      BitpieceOffset = ParentExpr->getBitPieceOffset();
+
+    for (unsigned i = 0; i < Elements.size(); i++) {
+      if (!Elements[i])
+        continue;
+
+      if (HasDebugValue(Elements[i]))
+        continue;
+
+      unsigned ElementSize = DL.getTypeAllocSizeInBits(Elements[i]->getType());
+      DIExpression *Expr = DIB.createBitPieceExpression(BitpieceOffset + i * ElementSize, ElementSize);
+      DIB.insertDbgValueIntrinsic(Elements[i], 0, DVI->getVariable(), Expr, DVI->getDebugLoc(), DVI);
+    }
+
+    DVI->eraseFromParent();
+    // Erasing the vector-level @dbg.value changes the function even when no
+    // per-element replacement was emitted above, so always report it.
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+// Removes insertelement/extractelement instructions from F.
+//
+// Promotable vector allocas are promoted to registers first. Then, in
+// repeated sweeps, each vector instruction is replaced by the value the
+// DxilValueCache computes for it, or simply erased once it has no users.
+// Sweeping stops when a sweep removes nothing or after as many sweeps as
+// there were vector instructions initially.
+// Returns true when F was modified in any way, including debug-info
+// rewrites.
+bool DxilEliminateVector::runOnFunction(Function &F) {
+
+  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  DxilValueCache *DVC = &getAnalysis<DxilValueCache>();
+
+  std::vector<Instruction *> VectorInsts;
+  std::vector<AllocaInst *> VectorAllocas;
+
+  // Collect the vector insts and allocas.
+  for (auto &BB : F) {
+    for (auto &I : BB)
+      if (isa<InsertElementInst>(&I) || isa<ExtractElementInst>(&I))
+        VectorInsts.push_back(&I);
+      else if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+        if (AI->getAllocatedType()->isVectorTy() && llvm::isAllocaPromotable(AI))
+          VectorAllocas.push_back(AI);
+      }
+  }
+
+  if (!VectorInsts.size())
+    return false;
+
+  bool Changed = false;
+
+  // Promote the allocas if they exist. They could very well exist
+  // because of precise.
+  if (VectorAllocas.size()) {
+    PromoteMemToReg(VectorAllocas, *DT);
+    Changed = true;
+  }
+
+  // Iteratively try to remove them, until all gone or unable to
+  // do it anymore.
+  unsigned Attempts = VectorInsts.size();
+  for (unsigned i = 0; i < Attempts; i++) {
+    bool LocalChange = false;
+
+    for (unsigned j = 0; j < VectorInsts.size();) {
+      auto *I = VectorInsts[j];
+      bool Remove = false;
+
+      if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+        // A debug-info rewrite modifies the function, so it must be reported
+        // in the return value. It is deliberately kept out of LocalChange so
+        // that it does not, by itself, trigger another sweep.
+        Changed |= TryRewriteDebugInfoForVector(IE);
+      }
+
+      if (Value *V = DVC->GetValue(I, DT)) {
+        I->replaceAllUsesWith(V);
+        Remove = true;
+      }
+      else if (I->user_empty()) {
+        Remove = true;
+      }
+
+      // Do the remove
+      if (Remove) {
+        LocalChange = true;
+        I->eraseFromParent();
+        VectorInsts.erase(VectorInsts.begin() + j);
+      }
+      else {
+        j++;
+      }
+    }
+
+    Changed |= LocalChange;
+    if (!LocalChange)
+      break;
+  }
+
+  return Changed;
+}
+
+// Factory: returns a newly allocated DxilEliminateVector pass.
+Pass *llvm::createDxilEliminateVectorPass() {
+  DxilEliminateVector *NewPass = new DxilEliminateVector();
+  return NewPass;
+}
+
+INITIALIZE_PASS(DxilEliminateVector, "dxil-elim-vector", "Dxil Eliminate Vectors", false, false)

+ 3 - 3
lib/Transforms/Scalar/DxilEraseDeadRegion.cpp

@@ -56,9 +56,9 @@ struct DxilEraseDeadRegion : public FunctionPass {
     return false;
     return false;
   }
   }
 
 
-  bool FindDeadRegion(PostDominatorTree *PDT, BasicBlock *Begin, BasicBlock *End, std::set<BasicBlock *> &Region) {
+  bool FindDeadRegion(BasicBlock *Begin, BasicBlock *End, std::set<BasicBlock *> &Region) {
     std::vector<BasicBlock *> WorkList;
     std::vector<BasicBlock *> WorkList;
-    auto ProcessSuccessors = [this, &WorkList, Begin, End, &Region, PDT](BasicBlock *BB) {
+    auto ProcessSuccessors = [this, &WorkList, Begin, End, &Region](BasicBlock *BB) {
       for (BasicBlock *Succ : successors(BB)) {
       for (BasicBlock *Succ : successors(BB)) {
         if (Succ == End) continue;
         if (Succ == End) continue;
         if (Succ == Begin) return false; // If goes back to the beginning, there's a loop, give up.
         if (Succ == Begin) return false; // If goes back to the beginning, there's a loop, give up.
@@ -115,7 +115,7 @@ struct DxilEraseDeadRegion : public FunctionPass {
       return false;
       return false;
 
 
     std::set<BasicBlock *> Region;
     std::set<BasicBlock *> Region;
-    if (!this->FindDeadRegion(PDT, Common, BB, Region))
+    if (!this->FindDeadRegion(Common, BB, Region))
       return false;
       return false;
 
 
     // If BB branches INTO the region, forming a loop give up.
     // If BB branches INTO the region, forming a loop give up.

+ 4 - 23
lib/Transforms/Scalar/DxilLoopUnroll.cpp

@@ -1129,10 +1129,10 @@ public:
   static char ID;
   static char ID;
 
 
   // Function overrides that resolve options when used for DxOpt
   // Function overrides that resolve options when used for DxOpt
-  void applyOptions(PassOptions O) {
+  void applyOptions(PassOptions O) override {
     GetPassOptionBool(O, "NoOpt", &NoOpt, false);
     GetPassOptionBool(O, "NoOpt", &NoOpt, false);
   }
   }
-  void dumpConfig(raw_ostream &OS) {
+  void dumpConfig(raw_ostream &OS) override {
     FunctionPass::dumpConfig(OS);
     FunctionPass::dumpConfig(OS);
     OS << ",NoOpt=" << NoOpt;
     OS << ",NoOpt=" << NoOpt;
   }
   }
@@ -1144,10 +1144,8 @@ public:
   }
   }
 
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<LoopInfoWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequiredID(LoopSimplifyID);
     AU.setPreservesCFG();
     AU.setPreservesCFG();
   }
   }
 
 
@@ -1224,33 +1222,16 @@ public:
     return Changed;
     return Changed;
   }
   }
 
 
-  bool runOnFunction(Function &F) {
+  bool runOnFunction(Function &F) override {
 
 
 
 
-    LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     AssumptionCache *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
     AssumptionCache *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
 
 
-    bool NeedPromote = false;
     bool Changed = false;
     bool Changed = false;
     
     
     Changed |= RemoveAllUnusedAllocas(F);
     Changed |= RemoveAllUnusedAllocas(F);
-
-    if (NoOpt) {
-      // If any of the functions are marked as full unroll.
-      for (Loop *L : *LI) {
-        if (HasLoopsMarkedUnrollRecursive(L)) {
-          NeedPromote = true;
-          break;
-        }
-      }
-    }
-    else {
-      NeedPromote = true;
-    }
-
-    if (NeedPromote)
-      Changed |= Mem2Reg(F, *DT, *AC);
+    Changed |= Mem2Reg(F, *DT, *AC);
 
 
     return Changed;
     return Changed;
   }
   }

+ 2 - 9
lib/Transforms/Scalar/LowerTypePasses.cpp

@@ -110,16 +110,9 @@ bool LowerTypePass::runOnFunction(Function &F, bool HasDbgInfo) {
   for (AllocaInst *A : workList) {
   for (AllocaInst *A : workList) {
     AllocaInst *NewA = lowerAlloca(A);
     AllocaInst *NewA = lowerAlloca(A);
     if (HasDbgInfo) {
     if (HasDbgInfo) {
-      // Add debug info.
+      // Migrate debug info.
       DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(A);
       DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(A);
-      if (DDI) {
-        Value *DDIVar = MetadataAsValue::get(Context, DDI->getRawVariable());
-        Value *DDIExp = MetadataAsValue::get(Context, DDI->getRawExpression());
-        Value *VMD = MetadataAsValue::get(Context, ValueAsMetadata::get(NewA));
-        IRBuilder<> debugBuilder(DDI);
-        debugBuilder.CreateCall(DDI->getCalledFunction(),
-                                {VMD, DDIVar, DDIExp});
-      }
+      if (DDI) DDI->setOperand(0, MetadataAsValue::get(Context, LocalAsMetadata::get(NewA)));
     }
     }
     // Replace users.
     // Replace users.
     lowerUseWithNewValue(A, NewA);
     lowerUseWithNewValue(A, NewA);

+ 35 - 0
lib/Transforms/Scalar/SROA.cpp

@@ -57,6 +57,7 @@
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "dxc/DXIL/DxilUtil.h"  // HLSL Change - don't sroa resource type.
 #include "dxc/DXIL/DxilUtil.h"  // HLSL Change - don't sroa resource type.
+#include "dxc/DXIL/DxilMetadataHelper.h"  // HLSL Change - support strided debug variables
 #include "dxc/HLSL/HLMatrixType.h"  // HLSL Change - don't sroa matrix types.
 #include "dxc/HLSL/HLMatrixType.h"  // HLSL Change - don't sroa matrix types.
 
 
 #if __cplusplus >= 201103L && !defined(NDEBUG)
 #if __cplusplus >= 201103L && !defined(NDEBUG)
@@ -4310,11 +4311,29 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
     DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
     DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
                   /*AllowUnresolved*/ false);
                   /*AllowUnresolved*/ false);
     bool IsSplit = Pieces.size() > 1;
     bool IsSplit = Pieces.size() > 1;
+
+    // HLSL Change Begins
+    // Take into account debug stride in extra metadata
+    std::vector<hlsl::DxilDIArrayDim> ArrayDims;
+    unsigned FirstFragmentOffsetInBits = 0;
+    if (!hlsl::DxilMDHelper::GetVariableDebugLayout(DbgDecl, FirstFragmentOffsetInBits, ArrayDims)
+      && Expr->isBitPiece()) {
+      FirstFragmentOffsetInBits = Expr->getBitPieceOffset();
+    }
+
+    unsigned FragmentSizeInBits = DL.getTypeAllocSizeInBits(AI.getAllocatedType());
+    for (const hlsl::DxilDIArrayDim& ArrayDim : ArrayDims) {
+      assert(FragmentSizeInBits % ArrayDim.NumElements == 0);
+      FragmentSizeInBits /= ArrayDim.NumElements;
+    }
+    // HLSL Change Ends
+
     for (auto Piece : Pieces) {
     for (auto Piece : Pieces) {
       // Create a piece expression describing the new partition or reuse AI's
       // Create a piece expression describing the new partition or reuse AI's
       // expression if there is only one partition.
       // expression if there is only one partition.
       auto *PieceExpr = Expr;
       auto *PieceExpr = Expr;
       if (IsSplit || Expr->isBitPiece()) {
       if (IsSplit || Expr->isBitPiece()) {
+#if 0 // HLSL Change - Handle Strides
         // If this alloca is already a scalar replacement of a larger aggregate,
         // If this alloca is already a scalar replacement of a larger aggregate,
         // Piece.Offset describes the offset inside the scalar.
         // Piece.Offset describes the offset inside the scalar.
         uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
         uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
@@ -4327,6 +4346,22 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
             continue;
             continue;
           Size = std::min(Size, AbsEnd - Start);
           Size = std::min(Size, AbsEnd - Start);
         }
         }
+// HLSL Change Begins
+#else
+        // Find the fragment from the original user variable in which this piece falls
+        uint64_t PieceFragmentIndex = Piece.Offset / FragmentSizeInBits;
+
+        // Compute the offset in the original user variable
+        uint64_t StartInFragment = Piece.Offset % FragmentSizeInBits;
+        uint64_t Start = FirstFragmentOffsetInBits + Piece.Offset % FragmentSizeInBits;
+        for (auto ArrayDimIter = ArrayDims.rbegin(); ArrayDimIter != ArrayDims.rend(); ++ArrayDimIter) {
+          Start += ArrayDimIter->StrideInBits * (PieceFragmentIndex % ArrayDimIter->NumElements);
+          PieceFragmentIndex /= ArrayDimIter->NumElements;
+        }
+
+        uint64_t Size = std::min<uint64_t>(Piece.Size, FragmentSizeInBits - StartInFragment);
+#endif
+// HLSL Change Ends
         PieceExpr = DIB.createBitPieceExpression(Start, Size);
         PieceExpr = DIB.createBitPieceExpression(Start, Size);
       }
       }
 
 

+ 194 - 50
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -79,6 +79,7 @@ public:
   // Split V into AllocaInsts with Builder and save the new AllocaInsts into Elts.
   // Split V into AllocaInsts with Builder and save the new AllocaInsts into Elts.
   // Then do SROA on V.
   // Then do SROA on V.
   static bool DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
   static bool DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
+                                  Type *&BrokenUpTy, uint64_t &NumInstances,
                                   IRBuilder<> &Builder, bool bFlatVector,
                                   IRBuilder<> &Builder, bool bFlatVector,
                                   bool hasPrecise, DxilTypeSystem &typeSys,
                                   bool hasPrecise, DxilTypeSystem &typeSys,
                                   const DataLayout &DL,
                                   const DataLayout &DL,
@@ -317,6 +318,10 @@ bool SROA_HLSL::runOnFunction(Function &F) {
   HLModule &HLM = M->GetOrCreateHLModule();
   HLModule &HLM = M->GetOrCreateHLModule();
   DxilTypeSystem &typeSys = HLM.GetTypeSystem();
   DxilTypeSystem &typeSys = HLM.GetTypeSystem();
 
 
+  // Establish debug metadata layout name in the context in advance so the name
+  // is serialized in both debug and non-debug compilations.
+  (void)M->getContext().getMDKindID(DxilMDHelper::kDxilVariableDebugLayoutMDName);
+
   bool Changed = performScalarRepl(F, typeSys);
   bool Changed = performScalarRepl(F, typeSys);
   // change rest memcpy into ld/st.
   // change rest memcpy into ld/st.
   MemcpySplitter splitter(F.getContext(), typeSys);
   MemcpySplitter splitter(F.getContext(), typeSys);
@@ -778,6 +783,124 @@ static unsigned getNestedLevelInStruct(const Type *ty) {
   return lvl;
   return lvl;
 }
 }
 
 
+// After SROA'ing a given value into a series of elements,
+// creates the debug info for the storage of the individual elements.
+//
+//   ParentVal    - the value that was broken up: an alloca or an argument.
+//                  Global variables are not implemented (llvm_unreachable).
+//   BrokenUpTy   - the struct/vector/array type whose members became Elems.
+//   NumInstances - how many instances of BrokenUpTy ParentVal contained
+//                  (greater than 1 when BrokenUpTy was nested in arrays).
+//   Elems        - the replacement values, one per member of BrokenUpTy;
+//                  every one of them must be an AllocaInst.
+//   DatLayout    - data layout used to compute sizes and member offsets.
+//   DbgBuilder   - builder used for the new expressions and dbg.declare
+//                  calls; must not be null.
+//
+// Returns without doing anything when ParentVal has no llvm.dbg.declare.
+static void addDebugInfoForElements(Value *ParentVal,
+    Type *BrokenUpTy, uint64_t NumInstances,
+    ArrayRef<Value*> Elems, const DataLayout &DatLayout,
+    DIBuilder *DbgBuilder) {
+
+  // Extract the data we need from the parent value,
+  // depending on whether it is an alloca, argument or global variable.
+  // Everything is null/zero-initialized so that no path can ever read an
+  // indeterminate pointer.
+  Type *ParentTy = nullptr;
+  unsigned ParentBitPieceOffset = 0;
+  std::vector<DxilDIArrayDim> DIArrayDims;
+  DIVariable *ParentDbgVariable = nullptr;
+  DIExpression *ParentDbgExpr = nullptr;
+  DILocation *ParentDbgLocation = nullptr;
+  Instruction *DbgDeclareInsertPt = nullptr;
+  if (isa<GlobalVariable>(ParentVal)) {
+    llvm_unreachable("Not implemented: sroa debug info propagation for global vars.");
+  }
+  else {
+    if (AllocaInst *ParentAlloca = dyn_cast<AllocaInst>(ParentVal))
+      ParentTy = ParentAlloca->getAllocatedType();
+    else
+      ParentTy = cast<Argument>(ParentVal)->getType();
+
+    DbgDeclareInst *ParentDbgDeclare = llvm::FindAllocaDbgDeclare(ParentVal);
+    if (ParentDbgDeclare == nullptr) return;
+
+    // Get the bit piece offset
+    if ((ParentDbgExpr = ParentDbgDeclare->getExpression())) {
+      if (ParentDbgExpr->isBitPiece()) {
+        ParentBitPieceOffset = ParentDbgExpr->getBitPieceOffset();
+      }
+    }
+
+    ParentDbgVariable = ParentDbgDeclare->getVariable();
+    ParentDbgLocation = ParentDbgDeclare->getDebugLoc();
+    DbgDeclareInsertPt = ParentDbgDeclare;
+
+    // Read the extra layout metadata, if any
+    unsigned ParentBitPieceOffsetFromMD = 0;
+    if (DxilMDHelper::GetVariableDebugLayout(ParentDbgDeclare, ParentBitPieceOffsetFromMD, DIArrayDims)) {
+      // The offset is redundant for local variables and only necessary for global variables.
+      DXASSERT(ParentBitPieceOffsetFromMD == ParentBitPieceOffset,
+        "Bit piece offset mismatch between llvm.dbg.declare and DXIL metadata.");
+    }
+  }
+
+  // If the type that was broken up is nested in arrays,
+  // then each element will also be an array,
+  // but the continuity between successive elements of the original aggregate
+  // will have been broken, such that we must store the stride to rebuild it.
+  // For example: [2 x {i32, float}] => [2 x i32], [2 x float], each with stride 64 bits
+  if (NumInstances > 1 && Elems.size() > 1) {
+    // Existing dimensions already account for part of the stride
+    uint64_t NewDimNumElements = NumInstances;
+    for (const DxilDIArrayDim& ArrayDim : DIArrayDims) {
+      DXASSERT(NewDimNumElements % ArrayDim.NumElements == 0,
+        "Debug array stride is inconsistent with the number of elements.");
+      NewDimNumElements /= ArrayDim.NumElements;
+    }
+
+    // Add a stride dimension
+    DxilDIArrayDim NewDIArrayDim = {};
+    NewDIArrayDim.StrideInBits = (unsigned)DatLayout.getTypeAllocSizeInBits(BrokenUpTy);
+    NewDIArrayDim.NumElements = (unsigned)NewDimNumElements;
+    DIArrayDims.emplace_back(NewDIArrayDim);
+  }
+  else {
+    DIArrayDims.clear();
+  }
+
+  // The parent's size does not change from one element to the next,
+  // so compute it once up front.
+  const uint64_t ParentSizeInBits = DatLayout.getTypeAllocSizeInBits(ParentTy);
+
+  // Create the debug info for each element
+  for (unsigned ElemIdx = 0; ElemIdx < Elems.size(); ++ElemIdx) {
+    // Figure out the offset of the element in the broken up type
+    unsigned ElemBitPieceOffset = ParentBitPieceOffset;
+    if (StructType *ParentStructTy = dyn_cast<StructType>(BrokenUpTy)) {
+      DXASSERT_NOMSG(Elems.size() == ParentStructTy->getNumElements());
+      ElemBitPieceOffset += (unsigned)DatLayout.getStructLayout(ParentStructTy)->getElementOffsetInBits(ElemIdx);
+    }
+    else if (VectorType *ParentVecTy = dyn_cast<VectorType>(BrokenUpTy)) {
+      DXASSERT_NOMSG(Elems.size() == ParentVecTy->getNumElements());
+      ElemBitPieceOffset += (unsigned)DatLayout.getTypeStoreSizeInBits(ParentVecTy->getElementType()) * ElemIdx;
+    }
+    else if (ArrayType *ParentArrayTy = dyn_cast<ArrayType>(BrokenUpTy)) {
+      DXASSERT_NOMSG(Elems.size() == ParentArrayTy->getNumElements());
+      ElemBitPieceOffset += (unsigned)DatLayout.getTypeStoreSizeInBits(ParentArrayTy->getElementType()) * ElemIdx;
+    }
+
+    // The bit_piece can only represent the leading contiguous bytes.
+    // If strides are involved, we'll need additional metadata.
+    Type *ElemTy = Elems[ElemIdx]->getType()->getPointerElementType();
+    unsigned ElemBitPieceSize = (unsigned)DatLayout.getTypeAllocSizeInBits(ElemTy);
+    for (const DxilDIArrayDim& ArrayDim : DIArrayDims)
+      ElemBitPieceSize /= ArrayDim.NumElements;
+
+    if (AllocaInst *ElemAlloca = dyn_cast<AllocaInst>(Elems[ElemIdx])) {
+      // Check the builder before its first use; checking it after the
+      // expression has already been created would catch nothing.
+      DXASSERT_NOMSG(DbgBuilder != nullptr);
+
+      // Local variables get an @llvm.dbg.declare plus optional metadata for layout stride information.
+      // An element that covers the whole parent needs no bit piece.
+      DIExpression *ElemDbgExpr = nullptr;
+      if (ElemBitPieceOffset == 0 && ParentSizeInBits == ElemBitPieceSize) {
+        ElemDbgExpr = DbgBuilder->createExpression();
+      }
+      else {
+        ElemDbgExpr = DbgBuilder->createBitPieceExpression(ElemBitPieceOffset, ElemBitPieceSize);
+      }
+
+      DbgDeclareInst *EltDDI = cast<DbgDeclareInst>(DbgBuilder->insertDeclare(
+        ElemAlloca, cast<DILocalVariable>(ParentDbgVariable), ElemDbgExpr, ParentDbgLocation, DbgDeclareInsertPt));
+
+      if (!DIArrayDims.empty()) DxilMDHelper::SetVariableDebugLayout(EltDDI, ElemBitPieceOffset, DIArrayDims);
+    }
+    else {
+      llvm_unreachable("Non-AllocaInst SROA'd elements.");
+    }
+  }
+}
+
 /// Returns first GEP index that indexes a struct member, or 0 otherwise.
 /// Returns first GEP index that indexes a struct member, or 0 otherwise.
 /// Ignores initial ptr index.
 /// Ignores initial ptr index.
 static unsigned FindFirstStructMemberIdxInGEP(GEPOperator *GEP) {
 static unsigned FindFirstStructMemberIdxInGEP(GEPOperator *GEP) {
@@ -993,11 +1116,6 @@ bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
   std::priority_queue<AllocaInst *, std::vector<AllocaInst *>,
   std::priority_queue<AllocaInst *, std::vector<AllocaInst *>,
                       std::function<bool(AllocaInst *, AllocaInst *)>>
                       std::function<bool(AllocaInst *, AllocaInst *)>>
       WorkList(size_cmp);
       WorkList(size_cmp);
-  std::unordered_map<AllocaInst*, DbgDeclareInst*> DDIMap;
-  // HLSL Change - Begin
-  std::unordered_map<AllocaInst*, unsigned> OffsetMap; // Map to keep track the offset of an alloca
-                                                       // in the variable that it's a part of.
-  // HLSL Change - End
   // Scan the entry basic block, adding allocas to the worklist.
   // Scan the entry basic block, adding allocas to the worklist.
   BasicBlock &BB = F.getEntryBlock();
   BasicBlock &BB = F.getEntryBlock();
   for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
   for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
@@ -1006,9 +1124,6 @@ bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
         WorkList.push(A);
         WorkList.push(A);
         // merge GEP use for the allocs
         // merge GEP use for the allocs
         HLModule::MergeGepUse(A);
         HLModule::MergeGepUse(A);
-        if (DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(A)) {
-          DDIMap[A] = DDI;
-        }
       }
       }
     }
     }
 
 
@@ -1074,9 +1189,11 @@ bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
       IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(AI));
       IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(AI));
       bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
       bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
 
 
+      Type *BrokenUpTy = nullptr;
+      uint64_t NumInstances = 1;
       bool SROAed = SROA_Helper::DoScalarReplacement(
       bool SROAed = SROA_Helper::DoScalarReplacement(
-          AI, Elts, Builder, /*bFlatVector*/ true, hasPrecise, typeSys, DL,
-          DeadInsts);
+        AI, Elts, BrokenUpTy, NumInstances, Builder,
+        /*bFlatVector*/ true, hasPrecise, typeSys, DL, DeadInsts);
 
 
       if (SROAed) {
       if (SROAed) {
         Type *Ty = AI->getAllocatedType();
         Type *Ty = AI->getAllocatedType();
@@ -1096,45 +1213,13 @@ bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
             }
             }
           }
           }
         }
         }
-// HLSL Change - Begin
-        unsigned parentOffset = 0;
-        auto offsetIt = OffsetMap.find(AI);
-        if (offsetIt != OffsetMap.end())
-          parentOffset = offsetIt->second;
-// HLSL Change - End
-
-        DbgDeclareInst *DDI = nullptr;
-        unsigned debugOffset = 0;
-        auto iter = DDIMap.find(AI);
-        if (iter != DDIMap.end()) {
-          DDI = iter->second;
-        }
-        // Push Elts into workList.
-        for (auto iter = Elts.begin(); iter != Elts.end(); iter++) {
-          AllocaInst *Elt = cast<AllocaInst>(*iter);
-          WorkList.push(Elt);
-          if (DDI) {
-            Type *Ty = Elt->getAllocatedType();
-            unsigned size = DL.getTypeAllocSize(Ty);
-#if 0 // HLSL Change
-            DIExpression *DDIExp =
-                DIB.createBitPieceExpression(debugOffset, size);
-#else // HLSL Change
 
 
-            DIExpression *DDIExp = nullptr;
-            if (parentOffset+debugOffset == 0 && DL.getTypeAllocSize(AI->getAllocatedType()) == size) {
-              DDIExp = DIB.createExpression();
-            }
-            else {
-              DDIExp = DIB.createBitPieceExpression((parentOffset+debugOffset) * 8, size * 8);
-            }
-            OffsetMap[Elt] = parentOffset+debugOffset;
-#endif // HLSL Change
-            debugOffset += size;
-            DbgDeclareInst *EltDDI = cast<DbgDeclareInst>(DIB.insertDeclare(
-                Elt, DDI->getVariable(), DDIExp, DDI->getDebugLoc(), DDI));
-            DDIMap[Elt] = EltDDI;
-          }
+        addDebugInfoForElements(AI, BrokenUpTy, NumInstances, Elts, DL, &DIB);
+
+        // Push Elts into workList.
+        for (unsigned EltIdx = 0; EltIdx < Elts.size(); ++EltIdx) {
+          AllocaInst *EltAlloca = cast<AllocaInst>(Elts[EltIdx]);
+          WorkList.push(EltAlloca);
         }
         }
 
 
         // Now erase any instructions that were made dead while rewriting the
         // Now erase any instructions that were made dead while rewriting the
@@ -2927,6 +3012,7 @@ static ArrayType *CreateNestArrayTy(Type *FinalEltTy,
 /// DoScalarReplacement - Split V into AllocaInsts with Builder and save the new AllocaInsts into Elts.
 /// DoScalarReplacement - Split V into AllocaInsts with Builder and save the new AllocaInsts into Elts.
 /// Then do SROA on V.
 /// Then do SROA on V.
 bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
 bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
+                                      Type *&BrokenUpTy, uint64_t &NumInstances,
                                       IRBuilder<> &Builder, bool bFlatVector,
                                       IRBuilder<> &Builder, bool bFlatVector,
                                       bool hasPrecise, DxilTypeSystem &typeSys,
                                       bool hasPrecise, DxilTypeSystem &typeSys,
                                       const DataLayout &DL,
                                       const DataLayout &DL,
@@ -2953,6 +3039,9 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
       return false;
       return false;
     }
     }
 
 
+    BrokenUpTy = ST;
+    NumInstances = 1;
+
     unsigned numTypes = ST->getNumContainedTypes();
     unsigned numTypes = ST->getNumContainedTypes();
     Elts.reserve(numTypes);
     Elts.reserve(numTypes);
     DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
     DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
@@ -2978,14 +3067,16 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
     }
     }
     Type *ElTy = AT->getElementType();
     Type *ElTy = AT->getElementType();
     SmallVector<ArrayType *, 4> nestArrayTys;
     SmallVector<ArrayType *, 4> nestArrayTys;
-
     nestArrayTys.emplace_back(AT);
     nestArrayTys.emplace_back(AT);
+    NumInstances = AT->getNumElements();
     // support multi level of array
     // support multi level of array
     while (ElTy->isArrayTy()) {
     while (ElTy->isArrayTy()) {
       ArrayType *ElAT = cast<ArrayType>(ElTy);
       ArrayType *ElAT = cast<ArrayType>(ElTy);
       nestArrayTys.emplace_back(ElAT);
       nestArrayTys.emplace_back(ElAT);
+      NumInstances *= ElAT->getNumElements();
       ElTy = ElAT->getElementType();
       ElTy = ElAT->getElementType();
     }
     }
+    BrokenUpTy = ElTy;
 
 
     if (ElTy->isStructTy() &&
     if (ElTy->isStructTy() &&
         // Skip Matrix type.
         // Skip Matrix type.
@@ -3020,6 +3111,8 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
             // Only support 1 dim split.
             // Only support 1 dim split.
             nestArrayTys.size() > 1)
             nestArrayTys.size() > 1)
           return false;
           return false;
+        BrokenUpTy = AT;
+        NumInstances = 1;
         for (int i = 0, e = AT->getNumElements(); i != e; ++i) {
         for (int i = 0, e = AT->getNumElements(); i != e; ++i) {
           AllocaInst *NA = AllocaBuilder.CreateAlloca(ElTy, nullptr,
           AllocaInst *NA = AllocaBuilder.CreateAlloca(ElTy, nullptr,
                                                 V->getName() + "." + Twine(i));
                                                 V->getName() + "." + Twine(i));
@@ -3034,6 +3127,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
       // for array of vector
       // for array of vector
       // split into arrays of scalar
       // split into arrays of scalar
       VectorType *ElVT = cast<VectorType>(ElTy);
       VectorType *ElVT = cast<VectorType>(ElTy);
+      BrokenUpTy = ElVT;
       Elts.reserve(ElVT->getNumElements());
       Elts.reserve(ElVT->getNumElements());
 
 
       ArrayType *scalarArrayTy = CreateNestArrayTy(ElVT->getElementType(), nestArrayTys);
       ArrayType *scalarArrayTy = CreateNestArrayTy(ElVT->getElementType(), nestArrayTys);
@@ -5096,9 +5190,12 @@ void SROA_Parameter_HLSL::flattenArgument(
 
 
     // Not flat vector for entry function currently.
     // Not flat vector for entry function currently.
     bool SROAed = false;
     bool SROAed = false;
+    Type *BrokenUpTy = nullptr;
+    uint64_t NumInstances = 1;
     if (inputQual != DxilParamInputQual::InPayload) {
     if (inputQual != DxilParamInputQual::InPayload) {
       SROAed = SROA_Helper::DoScalarReplacement(
       SROAed = SROA_Helper::DoScalarReplacement(
-        V, Elts, Builder, /*bFlatVector*/ false, annotation.IsPrecise(),
+        V, Elts, BrokenUpTy, NumInstances, Builder, 
+        /*bFlatVector*/ false, annotation.IsPrecise(),
         dxilTypeSys, DL, DeadInsts);
         dxilTypeSys, DL, DeadInsts);
     }
     }
 
 
@@ -6073,6 +6170,7 @@ ModulePass *llvm::createSROA_Parameter_HLSL() {
 namespace {
 namespace {
 class LowerStaticGlobalIntoAlloca : public ModulePass {
 class LowerStaticGlobalIntoAlloca : public ModulePass {
   HLModule *m_pHLModule;
   HLModule *m_pHLModule;
+  DebugInfoFinder m_DbgFinder;
 
 
 public:
 public:
   static char ID; // Pass identification, replacement for typeid
   static char ID; // Pass identification, replacement for typeid
@@ -6081,6 +6179,7 @@ public:
 
 
   bool runOnModule(Module &M) override {
   bool runOnModule(Module &M) override {
     m_pHLModule = &M.GetOrCreateHLModule();
     m_pHLModule = &M.GetOrCreateHLModule();
+    m_DbgFinder.processModule(M);
 
 
     // Lower static global into allocas.
     // Lower static global into allocas.
     std::vector<GlobalVariable *> staticGVs;
     std::vector<GlobalVariable *> staticGVs;
@@ -6109,6 +6208,49 @@ private:
 };
 };
 }
 }
 
 
+// Looks through the global variables collected by DbgFinder and returns the
+// first debug descriptor whose variable is GV, or nullptr if there is none.
+static
+DIGlobalVariable *FindGlobalVariableFor(const DebugInfoFinder &DbgFinder, GlobalVariable *GV) {
+  DIGlobalVariable *Match = nullptr;
+  for (auto *Candidate : DbgFinder.global_variables()) {
+    if (Candidate->getVariable() != GV)
+      continue;
+    Match = Candidate;
+    break;
+  }
+  return Match;
+}
+
+// Gives the alloca AI, which replaces the static global GV inside function F,
+// a local debug variable named "global.<name of GV's debug variable>" and
+// inserts an llvm.dbg.declare for it right after AI.
+//
+// Does nothing when the module has no compile units, when GV has no debug
+// descriptor, or when F has no subprogram (there would be no valid local
+// scope to attach the variable to).
+static
+void PatchDebugInfo(const DebugInfoFinder &DbgFinder, Function *F, GlobalVariable *GV, AllocaInst *AI) {
+  if (!DbgFinder.compile_unit_count())
+    return;
+
+  // Cheapest early exit first: nothing to migrate without a descriptor.
+  DIGlobalVariable *DGV = FindGlobalVariableFor(DbgFinder, GV);
+  if (!DGV)
+    return;
+
+  // Find the subprogram for function
+  DISubprogram *Subprogram = nullptr;
+  for (DISubprogram *SP : DbgFinder.subprograms()) {
+    if (SP->getFunction() == F) {
+      Subprogram = SP;
+      break;
+    }
+  }
+
+  // A lexical block and a local variable both need a real enclosing scope;
+  // do not build debug info hanging off a null subprogram.
+  if (!Subprogram)
+    return;
+
+  DITypeIdentifierMap EmptyMap;
+  DIBuilder DIB(*GV->getParent());
+
+  // Prefer the file of the global's scope; fall back to the file recorded
+  // on the global itself if it has no scope.
+  DIScope *ParentScope = DGV->getScope();
+  DIFile *File = ParentScope ? ParentScope->getFile() : DGV->getFile();
+
+  DIScope *Scope = DIB.createLexicalBlock(Subprogram, File, 0, 0);
+  DebugLoc Loc = DebugLoc::get(0, 0, Scope);
+
+  std::string Name = "global.";
+  Name += DGV->getName();
+
+  DIType *Ty = DGV->getType().resolve(EmptyMap);
+  DILocalVariable *ConvertedLocalVar = DIB.createLocalVariable(llvm::dwarf::Tag::DW_TAG_variable, Scope, Name, DGV->getFile(), DGV->getLine(), Ty);
+  DIB.insertDeclare(AI, ConvertedLocalVar, DIB.createExpression(ArrayRef<int64_t>()), Loc, AI->getNextNode());
+}
+
 bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV, const DataLayout &DL) {
 bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV, const DataLayout &DL) {
   DxilTypeSystem &typeSys = m_pHLModule->GetTypeSystem();
   DxilTypeSystem &typeSys = m_pHLModule->GetTypeSystem();
   unsigned size = DL.getTypeAllocSize(GV->getType()->getElementType());
   unsigned size = DL.getTypeAllocSize(GV->getType()->getElementType());
@@ -6134,6 +6276,8 @@ bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV
   }
   }
 
 
   ReplaceConstantWithInst(GV, AI, Builder);
   ReplaceConstantWithInst(GV, AI, Builder);
+  PatchDebugInfo(m_DbgFinder, F, GV, AI);
+
   GV->eraseFromParent();
   GV->eraseFromParent();
   return true;
   return true;
 }
 }

+ 45 - 3
lib/Transforms/Scalar/Scalarizer.cpp

@@ -45,13 +45,19 @@ typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList;
 // component of a scattered vector or vector pointer.
 // component of a scattered vector or vector pointer.
 class Scatterer {
 class Scatterer {
 public:
 public:
+  bool AllowFolding = false; // HLSL Change
   Scatterer() {}
   Scatterer() {}
 
 
   // Scatter V into Size components.  If new instructions are needed,
   // Scatter V into Size components.  If new instructions are needed,
   // insert them before BBI in BB.  If Cache is nonnull, use it to cache
   // insert them before BBI in BB.  If Cache is nonnull, use it to cache
   // the results.
   // the results.
+#if 0 // HLSL Change
   Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
   Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
             ValueVector *cachePtr = nullptr);
             ValueVector *cachePtr = nullptr);
+#else // HLSL Change
+  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, bool AllowFolding,
+            ValueVector *cachePtr = nullptr);
+#endif // HLSL Change
 
 
   // Return component I, creating a new Value for it if necessary.
   // Return component I, creating a new Value for it if necessary.
   Value *operator[](unsigned I);
   Value *operator[](unsigned I);
@@ -143,6 +149,14 @@ class Scalarizer : public FunctionPass,
 public:
 public:
   static char ID;
   static char ID;
 
 
+// HLSL Change Begin
+  bool AllowFolding = false;
+  // Constructs the pass with constant folding explicitly enabled or
+  // disabled. Marked explicit so a bare bool can never silently convert
+  // into a pass object. The parameter intentionally shares the member's
+  // name; the mem-initializer below resolves to member(parameter).
+  explicit Scalarizer(bool AllowFolding) :
+    FunctionPass(ID),
+    AllowFolding(AllowFolding) {
+    initializeScalarizerPass(*PassRegistry::getPassRegistry());
+  }
+// HLSL Change End
   Scalarizer() :
   Scalarizer() :
     FunctionPass(ID) {
     FunctionPass(ID) {
     initializeScalarizerPass(*PassRegistry::getPassRegistry());
     initializeScalarizerPass(*PassRegistry::getPassRegistry());
@@ -197,10 +211,16 @@ char Scalarizer::ID = 0;
 
 
 INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
 INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
                              "Scalarize vector operations", false, false)
                              "Scalarize vector operations", false, false)
-
+#if 0 // HLSL Change
 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                      ValueVector *cachePtr)
                      ValueVector *cachePtr)
   : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
   : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
+#else // HLSL Change
+Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+                     bool AllowFolding,
+                     ValueVector *cachePtr)
+  : AllowFolding(AllowFolding), BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
+#endif // HLSL Change
   Type *Ty = V->getType();
   Type *Ty = V->getType();
   PtrTy = dyn_cast<PointerType>(Ty);
   PtrTy = dyn_cast<PointerType>(Ty);
   if (PtrTy)
   if (PtrTy)
@@ -221,6 +241,7 @@ Value *Scatterer::operator[](unsigned I) {
   if (CV[I])
   if (CV[I])
     return CV[I];
     return CV[I];
   IRBuilder<> Builder(BB, BBI);
   IRBuilder<> Builder(BB, BBI);
+  Builder.AllowFolding = AllowFolding; // HLSL Change
   if (PtrTy) {
   if (PtrTy) {
     if (!CV[0]) {
     if (!CV[0]) {
       Type *Ty =
       Type *Ty =
@@ -295,19 +316,25 @@ Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
     auto InsertPoint = BB->begin();
     auto InsertPoint = BB->begin();
     while (InsertPoint != BB->end() && isa<DbgInfoIntrinsic>(InsertPoint))
     while (InsertPoint != BB->end() && isa<DbgInfoIntrinsic>(InsertPoint))
       InsertPoint++;
       InsertPoint++;
-    return Scatterer(BB, InsertPoint, V, &Scattered[V]);
+    return Scatterer(BB, InsertPoint, V, AllowFolding, &Scattered[V]);
     // HLSL Change - End
     // HLSL Change - End
   }
   }
   if (Instruction *VOp = dyn_cast<Instruction>(V)) {
   if (Instruction *VOp = dyn_cast<Instruction>(V)) {
     // Put the scattered form of an instruction directly after the
     // Put the scattered form of an instruction directly after the
     // instruction.
     // instruction.
     BasicBlock *BB = VOp->getParent();
     BasicBlock *BB = VOp->getParent();
+#if 0 // HLSL Change
     return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
     return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
                      V, &Scattered[V]);
                      V, &Scattered[V]);
+#else // HLSL Change
+    return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
+                     V, AllowFolding, &Scattered[V]);
+#endif // HLSL Change
   }
   }
   // In the fallback case, just put the scattered before Point and
   // In the fallback case, just put the scattered before Point and
   // keep the result local to Point.
   // keep the result local to Point.
-  return Scatterer(Point->getParent(), Point, V);
+  // return Scatterer(Point->getParent(), Point, V); // HLSL Change
+  return Scatterer(Point->getParent(), Point, V, AllowFolding);
 }
 }
 
 
 // Replace Op with the gathered form of the components in CV.  Defer the
 // Replace Op with the gathered form of the components in CV.  Defer the
@@ -404,6 +431,7 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
 
 
   unsigned NumElems = VT->getNumElements();
   unsigned NumElems = VT->getNumElements();
   IRBuilder<> Builder(I.getParent(), &I);
   IRBuilder<> Builder(I.getParent(), &I);
+  Builder.AllowFolding = AllowFolding; // HLSL Change
   Scatterer Op0 = scatter(&I, I.getOperand(0));
   Scatterer Op0 = scatter(&I, I.getOperand(0));
   Scatterer Op1 = scatter(&I, I.getOperand(1));
   Scatterer Op1 = scatter(&I, I.getOperand(1));
   assert(Op0.size() == NumElems && "Mismatched binary operation");
   assert(Op0.size() == NumElems && "Mismatched binary operation");
@@ -424,6 +452,7 @@ bool Scalarizer::visitSelectInst(SelectInst &SI) {
 
 
   unsigned NumElems = VT->getNumElements();
   unsigned NumElems = VT->getNumElements();
   IRBuilder<> Builder(SI.getParent(), &SI);
   IRBuilder<> Builder(SI.getParent(), &SI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Op1 = scatter(&SI, SI.getOperand(1));
   Scatterer Op1 = scatter(&SI, SI.getOperand(1));
   Scatterer Op2 = scatter(&SI, SI.getOperand(2));
   Scatterer Op2 = scatter(&SI, SI.getOperand(2));
   assert(Op1.size() == NumElems && "Mismatched select");
   assert(Op1.size() == NumElems && "Mismatched select");
@@ -465,6 +494,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
     return false;
     return false;
 
 
   IRBuilder<> Builder(GEPI.getParent(), &GEPI);
   IRBuilder<> Builder(GEPI.getParent(), &GEPI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   unsigned NumElems = VT->getNumElements();
   unsigned NumElems = VT->getNumElements();
   unsigned NumIndices = GEPI.getNumIndices();
   unsigned NumIndices = GEPI.getNumIndices();
 
 
@@ -499,6 +529,7 @@ bool Scalarizer::visitCastInst(CastInst &CI) {
 
 
   unsigned NumElems = VT->getNumElements();
   unsigned NumElems = VT->getNumElements();
   IRBuilder<> Builder(CI.getParent(), &CI);
   IRBuilder<> Builder(CI.getParent(), &CI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Op0 = scatter(&CI, CI.getOperand(0));
   Scatterer Op0 = scatter(&CI, CI.getOperand(0));
   assert(Op0.size() == NumElems && "Mismatched cast");
   assert(Op0.size() == NumElems && "Mismatched cast");
   ValueVector Res;
   ValueVector Res;
@@ -519,6 +550,7 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
   unsigned DstNumElems = DstVT->getNumElements();
   unsigned DstNumElems = DstVT->getNumElements();
   unsigned SrcNumElems = SrcVT->getNumElements();
   unsigned SrcNumElems = SrcVT->getNumElements();
   IRBuilder<> Builder(BCI.getParent(), &BCI);
   IRBuilder<> Builder(BCI.getParent(), &BCI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
   Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
   ValueVector Res;
   ValueVector Res;
   Res.resize(DstNumElems);
   Res.resize(DstNumElems);
@@ -606,6 +638,7 @@ bool Scalarizer::visitPHINode(PHINode &PHI) {
 
 
   unsigned NumElems = VT->getNumElements();
   unsigned NumElems = VT->getNumElements();
   IRBuilder<> Builder(PHI.getParent(), &PHI);
   IRBuilder<> Builder(PHI.getParent(), &PHI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   ValueVector Res;
   ValueVector Res;
   Res.resize(NumElems);
   Res.resize(NumElems);
 
 
@@ -637,6 +670,7 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
 
 
   unsigned NumElems = Layout.VecTy->getNumElements();
   unsigned NumElems = Layout.VecTy->getNumElements();
   IRBuilder<> Builder(LI.getParent(), &LI);
   IRBuilder<> Builder(LI.getParent(), &LI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
   Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
   ValueVector Res;
   ValueVector Res;
   Res.resize(NumElems);
   Res.resize(NumElems);
@@ -662,6 +696,7 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
 
 
   unsigned NumElems = Layout.VecTy->getNumElements();
   unsigned NumElems = Layout.VecTy->getNumElements();
   IRBuilder<> Builder(SI.getParent(), &SI);
   IRBuilder<> Builder(SI.getParent(), &SI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
   Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
   Scatterer Val = scatter(&SI, FullValue);
   Scatterer Val = scatter(&SI, FullValue);
 
 
@@ -762,6 +797,7 @@ bool Scalarizer::finish() {
       BasicBlock *BB = Op->getParent();
       BasicBlock *BB = Op->getParent();
       unsigned Count = Ty->getVectorNumElements();
       unsigned Count = Ty->getVectorNumElements();
       IRBuilder<> Builder(BB, Op);
       IRBuilder<> Builder(BB, Op);
+      Builder.AllowFolding = this->AllowFolding; // HLSL Change
       if (isa<PHINode>(Op))
       if (isa<PHINode>(Op))
         Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
         Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
       for (unsigned I = 0; I < Count; ++I)
       for (unsigned I = 0; I < Count; ++I)
@@ -786,6 +822,12 @@ bool Scalarizer::finish() {
   return true;
   return true;
 }
 }
 
 
+// HLSL Change Begin
+FunctionPass *llvm::createScalarizerPass(bool AllowFolding) {
+  Scalarizer *pass = new Scalarizer(AllowFolding);
+  return pass;
+}
+// HLSL Change End
 FunctionPass *llvm::createScalarizerPass() {
 FunctionPass *llvm::createScalarizerPass() {
   return new Scalarizer();
   return new Scalarizer();
 }
 }

+ 1 - 1
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -1418,7 +1418,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
       funcProps->ShaderProps.GS.instanceCount = 1;
       funcProps->ShaderProps.GS.instanceCount = 1;
   }
   }
 
 
-  // Computer shader.
+  // Compute shader
   if (const HLSLNumThreadsAttr *Attr = FD->getAttr<HLSLNumThreadsAttr>()) {
   if (const HLSLNumThreadsAttr *Attr = FD->getAttr<HLSLNumThreadsAttr>()) {
     if (isMS) {
     if (isMS) {
       funcProps->ShaderProps.MS.numThreads[0] = Attr->getX();
       funcProps->ShaderProps.MS.numThreads[0] = Attr->getX();

+ 1 - 0
tools/clang/lib/Frontend/CMakeLists.txt

@@ -45,6 +45,7 @@ add_clang_library(clangFrontend
 
 
   DEPENDS
   DEPENDS
   ClangDriverOptions
   ClangDriverOptions
+  TablegenHLSLOptions
 
 
   LINK_LIBS
   LINK_LIBS
   clangAST
   clangAST

+ 39 - 0
tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_nested_noopt.hlsl

@@ -0,0 +1,39 @@
+// RUN: %dxc -E main -T vs_6_0 -Zi -Od %s | FileCheck %s
+
+// Test that SROA for local nested arrays of structs/vectors
+// produces and preserves the extra metadata to express strides
+// in the original user variable.
+
+// CHECK-DAG: alloca [6 x float]
+// CHECK-DAG: alloca [6 x float]
+// CHECK-DAG: %[[a:.*]] = alloca [12 x i32]
+
+// CHECK-DAG: call void @llvm.dbg.declare(metadata [12 x i32]* %[[a]], metadata !{{.*}}, metadata ![[aexpr:.*]]), !dbg !{{.*}}, !dx.dbg.varlayout ![[alayout:.*]]
+// CHECK-DAG: call void @llvm.dbg.declare(metadata [6 x float]* %{{.*}}, metadata !{{.*}}, metadata !{{.*}}), !dbg !{{.*}}, !dx.dbg.varlayout !{{.*}}
+// CHECK-DAG: call void @llvm.dbg.declare(metadata [6 x float]* %{{.*}}, metadata !{{.*}}, metadata !{{.*}}), !dbg !{{.*}}, !dx.dbg.varlayout !{{.*}}
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK: !DILocalVariable(tag: DW_TAG_auto_variable, name: "var"
+
+// Debug info for field a should include a contiguous chunk of 32*4=128 bits at offset 0,
+// and the rest expressed as array stride metadata:
+// CHECK-DAG: ![[aexpr]] = !DIExpression(DW_OP_bit_piece, 0, 128)
+// CHECK-DAG: ![[alayout]] = !{i32 0, i32 256, i32 3}
+
+// Debug info for b should be in two parts (b.x and b.y),
+// it should have bit pieces for the first float,
+// and have associated array stride metadata.
+
+// CHECK-DAG: !DIExpression(DW_OP_bit_piece, 128, 32)
+// CHECK-DAG: !{i32 128, i32 256, i32 3, i32 64, i32 2}
+// CHECK-DAG: !DIExpression(DW_OP_bit_piece, 160, 32)
+// CHECK-DAG: !{i32 160, i32 256, i32 3, i32 64, i32 2}
+
+typedef struct { int a[4]; float2 b[2]; } type[3];
+
+int main() : OUT {
+  type var = (type)0;
+  return var[0].a[0];
+}

+ 28 - 0
tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_noopt.hlsl

@@ -0,0 +1,28 @@
+// RUN: %dxc -E main -T vs_6_0 -Zi -Od %s | FileCheck %s
+
+// Check that debug info is preserved with stride information
+// for arrays of structs getting SROA'd down into arrays of struct elements,
+// when compiling without optimizations.
+
+// CHECK-DAG: %[[intalloca:.*]] = alloca [2 x i32]
+// CHECK-DAG: %[[floatalloca:.*]] = alloca [2 x float]
+
+// CHECK-DAG: call void @llvm.dbg.declare(metadata [2 x i32]* %[[intalloca]], metadata !{{.*}}, metadata ![[intdiexpr:.*]]), !dbg !{{.*}}, !dx.dbg.varlayout ![[intlayout:.*]]
+// CHECK-DAG: call void @llvm.dbg.declare(metadata [2 x float]* %[[floatalloca]], metadata !{{.*}}, metadata ![[floatdiexpr:.*]]), !dbg !{{.*}}, !dx.dbg.varlayout ![[floatlayout:.*]]
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK-DAG: !DILocalVariable(tag: DW_TAG_auto_variable, name: "var"
+
+// CHECK-DAG: ![[intdiexpr]] = !DIExpression(DW_OP_bit_piece, 0, 32)
+// CHECK-DAG: ![[intlayout]] = !{i32 0, i32 64, i32 2}
+// CHECK-DAG: ![[floatdiexpr]] = !DIExpression(DW_OP_bit_piece, 32, 32)
+// CHECK-DAG: ![[floatlayout]] = !{i32 32, i32 64, i32 2}
+
+struct intfloat { int i; float f; };
+float4 main(int i : IN) : OUT
+{
+  intfloat var[2] = (intfloat[2])i;
+  return float4(var[0].i, var[0].f, var[1].i, var[1].f);
+}

+ 41 - 0
tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_opt.hlsl

@@ -0,0 +1,41 @@
+// RUN: %dxc -E main -T vs_6_0 -Zi %s | FileCheck %s
+
+// Check that debug info is preserved for arrays of structs
+// getting SROA'd down into arrays of struct elements,
+// then SROA'd into individual allocas and promoted to registers,
+// when compiling with optimizations.
+
+// CHECK-DAG: %[[i1:.*]] = extractvalue %dx.types.CBufRet.i32 %{{.*}}, 0
+// CHECK-DAG: %[[f1:.*]] = extractvalue %dx.types.CBufRet.f32 %{{.*}}, 1
+// CHECK-DAG: %[[i2:.*]] = extractvalue %dx.types.CBufRet.i32 %{{.*}}, 2
+// CHECK-DAG: %[[f2:.*]] = extractvalue %dx.types.CBufRet.f32 %{{.*}}, 3
+
+// CHECK-DAG: call void @llvm.dbg.value(metadata i32 %[[i1]], i64 0, metadata !{{.*}}, metadata ![[i1expr:.*]])
+// CHECK-DAG: call void @llvm.dbg.value(metadata float %[[f1]], i64 0, metadata !{{.*}}, metadata ![[f1expr:.*]])
+// CHECK-DAG: call void @llvm.dbg.value(metadata i32 %[[i2]], i64 0, metadata !{{.*}}, metadata ![[i2expr:.*]])
+// CHECK-DAG: call void @llvm.dbg.value(metadata float %[[f2]], i64 0, metadata !{{.*}}, metadata ![[f2expr:.*]])
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK-DAG: !DILocalVariable(tag: DW_TAG_auto_variable, name: "var"
+
+// CHECK-DAG: ![[i1expr]] = !DIExpression(DW_OP_bit_piece, 0, 32)
+// CHECK-DAG: ![[f1expr]] = !DIExpression(DW_OP_bit_piece, 32, 32)
+// CHECK-DAG: ![[i2expr]] = !DIExpression(DW_OP_bit_piece, 64, 32)
+// CHECK-DAG: ![[f2expr]] = !DIExpression(DW_OP_bit_piece, 96, 32)
+
+struct intfloat { int i; float f; };
+
+int cb_i1; float cb_f1;
+int cb_i2; float cb_f2;
+
+void main(
+    out int o_i1 : I1, out float o_f1 : F1,
+    out int o_i2 : I2, out float o_f2 : F2)
+{
+  intfloat var[2] = { cb_i1, cb_f1, cb_i2, cb_f2 };
+  
+  o_i1 = var[0].i; o_f1 = var[0].f;
+  o_i2 = var[1].i; o_f2 = var[1].f;
+}

+ 2 - 1
tools/clang/test/CodeGenSPIRV/spirv.debug.cl-option.hlsl

@@ -3,7 +3,8 @@
 // This test ensures that command line options used to generate this module
 // This test ensures that command line options used to generate this module
 // are added to the SPIR-V using OpModuleProcessed.
 // are added to the SPIR-V using OpModuleProcessed.
 
 
-// CHECK: OpModuleProcessed "dxc-cl-option: -E main -T ps_6_1 
+// CHECK: OpModuleProcessed "dxc-cl-option:
+// CHECK-SAME: -E main -T ps_6_1 
 // CHECK-SAME: -fspv-target-env=vulkan1.1 -Zi
 // CHECK-SAME: -fspv-target-env=vulkan1.1 -Zi
 
 
 void main() {}
 void main() {}

+ 18 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/dyn_vec.hlsl

@@ -0,0 +1,18 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test for a dynamically indexed vector
+
+[RootSignature("")]
+float main(float4 vec : COLOR, int index : INDEX) : SV_Target {
+  // CHECK: alloca [4 x float]
+
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+
+  // CHECK: load
+  return vec[index];
+}
+
+

+ 11 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/fcgl.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc %s -E main -T ps_6_0 -Zi -Od -fcgl | FileCheck %s
+
+// CHECK: @main
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+[RootSignature("")]
+float4 main() : SV_Target {
+  return float4(1,1,1,1);
+};

+ 23 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/global_dyn_vec.hlsl

@@ -0,0 +1,23 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test for a dynamically indexed static global vector
+
+static float4 MyGlobal;
+
+// CHECK-NOT: internal global
+
+[RootSignature("")]
+float main(float4 vec : COLOR, int index : INDEX) : SV_Target {
+  MyGlobal = vec.zyxw;
+  // CHECK: alloca [4 x float]
+
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+
+  // CHECK: load
+  return MyGlobal[index];
+}
+
+

+ 22 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/global_vec.hlsl

@@ -0,0 +1,22 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od -Zi | FileCheck %s
+
+// Test that a static global vector used without dynamic indexing is lowered without an alloca
+
+static float4 MyGlobal;
+
+// CHECK-NOT: internal global
+
+[RootSignature("")]
+float4 main(float4 vec : COLOR, int index : INDEX) : SV_Target {
+  MyGlobal = vec.zyxw;
+  // CHECK-NOT: alloca
+  return MyGlobal;
+}
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK: !DILocalVariable(tag: DW_TAG_variable, name: "global.MyGlobal
+// CHECK: !DILocalVariable(tag: DW_TAG_variable, name: "global.MyGlobal
+// CHECK: !DILocalVariable(tag: DW_TAG_variable, name: "global.MyGlobal
+

+ 26 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/gv_od.hlsl

@@ -0,0 +1,26 @@
+// RUN: %dxc -E main -T ps_6_0 -Od -Zi %s | FileCheck %s
+
+// Regression test for making sure that static variables
+// still work with -Od.
+
+// CHECK: @main
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK: !DILocalVariable(tag: DW_TAG_variable, name: "global.gG"
+
+static bool gG;
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+Texture2D f(bool foo) {
+  return foo ? tex0 : tex1;
+}
+
+[RootSignature("DescriptorTable(SRV(t0, numDescriptors=2))")]
+float4 main() : sv_target {
+  gG = true;
+  return f(gG).Load(0);
+};

+ 9 - 4
tools/clang/test/HLSLFileCheck/dxil/debug/locals/matrix_no_opt.hlsl

@@ -2,14 +2,19 @@
 
 
 // Test that local matrices preserve debug info without optimizations
 // Test that local matrices preserve debug info without optimizations
 
 
-// CHECK: %[[mat:.*]] = alloca [4 x i32]
-// CHECK: call void @llvm.dbg.declare(metadata [4 x i32]* %[[mat]], metadata ![[divar:.*]], metadata ![[diexpr:.*]])
+// CHECK: @llvm.dbg.value(metadata i32 %{{.*}}, metadata ![[divar:.*]], metadata ![[diexpr0:[0-9]+]]
+// CHECK: @llvm.dbg.value(metadata i32 %{{.*}}, metadata ![[divar]], metadata ![[diexpr1:[0-9]+]]
+// CHECK: @llvm.dbg.value(metadata i32 %{{.*}}, metadata ![[divar]], metadata ![[diexpr2:[0-9]+]]
+// CHECK: @llvm.dbg.value(metadata i32 %{{.*}}, metadata ![[divar]], metadata ![[diexpr3:[0-9]+]]
 
 
 // Exclude quoted source file (see readme)
 // Exclude quoted source file (see readme)
 // CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
 // CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
 
 
 // CHECK-DAG: ![[divar]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "mat"
 // CHECK-DAG: ![[divar]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "mat"
-// CHECK-DAG: ![[diexpr]] = !DIExpression()
+// CHECK-DAG: ![[diexpr0]] = !DIExpression(DW_OP_bit_piece, {{[0-9]+}}, {{[0-9]+}})
+// CHECK-DAG: ![[diexpr1]] = !DIExpression(DW_OP_bit_piece, {{[0-9]+}}, {{[0-9]+}})
+// CHECK-DAG: ![[diexpr2]] = !DIExpression(DW_OP_bit_piece, {{[0-9]+}}, {{[0-9]+}})
+// CHECK-DAG: ![[diexpr3]] = !DIExpression(DW_OP_bit_piece, {{[0-9]+}}, {{[0-9]+}})
 
 
 int2x2 cb_mat;
 int2x2 cb_mat;
 int main() : OUT
 int main() : OUT
@@ -18,4 +23,4 @@ int main() : OUT
   int2x2 mat = cb_mat;
   int2x2 mat = cb_mat;
   // Consume all values but return a scalar to avoid another alloca [4 x i32]
   // Consume all values but return a scalar to avoid another alloca [4 x i32]
   return determinant(mat);
   return determinant(mat);
-}
+}

+ 2 - 2
tools/clang/test/HLSLFileCheck/dxil/debug/locals/temporary_dbg_declare.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T vs_6_0 -Od -Zi %s | FileCheck %s
+// RUN: %dxc -E main -T vs_6_0 -Od -Zi -fcgl %s | FileCheck %s
 
 
 // Test that dbg.declares are emitted for temporaries.
 // Test that dbg.declares are emitted for temporaries.
 
 
@@ -11,4 +11,4 @@ int main(int x : IN) : OUT {
 }
 }
 
 
 // Exclude quoted source file (see readme)
 // Exclude quoted source file (see readme)
-// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}

+ 28 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/mat3x2_dbg.hlsl

@@ -0,0 +1,28 @@
+// RUN: %dxc -E main -T vs_6_0 -Zi -Od %s | FileCheck %s
+
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %{{.*}}, i64 0, metadata ![[var_md:[0-9]+]], metadata ![[expr_md:[0-9]+]]
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK-DAG: ![[var_md]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "my_mat"
+// CHECK-DAG: ![[expr_md]] = !DIExpression(DW_OP_bit_piece,
+
+[RootSignature("")]
+uint3x2 main(uint2 uv : TEXCOORD) : MY_MAT {
+  uint3x2 my_mat = uint3x2(
+    uv.y * 0.5, uv.x * 0.5,
+    1.0 - uv.x, 1.0 - uv.x,
+    1.0 - uv.x, 1.0 - uv.x
+  );
+  return my_mat;
+}
+

+ 25 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/mat_dbg.hlsl

@@ -0,0 +1,25 @@
+// RUN: %dxc -E main -T vs_6_0 -Zi -Od %s | FileCheck %s
+
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %{{.*}}, i64 0, metadata ![[var_md:[0-9]+]], metadata ![[expr_md:[0-9]+]]
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK-DAG: ![[var_md]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "my_mat"
+// CHECK-DAG: ![[expr_md]] = !DIExpression(DW_OP_bit_piece,
+
+[RootSignature("")]
+uint2x2 main(uint2 uv : TEXCOORD) : MY_MAT {
+  uint2x2 my_mat = uint2x2(
+    uv.y * 0.5, uv.x * 0.5,
+    1.0 - uv.x, 1.0 - uv.x
+  );
+  return my_mat;
+}
+

+ 5 - 8
tools/clang/test/HLSLFileCheck/dxil/debug/misc/intrinsic4_dbg.hlsl

@@ -11,17 +11,14 @@
 // CHECK: calculateLOD
 // CHECK: calculateLOD
 // CHECK: i1 false
 // CHECK: i1 false
 // CHECK: texture2DMSGetSamplePosition
 // CHECK: texture2DMSGetSamplePosition
-// CHECK: llvm.dbg.declare(metadata i32* %width
-// CHECK: llvm.dbg.declare(metadata i32* %height
-// CHECK: llvm.dbg.declare(metadata i32* %numOfLevels
 // CHECK: getDimensions
 // CHECK: getDimensions
-// CHECK: llvm.dbg.declare(metadata i32* %arraySize
-// CHECK: llvm.dbg.declare(metadata i32* %numSamples
+// CHECK: llvm.dbg.value(metadata i32 %
+// CHECK: llvm.dbg.value(metadata i32 %
 // CHECK: getDimensions
 // CHECK: getDimensions
-// CHECK: llvm.dbg.declare(metadata i32* %numStructs
-// CHECK: llvm.dbg.declare(metadata i32* %stride
+// CHECK: llvm.dbg.value(metadata i32 %
+// CHECK: llvm.dbg.value(metadata i32 %
 // CHECK: getDimensions
 // CHECK: getDimensions
-// CHECK: llvm.dbg.declare(metadata i32* %dim
+// CHECK: llvm.dbg.value(metadata i32 %
 // CHECK: getDimensions
 // CHECK: getDimensions
 
 
 // Exclude quoted source file (see readme)
 // Exclude quoted source file (see readme)

+ 1 - 1
tools/clang/test/HLSLFileCheck/dxil/debug/misc/share_mem_dbg.hlsl

@@ -22,7 +22,7 @@
 // Make sure source info contents exist.
 // Make sure source info contents exist.
 // CHECK: !{!"DefineA=1", !"DefineB=0"}
 // CHECK: !{!"DefineA=1", !"DefineB=0"}
 // CHECK: share_mem_dbg.hlsl"}
 // CHECK: share_mem_dbg.hlsl"}
-// CHECK: !{!"-E", !"main", !"-T", !"cs_6_0", !"-Zi", !"-Od", !"-D", !"DefineA", !"-D", !"DefineB=0", !"-Qstrip_reflect"}
+// CHECK: !{!"-E", !"main", !"-T", !"cs_6_0", !"-Zi", !"-Od", !"-D", !"DefineA", !"-D", !"DefineB=0", !"-Qstrip_reflect", !"-Qembed_debug"}
 
 
 
 
 struct S {
 struct S {

+ 33 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/no_fold.hlsl

@@ -0,0 +1,33 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic is not optimized away
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
+float4 main() : SV_Target {
+
+  float x = 10;
+
+  float y = x + 5;
+  // CHECK: fadd
+  float z = y * 2;
+  // CHECK: fmul
+  float w = z / 0.5;
+  // CHECK: fdiv
+
+  Texture2D tex = tex0; 
+  // CHECK: br i1
+  if (w >= 0) {
+    tex = tex1;
+    // CHECK: br
+  }
+
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  return tex.Load(0) + float4(x,y,z,w);
+}
+

+ 38 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/no_fold_vec.hlsl

@@ -0,0 +1,38 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic is not optimized away
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
+float4 main() : SV_Target {
+
+  float2 xy = float2(10, 20);
+
+  float2 zw = xy + float2(5, 30);
+  // CHECK: fadd
+  // CHECK: fadd
+
+  float2 foo = zw * 2;
+  // CHECK: fmul
+  // CHECK: fmul
+
+  float2 bar = foo / 0.5;
+  // CHECK: fdiv
+  // CHECK: fdiv
+
+  Texture2D tex = tex0; 
+  // CHECK: br i1
+  if (foo.x+bar.y >= 0) {
+    tex = tex1;
+    // CHECK: br
+  }
+
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  return tex.Load(0) + float4(foo,bar);
+}
+

+ 34 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/no_fold_vec_array.hlsl

@@ -0,0 +1,34 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Check that arrays of vectors still work with -Od
+// without all the inst-simplify
+
+[RootSignature("")]
+float2 main(int index : INDEX) : SV_Target {
+
+  float2 values[4] = {
+    float2(1,2),
+    float2(3,4),
+    float2(5,6),
+    float2(7,8),
+  };
+
+  // CHECK: alloca [4 x float]
+  // CHECK: alloca [4 x float]
+
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+
+  // CHECK: load
+  // CHECK: load
+
+  return values[3];
+}
+

+ 149 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noops_call.hlsl

@@ -0,0 +1,149 @@
+// RUN: %dxilver 1.6 | %dxc -E main -T ps_6_6 %s -Od | FileCheck %s
+
+typedef float4 MyCoolFloat4; 
+static float4 myStaticGlobalVar = float4(1.0, 1.0, 1.0, 1.0);
+
+// Local var with same name as outer scope
+float4 localScopeVar_func(float4 val)
+{
+    float4 color = val * val;
+    return color;
+}
+
+// Local var with same name as register
+float4 localRegVar_func(float4 val)
+{
+    float4 r1 = val;
+    return r1;
+}
+
+// Array
+float4 array_func(float4 val)
+{
+    float result[4];
+    result[0] = val.x;
+    result[1] = val.y;
+    result[2] = val.z;
+    result[3] = val.w;
+    return float4(result[0], result[1], result[2], result[3]);
+}
+
+// Typedef
+float4 typedef_func(float4 val)
+{
+    MyCoolFloat4 result = val;
+    return result;
+}
+
+// Global
+float4 global_func(float4 val)
+{
+    myStaticGlobalVar *= val;
+    return myStaticGlobalVar;
+}
+
+float4 depth4(float4 val)
+{
+    val = val * val;
+    return val;
+}
+
+float4 depth3(float4 val)
+{
+    val = depth4(val) * val;
+    return val;
+}
+
+float4 depth2(float4 val)
+{
+    val = depth3(val) * val;
+    return val;
+}
+
+[RootSignature("")]
+float4 main( float4 unused : SV_POSITION, float4 color : COLOR ) : SV_Target
+{
+    float4 ret1 = localScopeVar_func(color);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // CHECK: fmul
+    // CHECK: fmul
+    // CHECK: fmul
+    // CHECK: fmul
+    // ** return **
+    // CHECK: call void @llvm.donothing()
+
+    float4 ret2 = localRegVar_func(ret1);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // ** copy **
+    // CHECK: call void @llvm.donothing()
+    // ** return **
+    // CHECK: call void @llvm.donothing()
+
+    float4 ret3 = array_func(ret2);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // CHECK: store
+    // CHECK: store
+    // CHECK: store
+    // CHECK: store
+    // CHECK: load
+    // CHECK: load
+    // CHECK: load
+    // CHECK: load
+    // ** return **
+    // CHECK: call void @llvm.donothing()
+
+    float4 ret4 = typedef_func(ret3);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // ** copy **
+    // CHECK: call void @llvm.donothing()
+    // ** return **
+    // CHECK: call void @llvm.donothing()
+
+    float4 ret5 = global_func(ret4);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // CHECK: fmul
+    // CHECK: fmul
+    // CHECK: fmul
+    // CHECK: fmul
+    // ** return **
+    // CHECK: call void @llvm.donothing()
+
+    float4 ret6 = depth2(ret5);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // depth2() {
+      // ** call **
+      // CHECK: call void @llvm.donothing()
+      // depth3() {
+        // ** call **
+        // CHECK: call void @llvm.donothing()
+        // depth4() {
+          // CHECK: fmul
+          // CHECK: fmul
+          // CHECK: fmul
+          // CHECK: fmul
+          // CHECK: call void @llvm.donothing()
+        // }
+        // CHECK: fmul
+        // CHECK: fmul
+        // CHECK: fmul
+        // CHECK: fmul
+        // CHECK: call void @llvm.donothing()
+      // }
+      // CHECK: fmul
+      // CHECK: fmul
+      // CHECK: fmul
+      // CHECK: fmul
+      // CHECK: call void @llvm.donothing()
+    // }
+
+    return max(ret6, color);
+    // CHECK: call void @llvm.donothing()
+}
+
+

+ 38 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold.hlsl

@@ -0,0 +1,38 @@
+// RUN: %dxilver 1.6 | %dxc -E main -T ps_6_6 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic is not optimized away
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
+float4 main() : SV_Target {
+
+  float x = 10;
+  // CHECK: call void @llvm.donothing()
+
+  float y = x + 5;
+  // CHECK: fadd
+  float z = y * 2;
+  // CHECK: fmul
+  float w = z / 0.5;
+  // CHECK: fdiv
+
+  Texture2D tex = tex0; 
+  // CHECK: call void @llvm.donothing()
+
+  // CHECK: br i1
+  if (w >= 0) {
+    tex = tex1;
+    // CHECK: call void @llvm.donothing()
+    // CHECK: br
+  }
+
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  return tex.Load(0) + float4(x,y,z,w);
+  // CHECK: call void @llvm.donothing()
+}
+

+ 43 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold_vec.hlsl

@@ -0,0 +1,43 @@
+// RUN: %dxilver 1.6 | %dxc -E main -T ps_6_6 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic is not optimized away
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
+float4 main() : SV_Target {
+
+  float2 xy = float2(10, 20);
+  // CHECK: call void @llvm.donothing()
+
+  float2 zw = xy + float2(5, 30);
+  // CHECK: fadd
+  // CHECK: fadd
+
+  float2 foo = zw * 2;
+  // CHECK: fmul
+  // CHECK: fmul
+
+  float2 bar = foo / 0.5;
+  // CHECK: fdiv
+  // CHECK: fdiv
+
+  Texture2D tex = tex0; 
+  // CHECK: call void @llvm.donothing()
+
+  // CHECK: br i1
+  if (foo.x+bar.y >= 0) {
+    tex = tex1;
+    // CHECK: call void @llvm.donothing()
+    // CHECK: br
+  }
+
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  return tex.Load(0) + float4(foo,bar);
+  // CHECK: call void @llvm.donothing()
+}
+

+ 44 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/calculations.hlsl

@@ -0,0 +1,44 @@
+// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
+
+// CHECK: @main
+
+// CHECK: !dx.controlflow.hints
+// CHECK: !dx.controlflow.hints
+// CHECK: !dx.controlflow.hints
+
+// Make sure that even when we don't simplify cfg, DxilValueCache
+// is still able to figure out values.
+
+static int g_foo;
+static int g_bar;
+static int g_baz;
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+Texture2D f(int foo, int bar, int baz) {
+  foo += 10;
+  if (foo+bar < baz*2)
+    return tex0;
+  else
+    return tex1;
+}
+
+[RootSignature("DescriptorTable(SRV(t0, numDescriptors=2))")]
+float4 main() : sv_target {
+  g_foo = 10;
+  [branch]
+  if (g_foo > 10)
+    g_foo = 30;
+  [branch]
+  if (g_foo < 50)
+    g_foo = 90;
+  [branch]
+  if (g_foo > 80)
+    g_bar = 20;
+
+  g_baz = 30;
+  return f(g_foo, g_bar, g_baz).Load(0);
+};
+
+

+ 32 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/cfg.hlsl

@@ -0,0 +1,32 @@
+// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
+
+// CHECK: @main
+
+static bool gG;
+static bool gG2;
+static bool gG3;
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+Texture2D tex2 : register(t2);
+
+Texture2D f(bool foo) {
+  if (foo)
+    return tex0;
+  else
+    return tex1;
+}
+
+Texture2D h(bool foo3) {
+  return foo3 ? f(gG2) : tex2;
+}
+
+[RootSignature("DescriptorTable(SRV(t0, numDescriptors=3))")]
+float4 main() : sv_target {
+  gG = true;
+  gG2 = false;
+  gG3 = false;
+  return h(gG).Load(0);
+};
+
+

+ 40 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/cfg2.hlsl

@@ -0,0 +1,40 @@
+// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
+
+// CHECK: @main
+
+static bool gG;
+static bool gG2;
+static bool gG3;
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+Texture2D tex2 : register(t2);
+
+Texture2D f(bool foo) {
+  [branch]
+  if (foo)
+    return tex0;
+  else
+    return tex1;
+}
+Texture2D g(bool foo) {
+  [branch]
+  if (foo)
+    return tex1;
+  else
+    return tex2;
+}
+
+Texture2D h(bool foo3) {
+  return foo3 ? f(gG2) : g(gG3);
+}
+
+[RootSignature("DescriptorTable(SRV(t0, numDescriptors=3))")]
+float4 main() : sv_target {
+  gG = true;
+  gG2 = false;
+  gG3 = false;
+  return h(gG).Load(0);
+};
+
+

+ 34 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/lexicalBlock.hlsl

@@ -0,0 +1,34 @@
+// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
+
+// Make sure we are generating branches instead of selects.
+
+// CHECK: @main
+[RootSignature("")]
+float4 main(float4 color : COLOR) : SV_Target
+{
+    int value = 0;
+    
+    // CHECK: br i1
+    if (color.x < 0.5)
+        value = 1;
+        // CHECK: br
+ 
+    // CHECK: br i1
+    if (color.y < 0.5)
+        value = 2;
+        // CHECK: br 
+        
+    // CHECK: br i1
+    if (color.z < 0.5)
+        value = 3;
+        // CHECK: br
+        
+    // CHECK: br i1
+    if (color.w < 0.5)
+        value = 4;
+        // CHECK: br
+                        
+    float4 result = float4(value,1,1,1);
+  
+    return result;
+}

+ 22 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/vec_dbg.hlsl

@@ -0,0 +1,22 @@
+// RUN: %dxc -E main -T ps_6_0 -Zi -Od %s | FileCheck %s
+
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %{{.*}}, i64 0, metadata ![[var_md:[0-9]+]], metadata ![[expr_md:[0-9]+]]
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK-DAG: ![[var_md]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "my_uv"
+// CHECK-DAG: ![[expr_md]] = !DIExpression(DW_OP_bit_piece,
+
+[RootSignature("")]
+float2 main(uint2 uv : TEXCOORD) : SV_Target {
+  uint2 my_uv = {
+    uv.y * 0.5,
+    1.0 - uv.x,
+  };
+  return my_uv;
+}
+

+ 1 - 1
tools/clang/test/HLSLFileCheck/hlsl/control_flow/if_else/if2.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
+// RUN: %dxc -E main -T ps_6_0 -Od %s -fcgl | FileCheck %s
 
 
 // CHECK: !"dx.controlflow.hints", i32 2
 // CHECK: !"dx.controlflow.hints", i32 2
 
 

+ 6 - 4
tools/clang/test/HLSLFileCheck/hlsl/objects/Texture/sample_kwd.hlsl

@@ -1,9 +1,11 @@
 // RUN: %dxc -T ps_6_0 -Od -E main %s | FileCheck %s
 // RUN: %dxc -T ps_6_0 -Od -E main %s | FileCheck %s
 
 
-// CHECK: %precise = alloca float, align 4
-// CHECK: %globallycoherent = alloca i32, align 4
-// CHECK: %sample = alloca float, align 4
-// CHECK: %center = alloca float, align 4
+// Used to check the following, but allocas are now gone, so they no longer exist.
+//
+//    %precise = alloca float, align 4
+//    %globallycoherent = alloca i32, align 4
+//    %sample = alloca float, align 4
+//    %center = alloca float, align 4
 
 
 // CHECK: call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %MyBuffer_UAV_structbuf, i32 0, i32 0)
 // CHECK: call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %MyBuffer_UAV_structbuf, i32 0, i32 0)
 // CHECK: call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %MyBuffer_UAV_structbuf, i32 0, i32 16)
 // CHECK: call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %MyBuffer_UAV_structbuf, i32 0, i32 16)

+ 3 - 3
tools/clang/test/HLSLFileCheck/hlsl/types/boolean/bool_scalar_swizzle.hlsl

@@ -1,11 +1,11 @@
-// RUN: %dxc -E main -T ps_6_0 -O0 %s | FileCheck %s
+// RUN: %dxc -E main -T ps_6_0 -O0 %s -fcgl | FileCheck %s
 
 
 // This is mostly a regression test for a bug where a bitcast
 // This is mostly a regression test for a bug where a bitcast
 // from i32* to i1* was emitted.
 // from i32* to i1* was emitted.
 
 
 // CHECK: alloca i32
 // CHECK: alloca i32
-// CHECK: alloca [2 x i32]
-// CHECK-NOT: bitcast
+// CHECK: alloca <2 x i32>
+// CHECK-NOT: bitcast i32* %b to <1 x i1>*
 
 
 float main() : SV_Target
 float main() : SV_Target
 {
 {

+ 5 - 18
tools/clang/test/HLSLFileCheck/hlsl/types/boolean/local_load_store.hlsl

@@ -3,52 +3,39 @@
 // Ensure that bools are converted from/to their memory representation when loaded/stored
 // Ensure that bools are converted from/to their memory representation when loaded/stored
 // in local variables.
 // in local variables.
 
 
-// Local variables should never be i1s
-// CHECK-NOT: alloca {{.*}}i1
-
 int main(int i : I) : OUT
 int main(int i : I) : OUT
 {
 {
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: icmp eq i32 {{.*}}, 42
-    // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool s = i == 42;
     bool s = i == 42;
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: zext i1 {{.*}} to i32
     // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool1 v = i == 42;
     bool1 v = i == 42;
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: zext i1 {{.*}} to i32
     // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool1x1 m = i == 42;
     bool1x1 m = i == 42;
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: zext i1 {{.*}} to i32
     // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool sa[1] = { i == 42 };
     bool sa[1] = { i == 42 };
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: zext i1 {{.*}} to i32
     // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool1 va[1] = { i == 42 };
     bool1 va[1] = { i == 42 };
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: zext i1 {{.*}} to i32
     // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool1x1 ma[1] = { i == 42 };
     bool1x1 ma[1] = { i == 42 };
 
 
-    // CHECK: load i32
-    // CHECK: icmp ne i32 {{.*}}, 0
+    // Used to check icmp ne i32 {{.*}}, 0
+    // but since variable "s" was never stored
+    // to memory, it stayed as an i1 value,
+    // so no need to icmp that to 0.
     return (s
     return (s
-        // CHECK: load i32
         // CHECK: icmp ne i32 {{.*}}, 0
         // CHECK: icmp ne i32 {{.*}}, 0
         && v.x
         && v.x
-        // CHECK: load i32
         // CHECK: icmp ne i32 {{.*}}, 0
         // CHECK: icmp ne i32 {{.*}}, 0
         && m._11
         && m._11
-        // CHECK: load i32
         // CHECK: icmp ne i32 {{.*}}, 0
         // CHECK: icmp ne i32 {{.*}}, 0
         && sa[0]
         && sa[0]
-        // CHECK: load i32
         // CHECK: icmp ne i32 {{.*}}, 0
         // CHECK: icmp ne i32 {{.*}}, 0
         && va[0].x
         && va[0].x
-        // CHECK: load i32
         // CHECK: icmp ne i32 {{.*}}, 0
         // CHECK: icmp ne i32 {{.*}}, 0
         && ma[0]._11) ? 1 : 2;
         && ma[0]._11) ? 1 : 2;
-}
+}

+ 27 - 0
tools/clang/test/HLSLFileCheck/hlsl/types/boolean/local_load_store_scalar.hlsl

@@ -0,0 +1,27 @@
+// RUN: %dxc -E main -T vs_6_0 -O0 %s -fcgl | FileCheck %s
+
+// Ensure that bools are converted from/to their memory representation when loaded/stored
+// in local variables.
+
+// Local variables should never be i1s
+// CHECK-NOT: alloca {{.*}}i1
+
+int main(int i : I) : OUT
+{
+    // CHECK: alloca i32
+    // CHECK: icmp eq i32 {{.*}}, 42
+    // CHECK: zext i1 {{.*}} to i32
+    bool s = i == 42;
+    bool1 v = i == 42;
+    bool1x1 m = i == 42;
+    bool sa[1] = { i == 42 };
+    bool1 va[1] = { i == 42 };
+    bool1x1 ma[1] = { i == 42 };
+
+    return (s
+        && v.x
+        && m._11
+        && sa[0]
+        && va[0].x
+        && ma[0]._11) ? 1 : 2;
+}

+ 2 - 2
tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/center/center_kwd.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -T ps_6_0 -Od -E main %s | FileCheck %s 
 // RUN: %dxc -T ps_6_0 -Od -E main %s | FileCheck %s 
 
 
-// CHECK: %center = alloca float, align 4
+// CHECK: @main
 
 
 // make sure 'center' is allowed as an interpolation modifier
 // make sure 'center' is allowed as an interpolation modifier
 float main(center float t : T) : SV_TARGET
 float main(center float t : T) : SV_TARGET
@@ -8,4 +8,4 @@ float main(center float t : T) : SV_TARGET
     // and also as an identifier
     // and also as an identifier
     float center = 10.0f;
     float center = 10.0f;
     return center * 2;
     return center * 2;
-}
+}

+ 3 - 2
tools/clang/test/HLSLFileCheck/samples/d3d11/SubD11_SubDToBezierHS.hlsl

@@ -3,8 +3,9 @@
 // CHECK: primitiveID
 // CHECK: primitiveID
 // CHECK: storePatchConstant
 // CHECK: storePatchConstant
 // CHECK: main
 // CHECK: main
-// CHECK: primitiveID
-// CHECK: bufferLoad
+// These values are not used other than being stored into array allocas, which we now remove.
+// xCHECK: primitiveID
+// xCHECK: bufferLoad
 // CHECK: storeOutput
 // CHECK: storeOutput
 
 
 //--------------------------------------------------------------------------------------
 //--------------------------------------------------------------------------------------

+ 2 - 2
tools/clang/tools/dxa/dxa.cpp

@@ -95,7 +95,7 @@ void DxaContext::Assemble() {
         }
         }
       }
       }
 
 
-      WriteBlobToFile(pContainer, StringRefUtf16(OutputFilename));
+      WriteBlobToFile(pContainer, StringRefUtf16(OutputFilename), DXC_CP_UTF8); // TODO: Support DefaultTextCodePage
     }
     }
   }
   }
 }
 }
@@ -298,7 +298,7 @@ bool DxaContext::ExtractPart(const char *pName) {
         std::swap(pModuleBlob, pContent);
         std::swap(pModuleBlob, pContent);
       }
       }
 
 
-      WriteBlobToFile(pContent, StringRefUtf16(OutputFilename));
+      WriteBlobToFile(pContent, StringRefUtf16(OutputFilename), DXC_CP_UTF8); // TODO: Support DefaultTextCodePage
       printf("%Iu bytes written to %s\n", pContent->GetBufferSize(), OutputFilename.c_str());
       printf("%Iu bytes written to %s\n", pContent->GetBufferSize(), OutputFilename.c_str());
       return true;
       return true;
     }
     }

+ 58 - 27
tools/clang/tools/dxclib/dxc.cpp

@@ -159,8 +159,8 @@ public:
   void GetCompilerVersionInfo(llvm::raw_string_ostream &OS);
   void GetCompilerVersionInfo(llvm::raw_string_ostream &OS);
 };
 };
 
 
-static void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, llvm::StringRef FName) {
-  ::dxc::WriteBlobToFile(pBlob, StringRefUtf16(FName));
+static void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, llvm::StringRef FName, UINT32 defaultTextCodePage) {
+  ::dxc::WriteBlobToFile(pBlob, StringRefUtf16(FName), defaultTextCodePage);
 }
 }
 
 
 static void WritePartToFile(IDxcBlob *pBlob, hlsl::DxilFourCC CC,
 static void WritePartToFile(IDxcBlob *pBlob, hlsl::DxilFourCC CC,
@@ -191,6 +191,16 @@ static void WritePartToFile(IDxcBlob *pBlob, hlsl::DxilFourCC CC,
   }
   }
 }
 }
 
 
+static void WriteDxcOutputToFile(DXC_OUT_KIND kind, IDxcResult *pResult, UINT32 textCodePage) { // Writes the `kind` output of pResult to the file named by that output's own name blob.
+  if (pResult->HasOutput(kind)) { // Nothing to do when the result reports no output of this kind.
+    CComPtr<IDxcBlob> pData;
+    CComPtr<IDxcBlobUtf16> pName;
+    IFT(pResult->GetOutput(kind, IID_PPV_ARGS(&pData), &pName)); // Fetches the data blob and its output name; IFT presumably throws on a failed HRESULT - confirm against its definition.
+    if (pName && pName->GetStringLength() > 0) // Outputs with a missing or empty name are not written anywhere.
+      WriteBlobToFile(pData, pName->GetStringPointer(), textCodePage); // textCodePage is passed through unchanged to WriteBlobToFile.
+  }
+}
+
 // This function is called either after the compilation is done or /dumpbin option is provided
 // This function is called either after the compilation is done or /dumpbin option is provided
 // Performing options that are used to process dxil container.
 // Performing options that are used to process dxil container.
 int DxcContext::ActOnBlob(IDxcBlob *pBlob) {
 int DxcContext::ActOnBlob(IDxcBlob *pBlob) {
@@ -211,7 +221,7 @@ int DxcContext::ActOnBlob(IDxcBlob *pBlob, IDxcBlob *pDebugBlob, LPCWSTR pDebugB
     if (!m_Opts.ExtractRootSignature) {
     if (!m_Opts.ExtractRootSignature) {
       CComPtr<IDxcBlob> pResult;
       CComPtr<IDxcBlob> pResult;
       UpdatePart(pBlob, &pResult);
       UpdatePart(pBlob, &pResult);
-      WriteBlobToFile(pResult, m_Opts.OutputObject);
+      WriteBlobToFile(pResult, m_Opts.OutputObject, m_Opts.DefaultTextCodePage);
     }
     }
   }
   }
 
 
@@ -229,7 +239,7 @@ int DxcContext::ActOnBlob(IDxcBlob *pBlob, IDxcBlob *pDebugBlob, LPCWSTR pDebugB
     if (pDebugBlob != nullptr) {
     if (pDebugBlob != nullptr) {
       IFTBOOLMSG(pDebugBlobName && *pDebugBlobName, E_INVALIDARG,
       IFTBOOLMSG(pDebugBlobName && *pDebugBlobName, E_INVALIDARG,
         "/Fd was specified but no debug name was produced");
         "/Fd was specified but no debug name was produced");
-      WriteBlobToFile(pDebugBlob, pDebugBlobName);
+      WriteBlobToFile(pDebugBlob, pDebugBlobName, m_Opts.DefaultTextCodePage);
     } else {
     } else {
       // Note: This is for load from binary case
       // Note: This is for load from binary case
       WritePartToFile(pBlob, hlsl::DFCC_ShaderDebugInfoDXIL, m_Opts.DebugFile);
       WritePartToFile(pBlob, hlsl::DFCC_ShaderDebugInfoDXIL, m_Opts.DebugFile);
@@ -240,7 +250,7 @@ int DxcContext::ActOnBlob(IDxcBlob *pBlob, IDxcBlob *pDebugBlob, LPCWSTR pDebugB
   if (m_Opts.ExtractRootSignature) {
   if (m_Opts.ExtractRootSignature) {
     CComPtr<IDxcBlob> pRootSignatureContainer;
     CComPtr<IDxcBlob> pRootSignatureContainer;
     ExtractRootSignature(pBlob, &pRootSignatureContainer);
     ExtractRootSignature(pBlob, &pRootSignatureContainer);
-    WriteBlobToFile(pRootSignatureContainer, m_Opts.OutputObject);
+    WriteBlobToFile(pRootSignatureContainer, m_Opts.OutputObject, m_Opts.DefaultTextCodePage);
   }
   }
 
 
   // Extract and write private data.
   // Extract and write private data.
@@ -299,7 +309,7 @@ int DxcContext::ActOnBlob(IDxcBlob *pBlob, IDxcBlob *pDebugBlob, LPCWSTR pDebugB
     WriteHeader(pDisassembleResult, pBlob, varName,
     WriteHeader(pDisassembleResult, pBlob, varName,
                 StringRefUtf16(m_Opts.OutputHeader));
                 StringRefUtf16(m_Opts.OutputHeader));
   } else if (!m_Opts.AssemblyCode.empty()) {
   } else if (!m_Opts.AssemblyCode.empty()) {
-    WriteBlobToFile(pDisassembleResult, m_Opts.AssemblyCode);
+    WriteBlobToFile(pDisassembleResult, m_Opts.AssemblyCode, m_Opts.DefaultTextCodePage);
   } else {
   } else {
     WriteBlobToConsole(pDisassembleResult);
     WriteBlobToConsole(pDisassembleResult);
   }
   }
@@ -364,7 +374,7 @@ void DxcContext::UpdatePart(IDxcBlob *pSource, IDxcBlob **ppResult) {
     CComPtr<IDxcBlobEncoding> pErrors;
     CComPtr<IDxcBlobEncoding> pErrors;
     IFT(pBuilderResult->GetErrorBuffer(&pErrors));
     IFT(pBuilderResult->GetErrorBuffer(&pErrors));
     if (pErrors != nullptr) {
     if (pErrors != nullptr) {
-      WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile);
+      WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile, m_Opts.DefaultTextCodePage);
     }
     }
   }
   }
   else {
   else {
@@ -490,7 +500,7 @@ int DxcContext::VerifyRootSignature() {
     if (!m_Opts.OutputWarningsFile.empty()) {
     if (!m_Opts.OutputWarningsFile.empty()) {
       CComPtr<IDxcBlobEncoding> pErrors;
       CComPtr<IDxcBlobEncoding> pErrors;
       IFT(pOperationResult->GetErrorBuffer(&pErrors));
       IFT(pOperationResult->GetErrorBuffer(&pErrors));
-      WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile);
+      WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile, m_Opts.DefaultTextCodePage);
     }
     }
     else {
     else {
       WriteOperationErrorsToConsole(pOperationResult, m_Opts.OutputWarnings);
       WriteOperationErrorsToConsole(pOperationResult, m_Opts.OutputWarnings);
@@ -801,7 +811,7 @@ int DxcContext::Compile() {
   if (!m_Opts.OutputWarningsFile.empty()) {
   if (!m_Opts.OutputWarningsFile.empty()) {
     CComPtr<IDxcBlobEncoding> pErrors;
     CComPtr<IDxcBlobEncoding> pErrors;
     IFT(pCompileResult->GetErrorBuffer(&pErrors));
     IFT(pCompileResult->GetErrorBuffer(&pErrors));
-    WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile);
+    WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile, m_Opts.DefaultTextCodePage);
   }
   }
   else {
   else {
     WriteOperationErrorsToConsole(pCompileResult, m_Opts.OutputWarnings);
     WriteOperationErrorsToConsole(pCompileResult, m_Opts.OutputWarnings);
@@ -812,10 +822,16 @@ int DxcContext::Compile() {
   if (SUCCEEDED(status) || m_Opts.AstDump || m_Opts.OptDump) {
   if (SUCCEEDED(status) || m_Opts.AstDump || m_Opts.OptDump) {
     CComPtr<IDxcBlob> pProgram;
     CComPtr<IDxcBlob> pProgram;
     IFT(pCompileResult->GetResult(&pProgram));
     IFT(pCompileResult->GetResult(&pProgram));
-    pCompiler.Release();
-    pCompileResult.Release();
     if (pProgram.p != nullptr) {
     if (pProgram.p != nullptr) {
       ActOnBlob(pProgram.p, pDebugBlob, outputPDBPath.c_str());
       ActOnBlob(pProgram.p, pDebugBlob, outputPDBPath.c_str());
+
+      // Now write out extra parts
+      CComPtr<IDxcResult> pResult;
+      if (SUCCEEDED(pCompileResult->QueryInterface(&pResult))) {
+        WriteDxcOutputToFile(DXC_OUT_ROOT_SIGNATURE, pResult, m_Opts.DefaultTextCodePage);
+        WriteDxcOutputToFile(DXC_OUT_SHADER_HASH, pResult, m_Opts.DefaultTextCodePage);
+        WriteDxcOutputToFile(DXC_OUT_REFLECTION, pResult, m_Opts.DefaultTextCodePage);
+      }
     }
     }
   }
   }
   return status;
   return status;
@@ -861,7 +877,7 @@ void DxcContext::Preprocess() {
   if (SUCCEEDED(status)) {
   if (SUCCEEDED(status)) {
     CComPtr<IDxcBlob> pProgram;
     CComPtr<IDxcBlob> pProgram;
     IFT(pPreprocessResult->GetResult(&pProgram));
     IFT(pPreprocessResult->GetResult(&pProgram));
-    WriteBlobToFile(pProgram, m_Opts.Preprocess);
+    WriteBlobToFile(pProgram, m_Opts.Preprocess, m_Opts.DefaultTextCodePage);
   }
   }
 }
 }
 
 
@@ -873,20 +889,34 @@ static void WriteString(HANDLE hFile, _In_z_ LPCSTR value, LPCWSTR pFileName) {
 
 
 void DxcContext::WriteHeader(IDxcBlobEncoding *pDisassembly, IDxcBlob *pCode,
 void DxcContext::WriteHeader(IDxcBlobEncoding *pDisassembly, IDxcBlob *pCode,
                              llvm::Twine &pVariableName, LPCWSTR pFileName) {
                              llvm::Twine &pVariableName, LPCWSTR pFileName) {
-  CHandle file(CreateFileW(pFileName, GENERIC_WRITE, FILE_SHARE_READ, nullptr,
-                           CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr));
-  if (file == INVALID_HANDLE_VALUE) {
-    IFT_Data(HRESULT_FROM_WIN32(GetLastError()), pFileName);
-  }
+  // Use older interface for compatibility with older DLL.
+  CComPtr<IDxcLibrary> pLibrary;
+  IFT(CreateInstance(CLSID_DxcLibrary, &pLibrary));
+
+  std::string s;
+  llvm::raw_string_ostream OS(s);
 
 
   {
   {
-    std::string s;
-    llvm::raw_string_ostream OS(s);
+    // Not safe to assume pDisassembly is utf8, must GetBlobAsUtf8 first.
+    CComPtr<IDxcBlobEncoding> pDisasmEncoding;
+    IFT(pLibrary->GetBlobAsUtf8(pDisassembly, &pDisasmEncoding));
+
+    // Don't fail if this QI doesn't succeed (older dll, perhaps)
+    CComPtr<IDxcBlobUtf8> pDisasmUtf8;
+    pDisasmEncoding->QueryInterface(&pDisasmUtf8);
+
+    LPCSTR pBytes = pDisasmUtf8 ? pDisasmUtf8->GetStringPointer()
+                                : (LPCSTR)pDisasmEncoding->GetBufferPointer();
+    // IDxcBlobUtf8's GetStringLength will return length without null character
+    size_t len = pDisasmUtf8 ? pDisasmUtf8->GetStringLength()
+                             : pDisasmEncoding->GetBufferSize();
+    // Just in case there are still any null characters at the end, get rid of them.
+    while (len && pBytes[len-1] == '\0')
+      len -= 1;
+
     // Note: with \r\n line endings, writing the disassembly could be a simple
     // Note: with \r\n line endings, writing the disassembly could be a simple
     // WriteBlobToHandle with a prior and following WriteString for #ifs
     // WriteBlobToHandle with a prior and following WriteString for #ifs
     OS << "#if 0\r\n";
     OS << "#if 0\r\n";
-    const uint8_t *pBytes = (const uint8_t *)pDisassembly->GetBufferPointer();
-    size_t len = pDisassembly->GetBufferSize();
     s.reserve(len + len * 0.1f); // rough estimate
     s.reserve(len + len * 0.1f); // rough estimate
     for (size_t i = 0; i < len; ++i) {
     for (size_t i = 0; i < len; ++i) {
       if (pBytes[i] == '\n')
       if (pBytes[i] == '\n')
@@ -894,13 +924,9 @@ void DxcContext::WriteHeader(IDxcBlobEncoding *pDisassembly, IDxcBlob *pCode,
       OS << pBytes[i];
       OS << pBytes[i];
     }
     }
     OS << "\r\n#endif\r\n";
     OS << "\r\n#endif\r\n";
-    OS.flush();
-    WriteString(file, s.c_str(), pFileName);
   }
   }
 
 
   {
   {
-    std::string s;
-    llvm::raw_string_ostream OS(s);
     OS << "\r\nconst unsigned char " << pVariableName << "[] = {";
     OS << "\r\nconst unsigned char " << pVariableName << "[] = {";
     const uint8_t *pBytes = (const uint8_t *)pCode->GetBufferPointer();
     const uint8_t *pBytes = (const uint8_t *)pCode->GetBufferPointer();
     size_t len = pCode->GetBufferSize();
     size_t len = pCode->GetBufferSize();
@@ -916,9 +942,14 @@ void DxcContext::WriteHeader(IDxcBlobEncoding *pDisassembly, IDxcBlob *pCode,
       OS.write_hex(pBytes[i]);
       OS.write_hex(pBytes[i]);
     }
     }
     OS << "\r\n};\r\n";
     OS << "\r\n};\r\n";
-    OS.flush();
-    WriteString(file, s.c_str(), pFileName);
   }
   }
+
+  OS.flush();
+
+  // Respect user's -encoding option
+  CComPtr<IDxcBlobEncoding> pOutBlob;
+  pLibrary->CreateBlobWithEncodingFromPinned(s.data(), s.length(), DXC_CP_UTF8, &pOutBlob);
+  WriteBlobToFile(pOutBlob, pFileName, m_Opts.DefaultTextCodePage);
 }
 }
 
 
 // Finds DXIL module from the blob assuming blob is either DxilContainer, DxilPartHeader, or DXIL module
 // Finds DXIL module from the blob assuming blob is either DxilContainer, DxilPartHeader, or DXIL module

Some files were not shown because too many files changed in this diff