Browse Source

Integrate dxcapi v2 and other changes from internal (#2575)

* Integrate changes from internal.

- dxcapi v2
- new dxc options
- DxilValueCache
- PDB and NoOpt improvements
- noop / llvm::donothing() support

* Update dxrfallbacklayer for dxcapi internal changes

* Reorder diag block based on whether pDiag is set first.

* llvm::donothing() requires dxil 1.6 / SM 6.6 for now, lib as well.

* Fixes for spir-v, non-VC compiler and non-Windows builds

- DEFINE_CROSS_PLATFORM_UUIDOF for new interfaces
- add SAL annotations
- turn output argument validation for -P into warning
- handle warnings without concatenating them to main output
- update spirv preprocessing and compilation paths
- return E_NOTIMPL from IDxcUtils::CreateReflection
- cleanup: DxcContainerBuilder back to utf8, DxcTestUtils: remove comment

* Fix some warnings from clang/gcc.

* Fix unicode conversion problems on linux, where sizeof(wchar_t) == 4

Note this is an intermediate fix.
On linux, what we are calling utf16 is actually a wide string
that's probably utf32.  This change fixes issues introduced by
the new interface changes so things are consistent and pass tests.

A future fix should correct the encodings so they are correctly labeled
on platforms where wchar_t doesn't mean UTF16.

* Return false for IsBufferNullTerminated when CP_ACP.

One Disassembler test was crashing because it created a pinned blob
with a size of 1 << 31 + 1 without actual memory backing it.
IsBufferNullTerminated would then attempt to check whether the buffer was
null-terminated, causing an access violation (AV).

This change also removes CP_UTF8 from this test where it was creating
binary blobs, not UTF8 text blobs.
Tex Riddell 5 năm trước cách đây
mục cha
commit
f4965b71dd
100 tập tin đã thay đổi với 4545 bổ sung và 775 xóa
  1. 16 0
      include/dxc/DXIL/DxilMetadataHelper.h
  2. 1 0
      include/dxc/DXIL/DxilPDB.h
  3. 2 1
      include/dxc/DxilContainer/DxilContainer.h
  4. 2 1
      include/dxc/DxilContainer/DxilContainerAssembler.h
  5. 2 2
      include/dxc/DxilContainer/DxilPipelineStateValidation.h
  6. 2 0
      include/dxc/DxilContainer/DxilRuntimeReflection.h
  7. 2 2
      include/dxc/DxilRootSignature/DxilRootSignature.h
  8. 1 1
      include/dxc/HLSL/DxilConvergentName.h
  9. 77 0
      include/dxc/HLSL/DxilValueCache.h
  10. 3 3
      include/dxc/Support/DxcLangExtensionsHelper.h
  11. 25 8
      include/dxc/Support/FileIOHelper.h
  12. 5 1
      include/dxc/Support/HLSLOptions.h
  13. 29 8
      include/dxc/Support/HLSLOptions.td
  14. 7 0
      include/dxc/Support/Unicode.h
  15. 2 0
      include/dxc/Support/WinAdapter.h
  16. 463 50
      include/dxc/Support/dxcapi.impl.h
  17. 2 2
      include/dxc/Support/dxcapi.use.h
  18. 1 1
      include/dxc/Support/dxcfilesystem.h
  19. 321 71
      include/dxc/dxcapi.h
  20. 9 1
      include/dxc/dxcdxrfallbackcompiler.h
  21. 5 0
      include/llvm/Analysis/InstructionSimplify.h
  22. 51 0
      include/llvm/IR/IRBuilder.h
  23. 3 0
      include/llvm/InitializePasses.h
  24. 10 0
      include/llvm/Transforms/Scalar.h
  25. 335 6
      lib/Analysis/InstructionSimplify.cpp
  26. 36 0
      lib/DXIL/DxilMetadataHelper.cpp
  27. 6 2
      lib/DXIL/DxilModule.cpp
  28. 2 1
      lib/DXIL/DxilOperations.cpp
  29. 58 33
      lib/DXIL/DxilPDB.cpp
  30. 1 1
      lib/DXIL/DxilShaderFlags.cpp
  31. 5 3
      lib/DXIL/DxilShaderModel.cpp
  32. 5 1
      lib/DXIL/DxilUtil.cpp
  33. 523 271
      lib/DxcSupport/FileIOHelper.cpp
  34. 65 28
      lib/DxcSupport/HLSLOptions.cpp
  35. 26 10
      lib/DxcSupport/Unicode.cpp
  36. 102 20
      lib/DxcSupport/dxcapi.use.cpp
  37. 5 4
      lib/DxcSupport/dxcmem.cpp
  38. 17 4
      lib/DxilContainer/DxilContainerAssembler.cpp
  39. 5 0
      lib/DxilDia/DxilDiaSymbolManager.cpp
  40. 1 1
      lib/DxilDia/DxilDiaSymbolManager.h
  41. 1 1
      lib/DxrFallback/StateFunctionTransform.cpp
  42. 2 0
      lib/HLSL/CMakeLists.txt
  43. 1 1
      lib/HLSL/ComputeViewIdStateBuilder.cpp
  44. 5 0
      lib/HLSL/DxcOptimizer.cpp
  45. 174 4
      lib/HLSL/DxilCondenseResources.cpp
  46. 27 5
      lib/HLSL/DxilContainerReflection.cpp
  47. 164 0
      lib/HLSL/DxilNoops.cpp
  48. 7 14
      lib/HLSL/DxilPatchShaderRecordBindings.cpp
  49. 25 5
      lib/HLSL/DxilPreparePasses.cpp
  50. 451 0
      lib/HLSL/DxilValueCache.cpp
  51. 1 1
      lib/HLSL/HLExpandStoreIntrinsics.cpp
  52. 4 3
      lib/HLSL/HLModule.cpp
  53. 1 21
      lib/HLSL/HLOperationLower.cpp
  54. 2 2
      lib/HLSL/HLSignatureLower.cpp
  55. 28 17
      lib/Transforms/IPO/PassManagerBuilder.cpp
  56. 1 0
      lib/Transforms/Scalar/CMakeLists.txt
  57. 239 0
      lib/Transforms/Scalar/DxilEliminateVector.cpp
  58. 3 3
      lib/Transforms/Scalar/DxilEraseDeadRegion.cpp
  59. 4 23
      lib/Transforms/Scalar/DxilLoopUnroll.cpp
  60. 2 9
      lib/Transforms/Scalar/LowerTypePasses.cpp
  61. 35 0
      lib/Transforms/Scalar/SROA.cpp
  62. 194 50
      lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
  63. 45 3
      lib/Transforms/Scalar/Scalarizer.cpp
  64. 1 1
      tools/clang/lib/CodeGen/CGHLSLMS.cpp
  65. 1 0
      tools/clang/lib/Frontend/CMakeLists.txt
  66. 39 0
      tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_nested_noopt.hlsl
  67. 28 0
      tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_noopt.hlsl
  68. 41 0
      tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_opt.hlsl
  69. 2 1
      tools/clang/test/CodeGenSPIRV/spirv.debug.cl-option.hlsl
  70. 18 0
      tools/clang/test/HLSLFileCheck/dxil/debug/dyn_vec.hlsl
  71. 11 0
      tools/clang/test/HLSLFileCheck/dxil/debug/fcgl.hlsl
  72. 23 0
      tools/clang/test/HLSLFileCheck/dxil/debug/global_dyn_vec.hlsl
  73. 22 0
      tools/clang/test/HLSLFileCheck/dxil/debug/global_vec.hlsl
  74. 26 0
      tools/clang/test/HLSLFileCheck/dxil/debug/gv_od.hlsl
  75. 9 4
      tools/clang/test/HLSLFileCheck/dxil/debug/locals/matrix_no_opt.hlsl
  76. 2 2
      tools/clang/test/HLSLFileCheck/dxil/debug/locals/temporary_dbg_declare.hlsl
  77. 28 0
      tools/clang/test/HLSLFileCheck/dxil/debug/mat3x2_dbg.hlsl
  78. 25 0
      tools/clang/test/HLSLFileCheck/dxil/debug/mat_dbg.hlsl
  79. 5 8
      tools/clang/test/HLSLFileCheck/dxil/debug/misc/intrinsic4_dbg.hlsl
  80. 1 1
      tools/clang/test/HLSLFileCheck/dxil/debug/misc/share_mem_dbg.hlsl
  81. 33 0
      tools/clang/test/HLSLFileCheck/dxil/debug/no_fold.hlsl
  82. 38 0
      tools/clang/test/HLSLFileCheck/dxil/debug/no_fold_vec.hlsl
  83. 34 0
      tools/clang/test/HLSLFileCheck/dxil/debug/no_fold_vec_array.hlsl
  84. 149 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_call.hlsl
  85. 38 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold.hlsl
  86. 43 0
      tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold_vec.hlsl
  87. 44 0
      tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/calculations.hlsl
  88. 32 0
      tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/cfg.hlsl
  89. 40 0
      tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/cfg2.hlsl
  90. 34 0
      tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/lexicalBlock.hlsl
  91. 22 0
      tools/clang/test/HLSLFileCheck/dxil/debug/vec_dbg.hlsl
  92. 1 1
      tools/clang/test/HLSLFileCheck/hlsl/control_flow/if_else/if2.hlsl
  93. 6 4
      tools/clang/test/HLSLFileCheck/hlsl/objects/Texture/sample_kwd.hlsl
  94. 3 3
      tools/clang/test/HLSLFileCheck/hlsl/types/boolean/bool_scalar_swizzle.hlsl
  95. 5 18
      tools/clang/test/HLSLFileCheck/hlsl/types/boolean/local_load_store.hlsl
  96. 27 0
      tools/clang/test/HLSLFileCheck/hlsl/types/boolean/local_load_store_scalar.hlsl
  97. 2 2
      tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/center/center_kwd.hlsl
  98. 3 2
      tools/clang/test/HLSLFileCheck/samples/d3d11/SubD11_SubDToBezierHS.hlsl
  99. 2 2
      tools/clang/tools/dxa/dxa.cpp
  100. 58 27
      tools/clang/tools/dxclib/dxc.cpp

+ 16 - 0
include/dxc/DXIL/DxilMetadataHelper.h

@@ -21,6 +21,7 @@ class LLVMContext;
 class Module;
 class Function;
 class Instruction;
+class DbgDeclareInst;
 class Value;
 class MDOperand;
 class Metadata;
@@ -54,6 +55,14 @@ struct DxilFunctionProps;
 class DxilSubobjects;
 class DxilSubobject;
 
+// Additional debug information for SROA'ed array variables,
+// where adjacent elements in DXIL might not have been adjacent
+// in the original user variable.
+struct DxilDIArrayDim {
+  unsigned StrideInBits;
+  unsigned NumElements;
+};
+
 /// Use this class to manipulate DXIL-specific metadata.
 // In our code, only DxilModule and HLModule should use this class.
 class DxilMDHelper {
@@ -217,6 +226,9 @@ public:
   // NonUniform attribute.
   static const char kDxilNonUniformAttributeMDName[];
 
+  // Variable debug layout metadata.
+  static const char kDxilVariableDebugLayoutMDName[];
+
   // Validator version.
   static const char kDxilValidatorVersionMDName[];
   // Validator version uses the same constants for fields as kDxilVersion*
@@ -484,6 +496,10 @@ public:
   static void MarkPrecise(llvm::Instruction *inst);
   static bool IsMarkedNonUniform(const llvm::Instruction *inst);
   static void MarkNonUniform(llvm::Instruction *inst);
+  static bool GetVariableDebugLayout(llvm::DbgDeclareInst *inst,
+    unsigned &StartOffsetInBits, std::vector<DxilDIArrayDim> &ArrayDims);
+  static void SetVariableDebugLayout(llvm::DbgDeclareInst *inst,
+    unsigned StartOffsetInBits, const std::vector<DxilDIArrayDim> &ArrayDims);
 
 private:
   llvm::LLVMContext &m_Ctx;

+ 1 - 0
include/dxc/DXIL/DxilPDB.h

@@ -19,6 +19,7 @@ struct IMalloc;
 namespace hlsl {
 namespace pdb {
 
+  HRESULT LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **ppHash, IDxcBlob **ppContainer);
   HRESULT LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **pOutContainer);
   HRESULT WriteDxilPDB(IMalloc *pMalloc, IDxcBlob *pContainer, llvm::ArrayRef<BYTE> HashData, IDxcBlob **ppOutBlob);
 }

+ 2 - 1
include/dxc/DxilContainer/DxilContainer.h

@@ -49,7 +49,7 @@ enum class DxilShaderHashFlags : uint32_t {
 typedef struct DxilShaderHash {
   uint32_t Flags; // DxilShaderHashFlags
   uint8_t Digest[DxilContainerHashSize];
-} DxcShaderHash;
+} DxilShaderHash;
 
 struct DxilContainerVersion {
   uint16_t Major;
@@ -410,6 +410,7 @@ enum class SerializeDxilFlags : uint32_t {
   DebugNameDependOnSource     = 1 << 2, // Make the debug name depend on source (and not just final module).
   StripReflectionFromDxilPart = 1 << 3, // Strip Reflection info from DXIL part.
   IncludeReflectionPart       = 1 << 4, // Include reflection in STAT part.
+  StripRootSignature          = 1 << 5, // Strip Root Signature from main shader container.
 };
 inline SerializeDxilFlags& operator |=(SerializeDxilFlags& l, const SerializeDxilFlags& r) {
   l = static_cast<SerializeDxilFlags>(static_cast<int>(l) | static_cast<int>(r));

+ 2 - 1
include/dxc/DxilContainer/DxilContainerAssembler.h

@@ -52,7 +52,8 @@ void SerializeDxilContainerForModule(hlsl::DxilModule *pModule,
                                      llvm::StringRef DebugName,
                                      SerializeDxilFlags Flags,
                                      DxilShaderHash *pShaderHashOut = nullptr,
-                                     AbstractMemoryStream *pReflectionStreamOut = nullptr);
+                                     AbstractMemoryStream *pReflectionStreamOut = nullptr,
+                                     AbstractMemoryStream *pRootSigStreamOut = nullptr);
 void SerializeDxilContainerForRootSignature(hlsl::RootSignatureHandle *pRootSigHandle,
                                      AbstractMemoryStream *pStream);
 

+ 2 - 2
include/dxc/DxilContainer/DxilPipelineStateValidation.h

@@ -443,7 +443,7 @@ public:
   // returns true if no errors occurred.
   bool InitFromPSV0(const void* pBits, uint32_t size) {
     if(!(pBits != nullptr)) return false;
-    uint8_t* pCurBits = (uint8_t*)pBits;
+    uint8_t* pCurBits = (uint8_t*)const_cast<void*>(pBits);
     uint32_t minsize = sizeof(PSVRuntimeInfo0) + sizeof(uint32_t) * 2;
     if(!(size >= minsize)) return false;
     m_uPSVRuntimeInfoSize = *((const uint32_t*)pCurBits);
@@ -534,7 +534,7 @@ public:
 
       // Input to Output dependencies
       for (unsigned i = 0; i < 4; i++) {
-        if (m_pPSVRuntimeInfo1->SigOutputVectors[i] > 0 && m_pPSVRuntimeInfo1->SigInputVectors > 0) {
+        if (!IsMS() && m_pPSVRuntimeInfo1->SigOutputVectors[i] > 0 && m_pPSVRuntimeInfo1->SigInputVectors > 0) {
           minsize += PSVComputeInputOutputTableSize(m_pPSVRuntimeInfo1->SigInputVectors, m_pPSVRuntimeInfo1->SigOutputVectors[i]);
           if (!(size >= minsize)) return false;
           m_pInputToOutputTable = (uint32_t*)pCurBits;

+ 2 - 0
include/dxc/DxilContainer/DxilRuntimeReflection.h

@@ -137,6 +137,7 @@ public:
   const char *Get(uint32_t offset) const {
     _Analysis_assume_(offset < m_size && m_table &&
                       m_table[m_size - 1] == '\0');
+    (void)m_size; // avoid unused private warning if use above is ignored.
     return m_table + offset;
   }
 };
@@ -185,6 +186,7 @@ public:
       : m_table(table), m_size(size) {}
   const void *Get(uint32_t offset) const {
     _Analysis_assume_(offset < m_size && m_table);
+    (void)m_size; // avoid unused private warning if use above is ignored.
     return (const void*)(((const char*)m_table) + offset);
   }
 };

+ 2 - 2
include/dxc/DxilRootSignature/DxilRootSignature.h

@@ -240,7 +240,7 @@ struct DxilDescriptorRange {
 };
 struct DxilRootDescriptorTable {
   uint32_t NumDescriptorRanges;
-  _Field_size_full_(NumDescriptorRanges)  const DxilDescriptorRange *pDescriptorRanges;
+  _Field_size_full_(NumDescriptorRanges)  DxilDescriptorRange *pDescriptorRanges;
 };
 struct DxilRootConstants {
   uint32_t ShaderRegister;
@@ -275,7 +275,7 @@ struct DxilDescriptorRange1 {
 };
 struct DxilRootDescriptorTable1 {
   uint32_t NumDescriptorRanges;
-  _Field_size_full_(NumDescriptorRanges)  const DxilDescriptorRange1 *pDescriptorRanges;
+  _Field_size_full_(NumDescriptorRanges)  DxilDescriptorRange1 *pDescriptorRanges;
 };
 struct DxilRootParameter1 {
   DxilRootParameterType ParameterType;

+ 1 - 1
include/dxc/HLSL/DxilConvergentName.h

@@ -11,5 +11,5 @@
 #pragma once
 
 namespace hlsl {
-  static char *kConvergentFunctionPrefix = "dxil.convergent.marker.";
+  static const char *kConvergentFunctionPrefix = "dxil.convergent.marker.";
 }

+ 77 - 0
include/dxc/HLSL/DxilValueCache.h

@@ -0,0 +1,77 @@
+//===--------- DxilValueCache.h - Dxil Constant Value Cache --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+namespace llvm {
+
+class Module;
+class DominatorTree;
+
+struct DxilValueCache : public ModulePass {
+  static char ID;
+
+  // Special Weak Value to Weak Value map.
+  struct WeakValueMap {
+    struct ValueVH : public CallbackVH {
+      ValueVH(Value *V) : CallbackVH(V) {}
+      void allUsesReplacedWith(Value *) override { setValPtr(nullptr); }
+    };
+    struct ValueEntry {
+      WeakVH Value;
+      ValueVH Self;
+      ValueEntry() : Value(nullptr), Self(nullptr) {}
+      inline void Set(llvm::Value *Key, llvm::Value *V) { Self = Key; Value = V; }
+      inline bool IsStale() const { return Self == nullptr; }
+    };
+    ValueMap<const Value *, ValueEntry> Map;
+    Value *Get(Value *V);
+    void Set(Value *Key, Value *V);
+    bool Seen(Value *v);
+    void SetSentinel(Value *V);
+    void dump() const;
+  private:
+    Value *GetSentinel(LLVMContext &Ctx);
+    std::unique_ptr<Value> Sentinel;
+  };
+
+private:
+
+  WeakValueMap ValueMap;
+
+  void MarkAlwaysReachable(BasicBlock *BB);
+  void MarkNeverReachable(BasicBlock *BB);
+  bool IsAlwaysReachable_(BasicBlock *BB);
+  bool IsNeverReachable_(BasicBlock *BB);
+  Value *OptionallyGetValue(Value *V);
+  Value *ProcessValue(Value *V, DominatorTree *DT);
+
+  Value *ProcessAndSimplify_PHI(Instruction *I, DominatorTree *DT);
+  Value *ProcessAndSimpilfy_Br(Instruction *I, DominatorTree *DT);
+  Value *SimplifyAndCacheResult(Instruction *I, DominatorTree *DT);
+
+public:
+
+  const char *getPassName() const override;
+  DxilValueCache();
+
+  bool runOnModule(Module &M) override { return false; } // Doesn't do anything by itself.
+  void dump() const;
+  Value *GetValue(Value *V, DominatorTree *DT=nullptr);
+  bool IsAlwaysReachable(BasicBlock *BB, DominatorTree *DT=nullptr);
+  bool IsNeverReachable(BasicBlock *BB, DominatorTree *DT=nullptr);
+};
+
+void initializeDxilValueCachePass(class llvm::PassRegistry &);
+ModulePass *createDxilValueCachePass();
+
+}
+
+

+ 3 - 3
include/dxc/Support/DxcLangExtensionsHelper.h

@@ -174,9 +174,9 @@ public:
 
     // Define a little function to convert encoded blob into a string.
     auto GetErrorAsString = [&name](const CComPtr<IDxcBlobEncoding> &pBlobString) -> std::string {
-      CComPtr<IDxcBlobEncoding> pUTF8BlobStr;
-      if (SUCCEEDED(hlsl::DxcGetBlobAsUtf8(pBlobString, &pUTF8BlobStr)))
-        return std::string(static_cast<char*>(pUTF8BlobStr->GetBufferPointer()), pUTF8BlobStr->GetBufferSize());
+      CComPtr<IDxcBlobUtf8> pUTF8BlobStr;
+      if (SUCCEEDED(hlsl::DxcGetBlobAsUtf8(pBlobString, DxcGetThreadMallocNoRef(), &pUTF8BlobStr)))
+        return std::string(pUTF8BlobStr->GetStringPointer(), pUTF8BlobStr->GetStringLength());
       else
         return std::string("invalid semantic define " + name);
     };

+ 25 - 8
include/dxc/Support/FileIOHelper.h

@@ -20,6 +20,8 @@
 // Forward declarations.
 struct IDxcBlob;
 struct IDxcBlobEncoding;
+struct IDxcBlobUtf8;
+struct IDxcBlobUtf16;
 
 namespace hlsl {
 
@@ -131,6 +133,26 @@ void WriteBinaryFile(_In_z_ LPCWSTR pFileName,
 UINT32 DxcCodePageFromBytes(_In_count_(byteLen) const char *bytes,
                             size_t byteLen) throw();
 
+// More general create blob functions, used by other functions
+// Null pMalloc means use current thread malloc.
+// bPinned will point to existing memory without managing it;
+// bCopy will copy to heap; bPinned and bCopy are mutually exclusive.
+// If encodingKnown, UTF-8 or UTF-16, and null-termination possible,
+// an IDxcBlobUtf8 or IDxcBlobUtf16 will be constructed.
+// If text, it's best if size includes null terminator when not copying,
+// otherwise IDxcBlobUtf8 or IDxcBlobUtf16 will not be constructed.
+HRESULT DxcCreateBlob(
+    LPCVOID pPtr, SIZE_T size, bool bPinned, bool bCopy,
+    bool encodingKnown, UINT32 codePage,
+    IMalloc *pMalloc, IDxcBlobEncoding **ppBlobEncoding) throw();
+// Create from blob references original blob.
+// Pass nonzero for offset or length for sub-blob reference.
+HRESULT DxcCreateBlobEncodingFromBlob(
+    IDxcBlob *pFromBlob, UINT32 offset, UINT32 length,
+    bool encodingKnown, UINT32 codePage,
+    IMalloc *pMalloc, IDxcBlobEncoding **ppBlobEncoding) throw();
+
+// Load files
 HRESULT
 DxcCreateBlobFromFile(_In_opt_ IMalloc *pMalloc, LPCWSTR pFileName,
                       _In_opt_ UINT32 *pCodePage,
@@ -191,16 +213,11 @@ DxcCreateBlobWithEncodingOnMallocCopy(
   _In_ IMalloc *pIMalloc, _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
   _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) throw();
 
-HRESULT DxcGetBlobAsUtf8(_In_ IDxcBlob *pBlob,
-                         _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) throw();
-HRESULT
-DxcGetBlobAsUtf8NullTerm(
-    _In_ IDxcBlob *pBlob,
-    _COM_Outptr_ IDxcBlobEncoding **ppBlobEncoding) throw();
-
+HRESULT DxcGetBlobAsUtf8(_In_ IDxcBlob *pBlob, _In_ IMalloc *pMalloc,
+                         _COM_Outptr_ IDxcBlobUtf8 **pBlobEncoding) throw();
 HRESULT
 DxcGetBlobAsUtf16(_In_ IDxcBlob *pBlob, _In_ IMalloc *pMalloc,
-                  _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) throw();
+                  _COM_Outptr_ IDxcBlobUtf16 **pBlobEncoding) throw();
 
 bool IsBlobNullOrEmpty(_In_opt_ IDxcBlob *pBlob) throw();
 

+ 5 - 1
include/dxc/Support/HLSLOptions.h

@@ -102,6 +102,9 @@ public:
   llvm::StringRef OutputHeader; // OPT_Fh
   llvm::StringRef OutputObject; // OPT_Fo
   llvm::StringRef OutputWarningsFile; // OPT_Fe
+  llvm::StringRef OutputReflectionFile; // OPT_Fre
+  llvm::StringRef OutputRootSigFile; // OPT_Frs
+  llvm::StringRef OutputShaderHashFile; // OPT_Fsh
   llvm::StringRef Preprocess; // OPT_P
   llvm::StringRef TargetProfile; // OPT_target_profile
   llvm::StringRef VariableName; // OPT_Vn
@@ -112,6 +115,7 @@ public:
   llvm::StringRef FloatDenormalMode; // OPT_denorm
   std::vector<std::string> Exports; // OPT_exports
   llvm::StringRef DefaultLinkage; // OPT_default_linkage
+  unsigned DefaultTextCodePage = DXC_CP_UTF8; // OPT_encoding
 
   bool AllResourcesBound = false; // OPT_all_resources_bound
   bool AstDump = false; // OPT_ast_dump
@@ -144,7 +148,7 @@ public:
   bool UseHexLiterals = false; // OPT_Lx
   bool UseInstructionByteOffsets = false; // OPT_No
   bool UseInstructionNumbers = false; // OPT_Ni
-  bool NotUseLegacyCBufLoad = false;  // OPT_not_use_legacy_cbuf_load
+  bool NotUseLegacyCBufLoad = false;  // OPT_no_legacy_cbuf_layout
   bool PackPrefixStable = false;  // OPT_pack_prefix_stable
   bool PackOptimized = false;  // OPT_pack_optimized
   bool DisplayIncludeProcess = false; // OPT__vi

+ 29 - 8
include/dxc/Support/HLSLOptions.td

@@ -219,11 +219,17 @@ def flegacy_macro_expansion : Flag<["-", "/"], "flegacy-macro-expansion">, Group
     HelpText<"Expand the operands before performing token-pasting operation (fxc behavior)">;
 def flegacy_resource_reservation : Flag<["-", "/"], "flegacy-resource-reservation">, Group<hlslcomp_Group>, Flags<[CoreOption, DriverOption]>,
     HelpText<"Reserve unused explicit register assignments for compatibility with shader model 5.0 and below">;
-def not_use_legacy_cbuf_load : Flag<["-", "/"], "not_use_legacy_cbuf_load">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+def no_legacy_cbuf_layout : Flag<["-", "/"], "no-legacy-cbuf-layout">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
   HelpText<"Do not use legacy cbuffer load">;
-def pack_prefix_stable : Flag<["-", "/"], "pack_prefix_stable">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+def not_use_legacy_cbuf_load_ : Flag<["-", "/"], "not_use_legacy_cbuf_load">, Group<hlslcomp_Group>, Flags<[CoreOption, HelpHidden]>,
+  HelpText<"Do not use legacy cbuffer load">;
+def pack_prefix_stable : Flag<["-", "/"], "pack-prefix-stable">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+  HelpText<"(default) Pack signatures preserving prefix-stable property - appended elements will not disturb placement of prior elements">;
+def pack_prefix_stable_ : Flag<["-", "/"], "pack_prefix_stable">, Group<hlslcomp_Group>, Flags<[CoreOption, HelpHidden]>,
   HelpText<"(default) Pack signatures preserving prefix-stable property - appended elements will not disturb placement of prior elements">;
-def pack_optimized : Flag<["-", "/"], "pack_optimized">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+def pack_optimized : Flag<["-", "/"], "pack-optimized">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+  HelpText<"Optimize signature packing assuming identical signature provided for each connecting stage">;
+def pack_optimized_ : Flag<["-", "/"], "pack_optimized">, Group<hlslcomp_Group>, Flags<[CoreOption, HelpHidden]>,
   HelpText<"Optimize signature packing assuming identical signature provided for each connecting stage">;
 def hlsl_version : Separate<["-", "/"], "HV">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
   HelpText<"HLSL version (2016, 2017, 2018). Default is 2018">;
@@ -242,9 +248,15 @@ def export_shaders_only : Flag<["-", "/"], "export-shaders-only">, Group<hlslcom
   HelpText<"Only export shaders when compiling a library">;
 def default_linkage : Separate<["-", "/"], "default-linkage">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
   HelpText<"Set default linkage for non-shader functions when compiling or linking to a library target (internal, external)">;
+def encoding : Separate<["-", "/"], "encoding">, Group<hlslcomp_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Set default encoding for text outputs (utf8|utf16) default=utf8">;
 def validator_version : Separate<["-", "/"], "validator-version">, Group<hlslcomp_Group>, Flags<[CoreOption, HelpHidden]>,
   HelpText<"Override validator version for module.  Format: <major.minor> ; Default: DXIL.dll version or current internal version.">;
 
+// Used with API only
+def skip_serialization : Flag<["-", "/"], "skip-serialization">, Group<hlslcore_Group>, Flags<[CoreOption, HelpHidden]>,
+  HelpText<"Return a module interface instead of serialized output">;
+
 // SPIRV Change Starts
 def spirv : Flag<["-"], "spirv">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
   HelpText<"Generate SPIR-V code">;
@@ -316,9 +328,9 @@ def Zpr : Flag<["-", "/"], "Zpr">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
 def Zpc : Flag<["-", "/"], "Zpc">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
   HelpText<"Pack matrices in column-major order">;
 def Zss : Flag<["-", "/"], "Zss">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
-  HelpText<"Build debug name considering source information">;
+  HelpText<"Compute Shader Hash considering source information">;
 def Zsb : Flag<["-", "/"], "Zsb">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
-  HelpText<"Build debug name considering only output binary">;
+  HelpText<"Compute Shader Hash considering only output binary">;
 
 // deprecated /Gpp def Gpp : Flag<["-", "/"], "Gpp">, HelpText<"Force partial precision">;
 def Gfa : Flag<["-", "/"], "Gfa">, HelpText<"Avoid flow control constructs">, Flags<[CoreOption]>, Group<hlslcomp_Group>;
@@ -339,6 +351,10 @@ def Fe : JoinedOrSeparate<["-", "/"], "Fe">, MetaVarName<"<file>">, HelpText<"Ou
 def Fd : JoinedOrSeparate<["-", "/"], "Fd">, MetaVarName<"<file>">,
   HelpText<"Write debug information to the given file, or automatically named file in directory when ending in '\\'">,
   Flags<[CoreOption, DriverOption]>, Group<hlslcomp_Group>;
+def Fre : Separate<["-", "/"], "Fre">, MetaVarName<"<file>">, HelpText<"Output reflection to the given file">, Flags<[CoreOption, DriverOption]>, Group<hlslcomp_Group>;
+def Frs : Separate<["-", "/"], "Frs">, MetaVarName<"<file>">, HelpText<"Output root signature to the given file">, Flags<[CoreOption, DriverOption]>, Group<hlslcomp_Group>;
+def Fsh : Separate<["-", "/"], "Fsh">, MetaVarName<"<file>">, HelpText<"Output shader hash to the given file">, Flags<[CoreOption, DriverOption]>, Group<hlslcomp_Group>;
+
 def Vn : JoinedOrSeparate<["-", "/"], "Vn">, MetaVarName<"<name>">, HelpText<"Use <name> as variable name in header file">, Flags<[DriverOption]>, Group<hlslcomp_Group>;
 def Cc : Flag<["-", "/"], "Cc">, HelpText<"Output color coded assembly listings">, Group<hlslcomp_Group>, Flags<[DriverOption]>;
 def Ni : Flag<["-", "/"], "Ni">, HelpText<"Output instruction numbers in assembly listings">, Group<hlslcomp_Group>, Flags<[DriverOption]>;
@@ -366,7 +382,8 @@ def Qstrip_rootsignature : Flag<["-", "/"], "Qstrip_rootsignature">, Flags<[Core
 def setrootsignature     : JoinedOrSeparate<["-", "/"], "setrootsignature">,     MetaVarName<"<file>">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>, HelpText<"Attach root signature to shader bytecode">;
 def extractrootsignature : Flag<["-", "/"], "extractrootsignature">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Extract root signature from shader bytecode (must be used with /Fo <file>)">;
 def verifyrootsignature  : JoinedOrSeparate<["-", "/"], "verifyrootsignature">,  MetaVarName<"<file>">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Verify shader bytecode with root signature">;
-def force_rootsig_ver    : JoinedOrSeparate<["-", "/"], "force_rootsig_ver">,    Flags<[CoreOption]>, MetaVarName<"<profile>">, Group<hlslcomp_Group>, HelpText<"force root signature version (rootsig_1_1 if omitted)">;
+def force_rootsig_ver    : JoinedOrSeparate<["-", "/"], "force-rootsig-ver">,    Flags<[CoreOption]>, MetaVarName<"<profile>">, Group<hlslcomp_Group>, HelpText<"force root signature version (rootsig_1_1 if omitted)">;
+def force_rootsig_ver_    : JoinedOrSeparate<["-", "/"], "force_rootsig_ver">,    Flags<[CoreOption, HelpHidden]>, MetaVarName<"<profile>">, Group<hlslcomp_Group>, HelpText<"force root signature version (rootsig_1_1 if omitted)">;
 
 // Temporary TEST options, until reflection inside DXIL part can always be stripped
 def Qkeep_reflect_in_dxil : Flag<["-", "/"], "Qkeep_reflect_in_dxil">,
@@ -386,9 +403,13 @@ def matchUAVs : JoinedOrSeparate<["-", "/"], "matchUAVs">, MetaVarName<"<file>">
 def enable_unbounded_descriptor_tables : Flag<["-", "/"], "enable_unbounded_descriptor_tables">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
   HelpText<"Enables unbounded descriptor tables">;
 */
-def res_may_alias : Flag<["-", "/"], "res_may_alias">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
+def res_may_alias : Flag<["-", "/"], "res-may-alias">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
+  HelpText<"Assume that UAVs/SRVs may alias">;
+def res_may_alias_ : Flag<["-", "/"], "res_may_alias">, Flags<[CoreOption, HelpHidden]>, Group<hlslcomp_Group>,
   HelpText<"Assume that UAVs/SRVs may alias">;
-def all_resources_bound : Flag<["-", "/"], "all_resources_bound">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
+def all_resources_bound : Flag<["-", "/"], "all-resources-bound">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
+  HelpText<"Enables agressive flattening">;
+def all_resources_bound_ : Flag<["-", "/"], "all_resources_bound">, Flags<[CoreOption, HelpHidden]>, Group<hlslcomp_Group>,
   HelpText<"Enables agressive flattening">;
 
 def setprivate : JoinedOrSeparate<["-", "/"], "setprivate">, Flags<[DriverOption]>, MetaVarName<"<file>">, Group<hlslutil_Group>,

+ 7 - 0
include/dxc/Support/Unicode.h

@@ -45,9 +45,15 @@ typedef char acp_char;
 // A ccp_char is a character encoded in the console code page.
 typedef char ccp_char;
 
+_Success_(return != false)
+bool UTF8ToConsoleString(_In_opt_count_(textLen) const char* text, _In_ size_t textLen, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
+
 _Success_(return != false)
 bool UTF8ToConsoleString(_In_z_ const char* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
 
+_Success_(return != false)
+bool UTF16ToConsoleString(_In_opt_count_(textLen) const wchar_t* text, _In_ size_t textLen, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
+
 _Success_(return != false)
 bool UTF16ToConsoleString(_In_z_ const wchar_t* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
 
@@ -60,6 +66,7 @@ bool UTF8ToUTF16String(_In_opt_count_(cbUTF8) const char *pUTF8, size_t cbUTF8,
 std::wstring UTF8ToUTF16StringOrThrow(_In_z_ const char *pUTF8);
 
 _Success_(return != false)
+bool UTF16ToUTF8String(_In_z_ const wchar_t *pUTF16, size_t cUTF16, _Inout_ std::string *pUTF8);
 bool UTF16ToUTF8String(_In_z_ const wchar_t *pUTF16, _Inout_ std::string *pUTF8);
 
 std::string UTF16ToUTF8StringOrThrow(_In_z_ const wchar_t *pUTF16);

+ 2 - 0
include/dxc/Support/WinAdapter.h

@@ -268,6 +268,7 @@
 #define _In_count_(size)
 #define _In_range_(lb, ub)
 #define _In_bytecount_(size)
+#define _In_opt_bytecount_(size)
 #define _In_NLS_string_(size)
 #define __in_bcount(size)
 
@@ -333,6 +334,7 @@
 #define _COM_Outptr_
 #define _COM_Outptr_opt_
 #define _COM_Outptr_result_maybenull_
+#define _COM_Outptr_opt_result_maybenull_
 
 #define _Null_
 #define _Notnull_

+ 463 - 50
include/dxc/Support/dxcapi.impl.h

@@ -15,6 +15,7 @@
 #include "dxc/dxcapi.h"
 #include "dxc/Support/microcom.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/ArrayRef.h"
 
 // Simple adaptor for IStream. Can probably do better.
 class raw_stream_ostream : public llvm::raw_ostream {
@@ -32,70 +33,269 @@ public:
   }
 };
 
-class DxcOperationResult : public IDxcOperationResult {
-private:
-  DXC_MICROCOM_TM_REF_FIELDS()
+namespace {
+// Copies the UTF-8 text at pStr into a new heap-backed blob tagged
+// DXC_CP_UTF8 and, when codePage is DXC_CP_UTF16, swaps it for a UTF-16
+// conversion of that blob. Any other codePage leaves the UTF-8 copy as is.
+// pStr and size are forwarded unchanged to the heap-copy helper.
+// On success *ppBlobEncoding receives the detached (owning) blob pointer.
+// NOTE(review): the heap copy reports failure through IFR while the
+// conversion uses IFT - presumably the latter throws instead of returning
+// an HRESULT; confirm against the macro definitions.
+HRESULT TranslateUtf8StringForOutput(
+    _In_opt_count_(size) LPCSTR pStr, SIZE_T size, UINT32 codePage, IDxcBlobEncoding **ppBlobEncoding) {
+  CComPtr<IDxcBlobEncoding> pBlobEncoding;
+  IFR(hlsl::DxcCreateBlobWithEncodingOnHeapCopy(pStr, size, DXC_CP_UTF8, &pBlobEncoding));
+  if (codePage == DXC_CP_UTF16) {
+    CComPtr<IDxcBlobUtf16> pBlobUtf16;
+    IFT(hlsl::DxcGetBlobAsUtf16(pBlobEncoding, nullptr, &pBlobUtf16))
+      pBlobEncoding = pBlobUtf16;
+  }
+  *ppBlobEncoding = pBlobEncoding.Detach();
+  return S_OK;
+}
 
-  void Init(_In_opt_ IDxcBlob *pResultBlob,
-            _In_opt_ IDxcBlobEncoding *pErrorBlob, HRESULT status) {
-    m_status = status;
-    m_result = pResultBlob;
-    m_errors = pErrorBlob;
+// Copies `size` wide characters starting at pStr into a new heap-backed blob
+// tagged DXC_CP_UTF16 and, when codePage is DXC_CP_UTF8, swaps it for a
+// UTF-8 conversion of that blob. Any other codePage leaves the wide copy
+// as is. On success *ppBlobEncoding receives the detached (owning) pointer.
+//
+// `size` is a character count (matching the _In_opt_count_ annotation and
+// the wcslen() value passed by SetString); the heap-copy helper takes a byte
+// count, so the value is scaled by sizeof(wchar_t) here.
+HRESULT TranslateUtf16StringForOutput(
+    _In_opt_count_(size) LPCWSTR pStr, SIZE_T size, UINT32 codePage, IDxcBlobEncoding **ppBlobEncoding) {
+  CComPtr<IDxcBlobEncoding> pBlobEncoding;
+  IFR(hlsl::DxcCreateBlobWithEncodingOnHeapCopy(pStr, size * sizeof(wchar_t), DXC_CP_UTF16, &pBlobEncoding));
+  if (codePage == DXC_CP_UTF8) {
+    CComPtr<IDxcBlobUtf8> pBlobUtf8;
+    IFT(hlsl::DxcGetBlobAsUtf8(pBlobEncoding, nullptr, &pBlobUtf8));
+    pBlobEncoding = pBlobUtf8;
   }
+  *ppBlobEncoding = pBlobEncoding.Detach();
+  return S_OK;
+}
 
-public:
-  DXC_MICROCOM_TM_ADDREF_RELEASE_IMPL()
-  DXC_MICROCOM_TM_CTOR(DxcOperationResult)
+// Re-encodes a text blob for output. The blob must expose IDxcBlobEncoding
+// with a known encoding of DXC_CP_UTF8 or DXC_CP_UTF16; anything else yields
+// E_INVALIDARG (or the failing QueryInterface/GetEncoding HRESULT).
+// The blob's byte size is converted to the unit each helper expects:
+// bytes for UTF-8, wide characters for UTF-16.
+HRESULT TranslateStringBlobForOutput(IDxcBlob *pBlob, UINT32 codePage, IDxcBlobEncoding **ppBlobEncoding) {
+  CComPtr<IDxcBlobEncoding> pEncoding;
+  IFR(pBlob->QueryInterface(&pEncoding));
+  BOOL known;
+  UINT32 inputCP;
+  IFR(pEncoding->GetEncoding(&known, &inputCP));
+  IFRBOOL(known, E_INVALIDARG);
+  if (inputCP == DXC_CP_UTF8) {
+    return TranslateUtf8StringForOutput((LPCSTR)pBlob->GetBufferPointer(), pBlob->GetBufferSize(), codePage, ppBlobEncoding);
+  } else if (inputCP == DXC_CP_UTF16) {
+    // GetBufferSize() is in bytes; the UTF-16 helper takes a character count.
+    return TranslateUtf16StringForOutput((LPCWSTR)pBlob->GetBufferPointer(), pBlob->GetBufferSize() / sizeof(wchar_t), codePage, ppBlobEncoding);
+  }
+  return E_INVALIDARG;
+}
+}
 
-  HRESULT m_status;
-  CComPtr<IDxcBlob> m_result;
-  CComPtr<IDxcBlobEncoding> m_errors;
+typedef enum DxcOutputType {
+  DxcOutputType_None    = 0,
+  DxcOutputType_Blob    = 1,
+  DxcOutputType_Text    = 2,
 
-  HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, void **ppvObject) override {
-    return DoBasicQueryInterface<IDxcOperationResult>(this, iid, ppvObject);
+  DxcOutputTypeForceDword = 0xFFFFFFFF
+} DxcOutputType;
+
+// Classifies an output kind as binary blob data or as text.
+// Kinds not listed in the switch fall through to DxcOutputType_None.
+inline DxcOutputType DxcGetOutputType(DXC_OUT_KIND kind) {
+  switch (kind) {
+  // Outputs handled as opaque binary blobs.
+  case DXC_OUT_OBJECT:
+  case DXC_OUT_PDB:
+  case DXC_OUT_SHADER_HASH:
+  case DXC_OUT_REFLECTION:
+  case DXC_OUT_ROOT_SIGNATURE:
+    return DxcOutputType_Blob;
+  // Outputs handled as text.
+  case DXC_OUT_ERRORS:
+  case DXC_OUT_DISASSEMBLY:
+  case DXC_OUT_HLSL:
+  case DXC_OUT_TEXT:
+    return DxcOutputType_Text;
   }
+  return DxcOutputType_None;
+}
 
-  static HRESULT CreateFromResultErrorStatus(_In_opt_ IDxcBlob *pResultBlob,
-                                             _In_opt_ IDxcBlobEncoding *pErrorBlob,
-                                             HRESULT status,
-                                             _COM_Outptr_ IDxcOperationResult **ppResult) {
-    *ppResult = nullptr;
-    CComPtr<DxcOperationResult> result = DxcOperationResult::Alloc(DxcGetThreadMallocNoRef());
-    IFROOM(result.p);
-    result->Init(pResultBlob, pErrorBlob, status);
-    *ppResult = result.Detach();
+// Update when new results are allowed
+static const unsigned kNumDxcOutputTypes = DXC_OUT_ROOT_SIGNATURE;
+static const SIZE_T kAutoSize = (SIZE_T)-1;
+static const LPCWSTR DxcOutNoName = nullptr;
+
+struct DxcOutputObject {
+  CComPtr<IUnknown> object;
+  CComPtr<IDxcBlobUtf16> name;
+  DXC_OUT_KIND kind = DXC_OUT_NONE;
+
+  /////////////////////////
+  // Convenient set methods
+  /////////////////////////
+
+  // Stores pUnknown as this output's object; a null pUnknown is a no-op that
+  // returns S_OK. Asserts that no object is currently set.
+  // When codePage is non-zero and the current `kind` is a text output, the
+  // object is first re-encoded to codePage; otherwise it is stored as given.
+  // `kind` is read here, so it must be assigned before this is called.
+  // Returns the QueryInterface/translation HRESULT on failure.
+  HRESULT SetObject(IUnknown *pUnknown, UINT32 codePage = DXC_CP_UTF8) {
+    DXASSERT_NOMSG(!object);
+    if (!pUnknown)
+      return S_OK;
+    if (codePage && DxcGetOutputType(kind) == DxcOutputType_Text) {
+      CComPtr<IDxcBlob> pBlob;
+      IFR(pUnknown->QueryInterface(&pBlob));
+      CComPtr<IDxcBlobEncoding> pEncoding;
+      // If not blob encoding, assume utf-8 text
+      // (any failure of the encoding-aware path falls back to treating the
+      // raw buffer as UTF-8)
+      if (FAILED(TranslateStringBlobForOutput(pBlob, codePage, &pEncoding)))
+        IFR(TranslateUtf8StringForOutput(
+          (LPCSTR)pBlob->GetBufferPointer(), pBlob->GetBufferSize(),
+          codePage, &pEncoding));
+      object = pEncoding;
+    } else {
+      object = pUnknown;
+    }
     return S_OK;
   }
+  // Stores a heap copy of the `size` bytes at pData as this output's object.
+  // A null pointer or zero size is a no-op that returns S_OK and leaves the
+  // object unset. Asserts that no object is currently set.
+  // Returns the blob-creation HRESULT on failure.
+  HRESULT SetObjectData(_In_opt_bytecount_(size) LPCVOID pData, SIZE_T size) {
+    DXASSERT_NOMSG(!object);
+    if (!pData || !size)
+      return S_OK;
+    // Hold the new blob in a CComPtr: assigning into `object` takes its own
+    // reference, so a raw pointer here would leak the creation reference.
+    CComPtr<IDxcBlob> pBlob;
+    IFR(hlsl::DxcCreateBlobOnHeapCopy(pData, size, &pBlob));
+    object = pBlob;
+    return S_OK;
+  }
+  HRESULT SetString(_In_ UINT32 codePage, _In_opt_count_(size) LPCWSTR pText, SIZE_T size = kAutoSize) {
+    DXASSERT_NOMSG(!object);
+    if (!pText)
+      return S_OK;
+    if (size == kAutoSize)
+      size = wcslen(pText);
+    CComPtr<IDxcBlobEncoding> pBlobEncoding;
+    IFR(TranslateUtf16StringForOutput(pText, size, codePage, &pBlobEncoding));
+    object = pBlobEncoding;
+    return S_OK;
+  }
+  HRESULT SetString(_In_ UINT32 codePage, _In_opt_count_(size) LPCSTR pText, SIZE_T size = kAutoSize) {
+    DXASSERT_NOMSG(!object);
+    if (!pText)
+      return S_OK;
+    if (size == kAutoSize)
+      size = strlen(pText);
+    CComPtr<IDxcBlobEncoding> pBlobEncoding;
+    IFR(TranslateUtf8StringForOutput(pText, size, codePage, &pBlobEncoding));
+    object = pBlobEncoding;
+    return S_OK;
+  }
+  HRESULT SetName(_In_opt_z_ IDxcBlobUtf16 *pName) {
+    DXASSERT_NOMSG(!name);
+    name = pName;
+    return S_OK;
+  }
+  HRESULT SetName(_In_opt_z_ LPCWSTR pName) {
+    DXASSERT_NOMSG(!name);
+    if (!pName)
+      return S_OK;
+    CComPtr<IDxcBlobEncoding> pBlobEncoding;
+    IFR(hlsl::DxcCreateBlobWithEncodingOnHeapCopy(
+          pName, (wcslen(pName) + 1) * sizeof(wchar_t), DXC_CP_UTF16, &pBlobEncoding));
+    return pBlobEncoding->QueryInterface(&name);
+  }
+  HRESULT SetName(_In_opt_z_ LPCSTR pName) {
+    DXASSERT_NOMSG(!name);
+    if (!pName)
+      return S_OK;
+    CComPtr<IDxcBlobEncoding> pBlobEncoding;
+    IFR(TranslateUtf8StringForOutput(pName, strlen(pName) + 1, DXC_CP_UTF16, &pBlobEncoding));
+    return pBlobEncoding->QueryInterface(&name);
+  }
+  // Sets the output name from UTF-8 text, converting it to UTF-16.
+  // An empty Name is a no-op that returns S_OK and leaves the name unset.
+  // Asserts that no name is currently set.
+  // Returns the translation or QueryInterface HRESULT on failure.
+  HRESULT SetName(_In_opt_z_ llvm::StringRef Name) {
+    DXASSERT_NOMSG(!name);
+    // Only an empty name is skipped; the previous check was inverted and
+    // discarded every non-empty name instead.
+    if (Name.empty())
+      return S_OK;
+    CComPtr<IDxcBlobEncoding> pBlobEncoding;
+    IFR(TranslateUtf8StringForOutput(Name.data(), Name.size(), DXC_CP_UTF16, &pBlobEncoding));
+    return pBlobEncoding->QueryInterface(&name);
+  }
 
-  static HRESULT
-  CreateFromUtf8Strings(_In_opt_z_ LPCSTR pErrorStr,
-      _In_opt_z_ LPCSTR pResultStr, HRESULT status,
-      _COM_Outptr_ IDxcOperationResult **pResult) {
-    *pResult = nullptr;
-    CComPtr<IDxcBlobEncoding> resultBlob;
-    CComPtr<IDxcBlobEncoding> errorBlob;
+  /////////////////////////////
+  // Static object constructors
+  /////////////////////////////
 
-    HRESULT hr = S_OK;
+  template<typename DataTy, typename NameTy>
+  static DxcOutputObject StringOutput(_In_ DXC_OUT_KIND kind,
+                                      _In_ UINT32 codePage,
+                                      _In_opt_count_(size) DataTy pText, _In_ SIZE_T size,
+                                      _In_opt_z_ NameTy pName) {
+    DxcOutputObject output;
+    output.kind = kind;
+    IFT(output.SetString(codePage, pText, size));
+    IFT(output.SetName(pName));
+    return output;
+  }
+  template<typename DataTy, typename NameTy>
+  static DxcOutputObject StringOutput(_In_ DXC_OUT_KIND kind,
+                                      _In_ UINT32 codePage,
+                                      _In_opt_ DataTy pText,
+                                      _In_opt_z_ NameTy pName) {
+    return StringOutput(kind, codePage, pText, kAutoSize, pName);
+  }
+  template<typename NameTy>
+  static DxcOutputObject DataOutput(_In_ DXC_OUT_KIND kind,
+                                    _In_opt_bytecount_(size) LPCVOID pData, _In_ SIZE_T size,
+                                    _In_opt_z_ NameTy pName) {
+    DxcOutputObject output;
+    output.kind = kind;
+    IFT(output.SetObjectData(pData, size));
+    IFT(output.SetName(pName));
+    return output;
+  }
+  template<typename NameTy>
+  static DxcOutputObject DataOutput(_In_ DXC_OUT_KIND kind,
+                                    _In_opt_ IDxcBlob *pBlob,
+                                    _In_opt_z_ NameTy pName) {
+    DxcOutputObject output;
+    output.kind = kind;
+    IFT(output.SetObject(pBlob));
+    IFT(output.SetName(pName));
+    return output;
+  }
+  static DxcOutputObject DataOutput(_In_ DXC_OUT_KIND kind,
+                                    _In_opt_ IDxcBlob *pBlob) {
+    return DataOutput(kind, pBlob, DxcOutNoName);
+  }
+  template<typename NameTy>
+  static DxcOutputObject DataOutput(_In_ DXC_OUT_KIND kind,
+                                    _In_ UINT32 codePage,
+                                    _In_opt_ IDxcBlob *pBlob,
+                                    _In_opt_z_ NameTy pName) {
+    DxcOutputObject output;
+    output.kind = kind;
+    IFT(output.SetObject(pBlob, codePage));
+    IFT(output.SetName(pName));
+    return output;
+  }
+  static DxcOutputObject DataOutput(_In_ DXC_OUT_KIND kind,
+                                    _In_ UINT32 codePage,
+                                    _In_opt_ IDxcBlob *pBlob) {
+    return DataOutput(kind, codePage, pBlob, DxcOutNoName);
+  }
 
-    if (pErrorStr != nullptr) {
-      hr = hlsl::DxcCreateBlobWithEncodingOnHeapCopy(
-        pErrorStr, strlen(pErrorStr), CP_UTF8, &errorBlob);
-      if (FAILED(hr)) {
-        return hr;
-      }
-    }
+  template<typename DataTy>
+  static DxcOutputObject ErrorOutput(UINT32 codePage, DataTy pText, SIZE_T size) {
+    return StringOutput(DXC_OUT_ERRORS, codePage, pText, size, DxcOutNoName);
+  }
+  template<typename DataTy>
+  static DxcOutputObject ErrorOutput(UINT32 codePage, DataTy pText) {
+    return StringOutput(DXC_OUT_ERRORS, codePage, pText, DxcOutNoName);
+  }
+  template<typename NameTy>
+  static DxcOutputObject ObjectOutput(LPCVOID pData, SIZE_T size, NameTy pName) {
+    return DataOutput(DXC_OUT_OBJECT, pData, size, pName);
+  }
+  static DxcOutputObject ObjectOutput(LPCVOID pData, SIZE_T size) {
+    return DataOutput(DXC_OUT_OBJECT, pData, size, DxcOutNoName);
+  }
+};
 
-    if (pResultStr != nullptr) {
-      hr = hlsl::DxcCreateBlobWithEncodingOnHeapCopy(
-        pResultStr, strlen(pResultStr), CP_UTF8, &resultBlob);
-      if (FAILED(hr)) {
-        return hr;
-      }
-    }
+class DxcResult : public IDxcResult {
+private:
+  DXC_MICROCOM_TM_REF_FIELDS()
+  HRESULT m_status = S_OK;
+  DxcOutputObject m_outputs[kNumDxcOutputTypes];  // indexed by DXC_OUT_KIND enum - 1
+  DXC_OUT_KIND m_resultType = DXC_OUT_NONE;       // result type for GetResult()
+  UINT32 m_textEncoding = DXC_CP_UTF8;              // encoding for text outputs
+
+public:
+  DXC_MICROCOM_TM_ADDREF_RELEASE_IMPL()
+  DXC_MICROCOM_TM_CTOR(DxcResult)
 
-    return CreateFromResultErrorStatus(resultBlob, errorBlob, status, pResult);
+  HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, void **ppvObject) override {
+    return DoBasicQueryInterface<IDxcResult, IDxcOperationResult>(this, iid, ppvObject);
   }
 
+  //////////////////////
+  // IDxcOperationResult
+  //////////////////////
+
   HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) override {
     if (pStatus == nullptr)
       return E_INVALIDARG;
@@ -106,12 +306,225 @@ public:
 
   HRESULT STDMETHODCALLTYPE
     GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) override {
-    return m_result.CopyTo(ppResult);
+    *ppResult = nullptr;
+    if (m_resultType == DXC_OUT_NONE)
+      return S_OK;
+    DxcOutputObject *pObject = Output(m_resultType);
+    if (pObject && pObject->object)
+      return pObject->object->QueryInterface(ppResult);
+    return S_OK;
   }
 
   HRESULT STDMETHODCALLTYPE
     GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **ppErrors) override {
-    return m_errors.CopyTo(ppErrors);
+    *ppErrors = nullptr;
+    DxcOutputObject *pObject = Output(DXC_OUT_ERRORS);
+    if (pObject && pObject->object)
+      return pObject->object->QueryInterface(ppErrors);
+    return S_OK;
+  }
+
+  /////////////
+  // IDxcResult
+  /////////////
+
+  // Reports whether the slot for dxcOutKind currently holds an active output.
+  // Kinds outside (DXC_OUT_NONE, kNumDxcOutputTypes] always report FALSE.
+  BOOL STDMETHODCALLTYPE HasOutput(_In_ DXC_OUT_KIND dxcOutKind) override {
+    const bool kindInRange =
+        dxcOutKind > DXC_OUT_NONE && (unsigned)dxcOutKind <= kNumDxcOutputTypes;
+    if (!kindInRange)
+      return FALSE;
+    const DxcOutputObject &slot = m_outputs[(unsigned)dxcOutKind - 1];
+    return slot.kind != DXC_OUT_NONE;
+  }
+  // Retrieves the object stored for dxcOutKind through interface `iid` and,
+  // if requested, the output's name.
+  // Returns E_INVALIDARG when ppvObject is null, when dxcOutKind is out of
+  // range, or when the slot holds no active output; in those cases the out
+  // parameters are not written. ppOutputName may be null.
+  // NOTE(review): assumes an active slot always has a non-null object;
+  // the object pointer is dereferenced without a check - confirm.
+  HRESULT STDMETHODCALLTYPE GetOutput(_In_ DXC_OUT_KIND dxcOutKind,
+      _In_ REFIID iid, _COM_Outptr_opt_result_maybenull_ void **ppvObject,
+      _COM_Outptr_ IDxcBlobUtf16 **ppOutputName) override {
+    if (ppvObject == nullptr)
+      return E_INVALIDARG;
+    if (dxcOutKind <= DXC_OUT_NONE || (unsigned)dxcOutKind > kNumDxcOutputTypes)
+      return E_INVALIDARG;
+    // Slots are indexed by kind - 1.
+    DxcOutputObject &object = m_outputs[(unsigned)dxcOutKind - 1];
+    if (object.kind == DXC_OUT_NONE)
+      return E_INVALIDARG;
+    *ppvObject = nullptr;
+    if (ppOutputName)
+      *ppOutputName = nullptr;
+    IFR(object.object->QueryInterface(iid, ppvObject));
+    // The name is optional; *ppOutputName stays null when none was set.
+    if (ppOutputName && object.name) {
+      object.name.CopyTo(ppOutputName);
+    }
+    return S_OK;
+  }
+
+  // Counts the slots that currently hold an active output.
+  UINT32 GetNumOutputs() override {
+    UINT32 activeCount = 0;
+    for (const DxcOutputObject &slot : m_outputs) {
+      if (slot.kind == DXC_OUT_NONE)
+        continue;
+      ++activeCount;
+    }
+    return activeCount;
+  }
+  // Returns the kind of the Index-th active output, counting only slots whose
+  // kind is not DXC_OUT_NONE, in slot order. Returns DXC_OUT_NONE when Index
+  // is not below the number of active outputs.
+  DXC_OUT_KIND GetOutputByIndex(UINT32 Index) override {
+    if (!(Index < kNumDxcOutputTypes))
+      return DXC_OUT_NONE;
+    UINT32 numOutputs = 0;
+    for (unsigned i = 0; i < kNumDxcOutputTypes; ++i) {
+      // Skip empty slots before comparing the index; otherwise an empty slot
+      // ahead of the Index-th active output would be returned as
+      // DXC_OUT_NONE.
+      if (m_outputs[i].kind == DXC_OUT_NONE)
+        continue;
+      if (Index == numOutputs)
+        return m_outputs[i].kind;
+      numOutputs++;
+    }
+    return DXC_OUT_NONE;
+  }
+  DXC_OUT_KIND PrimaryOutput() override {
+    return m_resultType;
+  }
+
+  /////////////////////
+  // Internal Interface
+  /////////////////////
+
+  // Selects the encoding applied to text outputs stored after this call.
+  // Only DXC_CP_ACP, DXC_CP_UTF8 and DXC_CP_UTF16 are accepted; any other
+  // value returns E_INVALIDARG and leaves the current encoding unchanged.
+  HRESULT SetEncoding(UINT32 textEncoding) {
+    switch (textEncoding) {
+    case DXC_CP_ACP:
+    case DXC_CP_UTF8:
+    case DXC_CP_UTF16:
+      m_textEncoding = textEncoding;
+      return S_OK;
+    default:
+      return E_INVALIDARG;
+    }
+  }
+
+  // Returns the slot for `kind`, or nullptr when `kind` is outside
+  // (DXC_OUT_NONE, kNumDxcOutputTypes]. Slots are indexed by kind - 1.
+  DxcOutputObject *Output(DXC_OUT_KIND kind) {
+    const bool kindInRange =
+        kind > DXC_OUT_NONE && (unsigned)kind <= kNumDxcOutputTypes;
+    return kindInRange ? &m_outputs[(unsigned)kind - 1] : nullptr;
+  }
+
+  // Resets the slot for `kind` to empty: kind becomes DXC_OUT_NONE and the
+  // object and name references are released.
+  // Returns E_INVALIDARG when `kind` is out of range.
+  HRESULT ClearOutput(DXC_OUT_KIND kind) {
+    const bool kindInRange =
+        kind > DXC_OUT_NONE && (unsigned)kind <= kNumDxcOutputTypes;
+    if (!kindInRange)
+      return E_INVALIDARG;
+    DxcOutputObject &slot = m_outputs[(unsigned)kind - 1];
+    slot.kind = DXC_OUT_NONE;
+    slot.object.Release();
+    slot.name.Release();
+    return S_OK;
+  }
+
+  // Clears every output slot by calling ClearOutput for each kind from
+  // DXC_OUT_NONE + 1 through kNumDxcOutputTypes inclusive.
+  // The status and primary result type are not touched.
+  void ClearAllOutputs() {
+    for (unsigned i = DXC_OUT_NONE + 1; i <= kNumDxcOutputTypes; i++)
+      ClearOutput((DXC_OUT_KIND)(i));
+  }
+
+  // Records the operation status and which output kind GetResult() returns.
+  // DXC_OUT_NONE (the default) is accepted; a kind above kNumDxcOutputTypes
+  // returns E_INVALIDARG and changes nothing.
+  HRESULT SetStatusAndPrimaryResult(HRESULT status, DXC_OUT_KIND resultType = DXC_OUT_NONE) {
+    const unsigned requestedKind = (unsigned)resultType;
+    if (requestedKind <= kNumDxcOutputTypes) {
+      m_status = status;
+      m_resultType = resultType;
+      return S_OK;
+    }
+    return E_INVALIDARG;
+  }
+
+  // Copies `output` (kind, object and name) into its slot, which must still
+  // be empty. Returns E_INVALIDARG when the kind is out of range, when
+  // `output` carries no object, or when the slot is already occupied.
+  HRESULT SetOutput(const DxcOutputObject &output) {
+    const bool kindInRange =
+        output.kind > DXC_OUT_NONE && (unsigned)output.kind <= kNumDxcOutputTypes;
+    if (!kindInRange || !output.object)
+      return E_INVALIDARG;
+    DxcOutputObject &slot = m_outputs[(unsigned)output.kind - 1];
+    // Refuse to replace an output that has already been stored.
+    if (slot.kind != DXC_OUT_NONE)
+      return E_INVALIDARG;
+    slot = output;
+    return S_OK;
+  }
+
+  // Set or overwrite the output object for `kind` and mark the slot active.
+  // A null pObject clears the object and marks the slot DXC_OUT_NONE.
+  // Text outputs are re-encoded to the current text encoding by SetObject().
+  // The slot's name is left untouched.
+  // Returns E_INVALIDARG for an out-of-range kind, or the SetObject() failure
+  // code, in which case the slot is left inactive.
+  HRESULT SetOutputObject(DXC_OUT_KIND kind, IUnknown *pObject) {
+    if (kind <= DXC_OUT_NONE || (unsigned)kind > kNumDxcOutputTypes)
+      return E_INVALIDARG;
+    DxcOutputObject &output = m_outputs[(unsigned)kind - 1];
+    // SetObject() requires an empty slot, so drop any previous object first;
+    // this is what makes the call a genuine overwrite.
+    output.object.Release();
+    if (!pObject) {
+      output.kind = DXC_OUT_NONE;
+      return S_OK;
+    }
+    // kind must be assigned before SetObject(), which reads it to decide
+    // whether the object is text that needs re-encoding.
+    output.kind = kind;
+    HRESULT hr = output.SetObject(pObject, m_textEncoding);
+    if (FAILED(hr))
+      output.kind = DXC_OUT_NONE; // never advertise an output with no object
+    return hr;
+  }
+  // Set or overwrite the output for `kind` with a string, stored in the
+  // current text encoding, and mark the slot active.
+  // A null pString clears the object and marks the slot DXC_OUT_NONE.
+  // `size` is forwarded to DxcOutputObject::SetString (kAutoSize = measure
+  // the string). The slot's name is left untouched.
+  // Returns E_INVALIDARG for an out-of-range kind, or the SetString() failure
+  // code, in which case the slot is left inactive.
+  template<typename StringTy>
+  HRESULT SetOutputString(DXC_OUT_KIND kind, StringTy pString, size_t size = kAutoSize) {
+    if (kind <= DXC_OUT_NONE || (unsigned)kind > kNumDxcOutputTypes)
+      return E_INVALIDARG;
+    DxcOutputObject &output = m_outputs[(unsigned)kind - 1];
+    // SetString() requires an empty slot, so drop any previous object first;
+    // this is what makes the call a genuine overwrite.
+    output.object.Release();
+    if (!pString) {
+      output.kind = DXC_OUT_NONE;
+      return S_OK;
+    }
+    output.kind = kind;
+    HRESULT hr = output.SetString(m_textEncoding, pString, size);
+    if (FAILED(hr))
+      output.kind = DXC_OUT_NONE; // never advertise an output with no object
+    return hr;
+  }
+  // Set or overwrite the output name.  This does not set kind,
+  // since that indicates an active output, which must have an object.
+  // Any previous name is released first, because SetName() requires that no
+  // name is set. Returns E_INVALIDARG for an out-of-range kind, otherwise the
+  // result of SetName().
+  template<typename NameTy>
+  HRESULT SetOutputName(DXC_OUT_KIND kind, NameTy Name) {
+    if (kind <= DXC_OUT_NONE || (unsigned)kind > kNumDxcOutputTypes)
+      return E_INVALIDARG;
+    DxcOutputObject &output = m_outputs[(unsigned)kind - 1];
+    output.name.Release();
+    return output.SetName(Name);
+  }
+
+  // Stores each entry of `outputs` via SetOutput(), in order. Entries with
+  // kind DXC_OUT_NONE or without an object are ignored. Stops at, and
+  // returns, the first SetOutput() failure.
+  HRESULT SetOutputs(const llvm::ArrayRef<DxcOutputObject> outputs) {
+    for (const DxcOutputObject &entry : outputs) {
+      const bool storable = entry.kind != DXC_OUT_NONE && entry.object;
+      if (storable)
+        IFR(SetOutput(entry));
+    }
+    return S_OK;
+  }
+
+  // Copies every output that pResult reports via HasOutput() into the
+  // matching slot of this result, taking the object as IUnknown together
+  // with its name, and marks that slot active.
+  // Returns E_INVALIDARG for a null pResult, or the first GetOutput()
+  // failure. Status and primary result type are not copied.
+  // NOTE(review): slots are written without being cleared first -
+  // presumably this is meant for a result with no outputs yet; confirm.
+  HRESULT CopyOutputsFromResult(IDxcResult *pResult) {
+    if (!pResult)
+      return E_INVALIDARG;
+    for (unsigned i = 0; i < kNumDxcOutputTypes; i++) {
+      DxcOutputObject &output = m_outputs[i];
+      // Slot i corresponds to output kind i + 1.
+      DXC_OUT_KIND kind = (DXC_OUT_KIND)(i + 1);
+      if (pResult->HasOutput(kind)) {
+        IFR(pResult->GetOutput(kind, IID_PPV_ARGS(&output.object), &output.name));
+        output.kind = kind;
+      }
+    }
+    return S_OK;
+  }
+
+  // One-shot setup: records the status and primary result type, then stores
+  // `outputs` through SetOutputs() and returns its result. Neither field is
+  // validated here.
+  HRESULT Init(_In_ HRESULT status, _In_ DXC_OUT_KIND resultType,
+               const llvm::ArrayRef<DxcOutputObject> outputs) {
+    m_resultType = resultType;
+    m_status = status;
+    const HRESULT hrOutputs = SetOutputs(outputs);
+    return hrOutputs;
+  }
+
+  // All-in-one create functions
+
+  // Allocates a DxcResult with the thread allocator, initializes it with the
+  // given status, primary result type and outputs, and hands it to the
+  // caller. *ppResult is null on every failure path.
+  static HRESULT Create(_In_ HRESULT status, _In_ DXC_OUT_KIND resultType,
+                        _In_opt_count_(numOutputs) const DxcOutputObject *pOutputs,
+                        _In_ unsigned numOutputs,
+                        _COM_Outptr_ IDxcResult **ppResult) {
+    *ppResult = nullptr;
+    CComPtr<DxcResult> pNewResult = DxcResult::Alloc(DxcGetThreadMallocNoRef());
+    IFROOM(pNewResult.p);
+    const llvm::ArrayRef<DxcOutputObject> outputList(pOutputs, numOutputs);
+    IFR(pNewResult->Init(status, resultType, outputList));
+    // Ownership moves to the caller only after initialization succeeded.
+    *ppResult = pNewResult.Detach();
+    return S_OK;
+  }
+  static HRESULT Create(_In_ HRESULT status, _In_ DXC_OUT_KIND resultType,
+                        const llvm::ArrayRef<DxcOutputObject> outputs,
+                        _COM_Outptr_ IDxcResult **ppResult) {
+    return Create(status, resultType, outputs.data(), outputs.size(), ppResult);
+  }
+  // For convenient use in legacy interface implementations
+  // Same as the IDxcResult overload, but returns the new object through an
+  // IDxcOperationResult pointer. The reference obtained from the inner
+  // Create() is handed to the caller as is.
+  // NOTE(review): *ppResult is not written when the inner Create() fails.
+  static HRESULT Create(_In_ HRESULT status, _In_ DXC_OUT_KIND resultType,
+                        const llvm::ArrayRef<DxcOutputObject> outputs,
+                        _COM_Outptr_ IDxcOperationResult **ppResult) {
+    IDxcResult *pResult;
+    IFR(Create(status, resultType, outputs.data(), outputs.size(), &pResult));
+    *ppResult = pResult;
+    return S_OK;
   }
 };
 

+ 2 - 2
include/dxc/Support/dxcapi.use.h

@@ -166,8 +166,8 @@ void EnsureEnabled(DxcDllSupport &dxcSupport);
 void ReadFileIntoBlob(DxcDllSupport &dxcSupport, _In_ LPCWSTR pFileName,
                       _Outptr_ IDxcBlobEncoding **ppBlobEncoding);
 void WriteBlobToConsole(_In_opt_ IDxcBlob *pBlob, DWORD streamType = STD_OUTPUT_HANDLE);
-void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, _In_ LPCWSTR pFileName);
-void WriteBlobToHandle(_In_opt_ IDxcBlob *pBlob, HANDLE hFile, _In_opt_ LPCWSTR pFileName);
+void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, _In_ LPCWSTR pFileName, _In_ UINT32 textCodePage);
+void WriteBlobToHandle(_In_opt_ IDxcBlob *pBlob, _In_ HANDLE hFile, _In_opt_ LPCWSTR pFileName, _In_ UINT32 textCodePage);
 void WriteUtf8ToConsole(_In_opt_count_(charCount) const char *pText,
                         int charCount, DWORD streamType = STD_OUTPUT_HANDLE);
 void WriteUtf8ToConsoleSizeT(_In_opt_count_(charCount) const char *pText,

+ 1 - 1
include/dxc/Support/dxcfilesystem.h

@@ -43,7 +43,7 @@ public:
 };
 
 DxcArgsFileSystem *
-CreateDxcArgsFileSystem(_In_ IDxcBlob *pSource, _In_ LPCWSTR pSourceName,
+CreateDxcArgsFileSystem(_In_ IDxcBlobUtf8 *pSource, _In_ LPCWSTR pSourceName,
                         _In_opt_ IDxcIncludeHandler *pIncludeHandler);
 
 } // namespace dxcutil

+ 321 - 71
include/dxc/dxcapi.h

@@ -35,23 +35,6 @@ struct IMalloc;
 
 struct IDxcIncludeHandler;
 
-/// <summary>
-/// Creates a single uninitialized object of the class associated with a specified CLSID.
-/// </summary>
-/// <param name="rclsid">
-/// The CLSID associated with the data and code that will be used to create the object.
-/// </param>
-/// <param name="riid">
-/// A reference to the identifier of the interface to be used to communicate 
-/// with the object.
-/// </param>
-/// <param name="ppv">
-/// Address of pointer variable that receives the interface pointer requested
-/// in riid. Upon successful return, *ppv contains the requested interface
-/// pointer. Upon failure, *ppv contains NULL.</param>
-/// <remarks>
-/// While this function is similar to CoCreateInstance, there is no COM involvement.
-/// </remarks>
 typedef HRESULT (__stdcall *DxcCreateInstanceProc)(
     _In_ REFCLSID   rclsid,
     _In_ REFIID     riid,
@@ -83,18 +66,14 @@ typedef HRESULT(__stdcall *DxcCreateInstance2Proc)(
 /// While this function is similar to CoCreateInstance, there is no COM involvement.
 /// </remarks>
 
-#ifndef _MSC_VER
 extern "C"
-#endif
 DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance(
   _In_ REFCLSID   rclsid,
   _In_ REFIID     riid,
   _Out_ LPVOID*   ppv
   );
 
-#ifndef _MSC_VER
 extern "C"
-#endif
 DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance2(
   _In_ IMalloc    *pMalloc,
   _In_ REFCLSID   rclsid,
@@ -102,6 +81,56 @@ DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance2(
   _Out_ LPVOID*   ppv
 );
 
+// For convenience, equivalent definitions to CP_UTF8 and CP_UTF16.
+#define DXC_CP_UTF8 65001
+#define DXC_CP_UTF16 1200
+// Use DXC_CP_ACP for: Binary;  ANSI Text;  Autodetect UTF with BOM
+#define DXC_CP_ACP 0
+
+// This flag indicates that the shader hash was computed taking into account source information (-Zss)
+#define DXC_HASHFLAG_INCLUDES_SOURCE  1
+
+// Hash digest type for ShaderHash
+typedef struct DxcShaderHash {
+  UINT32 Flags; // DXC_HASHFLAG_*
+  BYTE HashDigest[16];
+} DxcShaderHash;
+
+#define DXC_FOURCC(ch0, ch1, ch2, ch3) (                     \
+  (UINT32)(UINT8)(ch0)        | (UINT32)(UINT8)(ch1) << 8  | \
+  (UINT32)(UINT8)(ch2) << 16  | (UINT32)(UINT8)(ch3) << 24   \
+  )
+#define DXC_PART_PDB                      DXC_FOURCC('I', 'L', 'D', 'B')
+#define DXC_PART_PDB_NAME                 DXC_FOURCC('I', 'L', 'D', 'N')
+#define DXC_PART_PRIVATE_DATA             DXC_FOURCC('P', 'R', 'I', 'V')
+#define DXC_PART_ROOT_SIGNATURE           DXC_FOURCC('R', 'T', 'S', '0')
+#define DXC_PART_DXIL                     DXC_FOURCC('D', 'X', 'I', 'L')
+#define DXC_PART_REFLECTION_DATA          DXC_FOURCC('R', 'D', 'A', 'T')
+#define DXC_PART_SHADER_HASH              DXC_FOURCC('H', 'A', 'S', 'H')
+#define DXC_PART_INPUT_SIGNATURE          DXC_FOURCC('I', 'S', 'G', '1')
+#define DXC_PART_OUTPUT_SIGNATURE         DXC_FOURCC('O', 'S', 'G', '1')
+#define DXC_PART_PATCH_CONSTANT_SIGNATURE DXC_FOURCC('P', 'S', 'G', '1')
+
+// Some option arguments are defined here for continuity with D3DCompile interface
+#define DXC_ARG_DEBUG L"-Zi"
+#define DXC_ARG_SKIP_VALIDATION L"-Vd"
+#define DXC_ARG_SKIP_OPTIMIZATIONS L"-Od"
+#define DXC_ARG_PACK_MATRIX_ROW_MAJOR L"-Zpr"
+#define DXC_ARG_PACK_MATRIX_COLUMN_MAJOR L"-Zpc"
+#define DXC_ARG_AVOID_FLOW_CONTROL L"-Gfa"
+#define DXC_ARG_PREFER_FLOW_CONTROL L"-Gfp"
+#define DXC_ARG_ENABLE_STRICTNESS L"-Ges"
+#define DXC_ARG_ENABLE_BACKWARDS_COMPATIBILITY L"-Gec"
+#define DXC_ARG_IEEE_STRICTNESS L"-Gis"
+#define DXC_ARG_OPTIMIZATION_LEVEL0 L"-O0"
+#define DXC_ARG_OPTIMIZATION_LEVEL1 L"-O1"
+#define DXC_ARG_OPTIMIZATION_LEVEL2 L"-O2"
+#define DXC_ARG_OPTIMIZATION_LEVEL3 L"-O3"
+#define DXC_ARG_WARNINGS_ARE_ERRORS L"-WX"
+#define DXC_ARG_RESOURCES_MAY_ALIAS L"-res_may_alias"
+#define DXC_ARG_ALL_RESOURCES_BOUND L"-all_resources_bound"
+#define DXC_ARG_DEBUG_NAME_FOR_SOURCE L"-Zss"
+#define DXC_ARG_DEBUG_NAME_FOR_BINARY L"-Zsb"
 
 // IDxcBlob is an alias of ID3D10Blob and ID3DBlob
 struct __declspec(uuid("8BA5FB08-5195-40e2-AC58-0D989C3A0102"))
@@ -122,70 +151,144 @@ public:
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobEncoding)
 };
 
+// Notes on IDxcBlobUtf16 and IDxcBlobUtf8
+// These guarantee null-terminated text and the stated encoding.
+// GetBufferSize() will return the size in bytes, including null-terminator
+// GetStringLength() will return the length in characters, excluding the null-terminator
+// Name strings will use IDxcBlobUtf16, while other string output blobs,
+// such as errors/warnings, preprocessed HLSL, or other text will be based
+// on the -encoding option.
+
+// The API will use this interface for output name strings
+struct __declspec(uuid("A3F84EAB-0FAA-497E-A39C-EE6ED60B2D84"))
+IDxcBlobUtf16 : public IDxcBlobEncoding {
+public:
+  virtual LPCWSTR STDMETHODCALLTYPE GetStringPointer(void) = 0;
+  virtual SIZE_T STDMETHODCALLTYPE GetStringLength(void) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobUtf16)
+};
+struct __declspec(uuid("3DA636C9-BA71-4024-A301-30CBF125305B"))
+IDxcBlobUtf8 : public IDxcBlobEncoding {
+public:
+  virtual LPCSTR STDMETHODCALLTYPE GetStringPointer(void) = 0;
+  virtual SIZE_T STDMETHODCALLTYPE GetStringLength(void) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobUtf8)
+};
+
+struct __declspec(uuid("7f61fc7d-950d-467f-b3e3-3c02fb49187c"))
+IDxcIncludeHandler : public IUnknown {
+  virtual HRESULT STDMETHODCALLTYPE LoadSource(
+    _In_z_ LPCWSTR pFilename,                                 // Candidate filename.
+    _COM_Outptr_result_maybenull_ IDxcBlob **ppIncludeSource  // Resultant source object for included file, nullptr if not found.
+    ) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcIncludeHandler)
+};
+
+// Structure for supplying bytes or text input to Dxc APIs.
+// Use Encoding = 0 for non-text bytes, ANSI text, or unknown with BOM.
+typedef struct DxcBuffer {
+  LPCVOID Ptr;
+  SIZE_T Size;
+  UINT Encoding;
+} DxcText;
+
+struct DxcDefine {
+  LPCWSTR Name;
+  _Maybenull_ LPCWSTR Value;
+};
+
+struct __declspec(uuid("73EFFE2A-70DC-45F8-9690-EFF64C02429D"))
+IDxcCompilerArgs : public IUnknown {
+  // Pass GetArguments() and GetCount() to Compile
+  virtual LPCWSTR* STDMETHODCALLTYPE GetArguments() = 0;
+  virtual UINT32 STDMETHODCALLTYPE GetCount() = 0;
+
+  // Add additional arguments or defines here, if desired.
+  virtual HRESULT STDMETHODCALLTYPE AddArguments(
+    _In_opt_count_(argCount) LPCWSTR *pArguments,       // Array of pointers to arguments to add
+    _In_ UINT32 argCount                                // Number of arguments to add
+  ) = 0;
+  virtual HRESULT STDMETHODCALLTYPE AddArgumentsUTF8(
+    _In_opt_count_(argCount)LPCSTR *pArguments,         // Array of pointers to UTF-8 arguments to add
+    _In_ UINT32 argCount                                // Number of arguments to add
+  ) = 0;
+  virtual HRESULT STDMETHODCALLTYPE AddDefines(
+      _In_count_(defineCount) const DxcDefine *pDefines, // Array of defines
+      _In_ UINT32 defineCount                            // Number of defines
+  ) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompilerArgs)
+};
+
+//////////////////////////
+// Legacy Interfaces
+/////////////////////////
+
+// NOTE: IDxcUtils replaces IDxcLibrary
 struct __declspec(uuid("e5204dc7-d18c-4c3c-bdfb-851673980fe7"))
 IDxcLibrary : public IUnknown {
   virtual HRESULT STDMETHODCALLTYPE SetMalloc(_In_opt_ IMalloc *pMalloc) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateBlobFromBlob(
     _In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, _COM_Outptr_ IDxcBlob **ppResult) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateBlobFromFile(
-    LPCWSTR pFileName, _In_opt_ UINT32* codePage,
+    _In_z_ LPCWSTR pFileName, _In_opt_ UINT32* codePage,
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingFromPinned(
     _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnHeapCopy(
-       _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
-      _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+    _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
+    _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnMalloc(
     _In_bytecount_(size) LPCVOID pText, IMalloc *pIMalloc, UINT32 size, UINT32 codePage,
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateIncludeHandler(
-      _COM_Outptr_ IDxcIncludeHandler **ppResult) = 0;
+    _COM_Outptr_ IDxcIncludeHandler **ppResult) = 0;
   virtual HRESULT STDMETHODCALLTYPE CreateStreamFromBlobReadOnly(
-      _In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0;
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0;
   virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8(
-      _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
   virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf16(
-      _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
 
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcLibrary)
 };
 
+// NOTE: IDxcResult replaces IDxcOperationResult
 struct __declspec(uuid("CEDB484A-D4E9-445A-B991-CA21CA157DC2"))
 IDxcOperationResult : public IUnknown {
   virtual HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) = 0;
-  virtual HRESULT STDMETHODCALLTYPE GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **pResult) = 0;
-  virtual HRESULT STDMETHODCALLTYPE GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **pErrors) = 0;
 
-  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcOperationResult)
-};
+  // GetResult returns the main result of the operation.
+  // This corresponds to:
+  // DXC_OUT_OBJECT - Compile() with shader or library target
+  // DXC_OUT_DISASSEMBLY - Disassemble()
+  // DXC_OUT_HLSL - Compile() with -P
+  // DXC_OUT_ROOT_SIGNATURE - Compile() with rootsig_* target
+  virtual HRESULT STDMETHODCALLTYPE GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) = 0;
 
-struct __declspec(uuid("7f61fc7d-950d-467f-b3e3-3c02fb49187c"))
-IDxcIncludeHandler : public IUnknown {
-  virtual HRESULT STDMETHODCALLTYPE LoadSource(
-    _In_ LPCWSTR pFilename,                                   // Candidate filename.
-    _COM_Outptr_result_maybenull_ IDxcBlob **ppIncludeSource  // Resultant source object for included file, nullptr if not found.
-    ) = 0;
+  // GetErrorBuffer Corresponds to DXC_OUT_ERRORS.
+  virtual HRESULT STDMETHODCALLTYPE GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **ppErrors) = 0;
 
-  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcIncludeHandler)
-};
-
-struct DxcDefine {
-  LPCWSTR Name;
-  _Maybenull_ LPCWSTR Value;
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcOperationResult)
 };
 
+// NOTE: IDxcCompiler3 replaces IDxcCompiler and IDxcCompiler2
 struct __declspec(uuid("8c210bf3-011f-4422-8d70-6f9acb8db617"))
 IDxcCompiler : public IUnknown {
   // Compile a single entry point to the target shader model
   virtual HRESULT STDMETHODCALLTYPE Compile(
     _In_ IDxcBlob *pSource,                       // Source text to compile
-    _In_opt_ LPCWSTR pSourceName,                 // Optional file name for pSource. Used in errors and include handlers.
-    _In_ LPCWSTR pEntryPoint,                     // entry point name
-    _In_ LPCWSTR pTargetProfile,                  // shader profile to compile
-    _In_count_(argCount) LPCWSTR *pArguments,     // Array of pointers to arguments
+    _In_opt_z_ LPCWSTR pSourceName,               // Optional file name for pSource. Used in errors and include handlers.
+    _In_opt_z_ LPCWSTR pEntryPoint,               // entry point name
+    _In_z_ LPCWSTR pTargetProfile,                // shader profile to compile
+    _In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
     _In_ UINT32 argCount,                         // Number of arguments
-    _In_count_(defineCount) const DxcDefine *pDefines,  // Array of defines
+    _In_count_(defineCount)
+      const DxcDefine *pDefines,                  // Array of defines
     _In_ UINT32 defineCount,                      // Number of defines
     _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
     _COM_Outptr_ IDxcOperationResult **ppResult   // Compiler output status, buffer, and errors
@@ -194,10 +297,11 @@ IDxcCompiler : public IUnknown {
   // Preprocess source text
   virtual HRESULT STDMETHODCALLTYPE Preprocess(
     _In_ IDxcBlob *pSource,                       // Source text to preprocess
-    _In_opt_ LPCWSTR pSourceName,                 // Optional file name for pSource. Used in errors and include handlers.
-    _In_count_(argCount) LPCWSTR *pArguments,     // Array of pointers to arguments
+    _In_opt_z_ LPCWSTR pSourceName,               // Optional file name for pSource. Used in errors and include handlers.
+    _In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
     _In_ UINT32 argCount,                         // Number of arguments
-    _In_count_(defineCount) const DxcDefine *pDefines,  // Array of defines
+    _In_count_(defineCount)
+      const DxcDefine *pDefines,                  // Array of defines
     _In_ UINT32 defineCount,                      // Number of defines
     _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
     _COM_Outptr_ IDxcOperationResult **ppResult   // Preprocessor output status, buffer, and errors
@@ -212,21 +316,23 @@ IDxcCompiler : public IUnknown {
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompiler)
 };
 
+// NOTE: IDxcCompiler3 replaces IDxcCompiler and IDxcCompiler2
 struct __declspec(uuid("A005A9D9-B8BB-4594-B5C9-0E633BEC4D37"))
 IDxcCompiler2 : public IDxcCompiler {
   // Compile a single entry point to the target shader model with debug information.
   virtual HRESULT STDMETHODCALLTYPE CompileWithDebug(
     _In_ IDxcBlob *pSource,                       // Source text to compile
-    _In_opt_ LPCWSTR pSourceName,                 // Optional file name for pSource. Used in errors and include handlers.
-    _In_ LPCWSTR pEntryPoint,                     // Entry point name
-    _In_ LPCWSTR pTargetProfile,                  // Shader profile to compile
-    _In_count_(argCount) LPCWSTR *pArguments,     // Array of pointers to arguments
+    _In_opt_z_ LPCWSTR pSourceName,               // Optional file name for pSource. Used in errors and include handlers.
+    _In_opt_z_ LPCWSTR pEntryPoint,               // Entry point name
+    _In_z_ LPCWSTR pTargetProfile,                // Shader profile to compile
+    _In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
     _In_ UINT32 argCount,                         // Number of arguments
-    _In_count_(defineCount) const DxcDefine *pDefines,  // Array of defines
+    _In_count_(defineCount)
+      const DxcDefine *pDefines,                  // Array of defines
     _In_ UINT32 defineCount,                      // Number of defines
     _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
     _COM_Outptr_ IDxcOperationResult **ppResult,  // Compiler output status, buffer, and errors
-    _Outptr_opt_result_z_ LPWSTR *ppDebugBlobName,// Suggested file name for debug blob.
+    _Outptr_opt_result_z_ LPWSTR *ppDebugBlobName,// Suggested file name for debug blob. (Must be HeapFree()'d!)
     _COM_Outptr_opt_ IDxcBlob **ppDebugBlob       // Debug blob
   ) = 0;
 
@@ -238,28 +344,164 @@ IDxcLinker : public IUnknown {
 public:
   // Register a library with name to ref it later.
   virtual HRESULT RegisterLibrary(
-      _In_opt_ LPCWSTR pLibName,         // Name of the library.
-      _In_ IDxcBlob *pLib                // Library blob.
+    _In_opt_ LPCWSTR pLibName,          // Name of the library.
+    _In_ IDxcBlob *pLib                 // Library blob.
   ) = 0;
 
   // Links the shader and produces a shader blob that the Direct3D runtime can
   // use.
   virtual HRESULT STDMETHODCALLTYPE Link(
-      _In_opt_ LPCWSTR pEntryName, // Entry point name
-      _In_ LPCWSTR pTargetProfile, // shader profile to link
-      _In_count_(libCount)
-          const LPCWSTR *pLibNames, // Array of library names to link
-      UINT32 libCount,              // Number of libraries to link
-      _In_count_(argCount)
-          const LPCWSTR *pArguments, // Array of pointers to arguments
-      _In_ UINT32 argCount,          // Number of arguments
-      _COM_Outptr_ IDxcOperationResult *
-          *ppResult // Linker output status, buffer, and errors
+    _In_opt_ LPCWSTR pEntryName,        // Entry point name
+    _In_ LPCWSTR pTargetProfile,        // shader profile to link
+    _In_count_(libCount)
+        const LPCWSTR *pLibNames,       // Array of library names to link
+    _In_ UINT32 libCount,               // Number of libraries to link
+    _In_opt_count_(argCount) const LPCWSTR *pArguments, // Array of pointers to arguments
+    _In_ UINT32 argCount,               // Number of arguments
+    _COM_Outptr_
+        IDxcOperationResult **ppResult  // Linker output status, buffer, and errors
   ) = 0;
 
   DECLARE_CROSS_PLATFORM_UUIDOF(IDxcLinker)
 };
 
+/////////////////////////
+// Latest interfaces. Please use these
+////////////////////////
+
+// NOTE: IDxcUtils replaces IDxcLibrary
+struct __declspec(uuid("4605C4CB-2019-492A-ADA4-65F20BB7D67F"))
+IDxcUtils : public IUnknown {
+  // Create a sub-blob that holds a reference to the outer blob and points to its memory.
+  virtual HRESULT STDMETHODCALLTYPE CreateBlobFromBlob(
+    _In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, _COM_Outptr_ IDxcBlob **ppResult) = 0;
+
+  // For codePage, use 0 (or DXC_CP_ACP) for raw binary or ANSI code page
+
+  // Creates a blob referencing existing memory, with no copy.
+  // User must manage the memory lifetime separately.
+  // (was: CreateBlobWithEncodingFromPinned)
+  virtual HRESULT STDMETHODCALLTYPE CreateBlobFromPinned(
+    _In_bytecount_(size) LPCVOID pData, UINT32 size, UINT32 codePage,
+    _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+
+  // Create blob, taking ownership of memory allocated with supplied allocator.
+  // (was: CreateBlobWithEncodingOnMalloc)
+  virtual HRESULT STDMETHODCALLTYPE MoveToBlob(
+    _In_bytecount_(size) LPCVOID pData, IMalloc *pIMalloc, UINT32 size, UINT32 codePage,
+    _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+
+  ////
+  // New blobs and copied contents are allocated with the current allocator
+
+  // Copy blob contents to memory owned by the new blob.
+  // (was: CreateBlobWithEncodingOnHeapCopy)
+  virtual HRESULT STDMETHODCALLTYPE CreateBlob(
+    _In_bytecount_(size) LPCVOID pData, UINT32 size, UINT32 codePage,
+    _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+
+  // Load the file named pFileName into a blob; pCodePage is optional (_In_opt_). (was: CreateBlobFromFile)
+  virtual HRESULT STDMETHODCALLTYPE LoadFile(
+    _In_z_ LPCWSTR pFileName, _In_opt_ UINT32* pCodePage,
+    _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
+  // Return an IStream over pBlob through ppStream (read-only, per the method name).
+  virtual HRESULT STDMETHODCALLTYPE CreateReadOnlyStreamFromBlob(
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0;
+
+  // Create default file-based include handler
+  virtual HRESULT STDMETHODCALLTYPE CreateDefaultIncludeHandler(
+    _COM_Outptr_ IDxcIncludeHandler **ppResult) = 0;
+
+  // Convert or return matching encoded text blobs
+  virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8(
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobUtf8 **pBlobEncoding) = 0;
+  virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf16(
+    _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobUtf16 **pBlobEncoding) = 0;
+  // Locate part DxcPart in pShader; ppPartData is null on failure (per its SAL annotation). NOTE(review): DxcPart is presumably a part four-CC — confirm.
+  virtual HRESULT STDMETHODCALLTYPE GetDxilContainerPart(
+    _In_ const DxcBuffer *pShader,
+    _In_ UINT32 DxcPart,
+    _Outptr_result_nullonfailure_ void **ppPartData,
+    _Out_ UINT32 *pPartSizeInBytes) = 0;
+
+  // Create reflection interface from serialized Dxil container, or DXC_PART_REFLECTION_DATA.
+  // TBD: Require part header for RDAT?  (leaning towards yes)
+  virtual HRESULT STDMETHODCALLTYPE CreateReflection(
+    _In_ const DxcBuffer *pData, REFIID iid, void **ppvReflection) = 0;
+  // Bundle a source name, entry point, target profile, extra arguments and defines into an IDxcCompilerArgs object.
+  virtual HRESULT STDMETHODCALLTYPE BuildArguments(
+    _In_opt_z_ LPCWSTR pSourceName,               // Optional file name for pSource. Used in errors and include handlers.
+    _In_opt_z_ LPCWSTR pEntryPoint,               // Entry point name. (-E)
+    _In_z_ LPCWSTR pTargetProfile,                // Shader profile to compile. (-T)
+    _In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
+    _In_ UINT32 argCount,                         // Number of arguments
+    _In_count_(defineCount)
+      const DxcDefine *pDefines,                  // Array of defines
+    _In_ UINT32 defineCount,                      // Number of defines
+    _COM_Outptr_ IDxcCompilerArgs **ppArgs        // Arguments you can use with Compile() method
+  ) = 0;
+
+  // Takes the shader PDB and returns the hash and the container inside it
+  virtual HRESULT STDMETHODCALLTYPE GetPDBContents(
+    _In_ IDxcBlob *pPDBBlob, _COM_Outptr_ IDxcBlob **ppHash, _COM_Outptr_ IDxcBlob **ppContainer) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcUtils)
+};
+
+// For use with IDxcResult::[Has|Get]Output dxcOutKind argument; also returned by IDxcResult::GetOutputByIndex and PrimaryOutput
+// Note: text outputs returned from version 2 APIs are UTF-8 or UTF-16 based on -encoding option
+typedef enum DXC_OUT_KIND {
+  DXC_OUT_NONE = 0,           // NOTE(review): presumably means "no output" - confirm
+  DXC_OUT_OBJECT = 1,         // IDxcBlob - Shader or library object
+  DXC_OUT_ERRORS = 2,         // IDxcBlobUtf8 or IDxcBlobUtf16
+  DXC_OUT_PDB = 3,            // IDxcBlob
+  DXC_OUT_SHADER_HASH = 4,    // IDxcBlob - DxcShaderHash of shader or shader with source info (-Zsb/-Zss)
+  DXC_OUT_DISASSEMBLY = 5,    // IDxcBlobUtf8 or IDxcBlobUtf16 - from Disassemble
+  DXC_OUT_HLSL = 6,           // IDxcBlobUtf8 or IDxcBlobUtf16 - from Preprocessor or Rewriter
+  DXC_OUT_TEXT = 7,           // IDxcBlobUtf8 or IDxcBlobUtf16 - other text, such as -ast-dump or -Odump
+  DXC_OUT_REFLECTION = 8,     // IDxcBlob - RDAT part with reflection data
+  DXC_OUT_ROOT_SIGNATURE = 9, // IDxcBlob - Serialized root signature output
+
+  DXC_OUT_FORCE_DWORD = 0xFFFFFFFF // NOTE(review): presumably not a real output kind, only forces a 32-bit enum - confirm
+} DXC_OUT_KIND;
+// IDxcResult extends IDxcOperationResult with outputs that are queried by DXC_OUT_KIND.
+struct __declspec(uuid("58346CDA-DDE7-4497-9461-6F87AF5E0659"))
+IDxcResult : public IDxcOperationResult {
+  virtual BOOL STDMETHODCALLTYPE HasOutput(_In_ DXC_OUT_KIND dxcOutKind) = 0;
+  virtual HRESULT STDMETHODCALLTYPE GetOutput(_In_ DXC_OUT_KIND dxcOutKind, // kind of output to fetch
+    _In_ REFIID iid, _COM_Outptr_opt_result_maybenull_ void **ppvObject,    // iid: interface requested for ppvObject, which is optional and may come back null (SAL)
+    _COM_Outptr_ IDxcBlobUtf16 **ppOutputName) = 0;                         // name associated with the output, as a UTF-16 blob
+  // NOTE(review): the three methods below omit STDMETHODCALLTYPE, unlike HasOutput/GetOutput - confirm this is intended.
+  virtual UINT32 GetNumOutputs() = 0;
+  virtual DXC_OUT_KIND GetOutputByIndex(UINT32 Index) = 0;
+  virtual DXC_OUT_KIND PrimaryOutput() = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcResult)
+};
+// IDxcCompiler3 replaces IDxcCompiler and IDxcCompiler2; it derives directly from IUnknown and takes its input as a DxcBuffer.
+struct __declspec(uuid("228B4687-5A6A-4730-900C-9702B2203F54"))
+IDxcCompiler3 : public IUnknown {
+  // Compile a single entry point to the target shader model,
+  // Compile a library to a library target (-T lib_*),
+  // Compile a root signature (-T rootsig_*), or
+  // Preprocess HLSL source (-P)
+  virtual HRESULT STDMETHODCALLTYPE Compile(
+    _In_ const DxcBuffer *pSource,                // Source text to compile
+    _In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
+    _In_ UINT32 argCount,                         // Number of arguments
+    _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
+    _In_ REFIID riid, _Out_ LPVOID *ppResult      // riid: interface requested for ppResult - IDxcResult: status, buffer, and errors
+  ) = 0;
+
+  // Disassemble a program.
+  virtual HRESULT STDMETHODCALLTYPE Disassemble(
+    _In_ const DxcBuffer *pObject,                // Program to disassemble: dxil container or bitcode.
+    _In_ REFIID riid, _Out_ LPVOID *ppResult      // riid: interface requested for ppResult - IDxcResult: status, disassembly text, and errors
+    ) = 0;
+
+  DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompiler3)
+};
+
 static const UINT32 DxcValidatorFlags_Default = 0;
 static const UINT32 DxcValidatorFlags_InPlaceEdit = 1;  // Validator is allowed to update shader blob in-place.
 static const UINT32 DxcValidatorFlags_RootSignatureOnly = 2;
@@ -361,7 +603,6 @@ IDxcVersionInfo2 : public IDxcVersionInfo {
 #define CLSID_SCOPE
 #endif
 
-// {73e22d93-e6ce-47f3-b5bf-f0664f39c1b0}
 CLSID_SCOPE const CLSID CLSID_DxcCompiler = {
     0x73e22d93,
     0xe6ce,
@@ -382,6 +623,13 @@ CLSID_SCOPE const CLSID CLSID_DxcDiaDataSource = {
     0x484d,
     {0x8e, 0xdc, 0xeb, 0xe7, 0xa4, 0x3c, 0xa0, 0x9f}};
 
+// {3E56AE82-224D-470F-A1A1-FE3016EE9F9D} - text form of the value below. NOTE(review): presumably the class ID for an IDxcCompilerArgs object - confirm.
+CLSID_SCOPE const CLSID CLSID_DxcCompilerArgs = {
+    0x3e56ae82,
+    0x224d,
+    0x470f,
+    {0xa1, 0xa1, 0xfe, 0x30, 0x16, 0xee, 0x9f, 0x9d}};
+
 // {6245D6AF-66E0-48FD-80B4-4D271796748C}
 CLSID_SCOPE const GUID CLSID_DxcLibrary = {
     0x6245d6af,
@@ -389,6 +637,8 @@ CLSID_SCOPE const GUID CLSID_DxcLibrary = {
     0x48fd,
     {0x80, 0xb4, 0x4d, 0x27, 0x17, 0x96, 0x74, 0x8c}};
 
+CLSID_SCOPE const GUID CLSID_DxcUtils = CLSID_DxcLibrary; // Alias: initialized from CLSID_DxcLibrary, so both names carry the same GUID value
+
 // {8CA3E215-F728-4CF3-8CDD-88AF917587A1}
 CLSID_SCOPE const GUID CLSID_DxcValidator = {
     0x8ca3e215,

+ 9 - 1
include/dxc/dxcdxrfallbackcompiler.h

@@ -96,8 +96,16 @@ struct __declspec(uuid("76bb3c85-006d-4b72-9e10-63cd97df57f0"))
   ) = 0;
 };
 
+// Note: __declspec(selectany) requires 'extern'.
+// On Linux, __declspec(selectany) is removed, and using 'extern' results in a link error.
+#ifdef _MSC_VER
+#define CLSID_SCOPE __declspec(selectany) extern
+#else
+#define CLSID_SCOPE
+#endif
+
 // {76bb3c85-006d-4b72-9e10-63cd97df57f0}
-__declspec(selectany) extern const GUID CLSID_DxcDxrFallbackCompiler = {
+CLSID_SCOPE const GUID CLSID_DxcDxrFallbackCompiler = {
   0x76bb3c85,
   0x006d,
   0x4b72,

+ 5 - 0
include/llvm/Analysis/InstructionSimplify.h

@@ -322,6 +322,11 @@ namespace llvm {
                       AssumptionCache *AC = nullptr,
                       const Instruction *CxtI = nullptr);
 
+// HLSL Change - Begin
+  Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
+                          Type *Ty, const DataLayout &DL);
+// HLSL Change - End
+
   /// SimplifyInstruction - See if we can compute a simplified version of this
   /// instruction.  If not, this returns null.
   Value *SimplifyInstruction(Instruction *I, const DataLayout &DL,

+ 51 - 0
include/llvm/IR/IRBuilder.h

@@ -517,6 +517,7 @@ template<bool preserveNames = true, typename T = ConstantFolder,
 class IRBuilder : public IRBuilderBase, public Inserter {
   T Folder;
 public:
+  bool AllowFolding = true; // HLSL Change - Runtime flag on whether to do folding
   IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter(),
             MDNode *FPMathTag = nullptr)
     : IRBuilderBase(C, FPMathTag), Inserter(I), Folder(F) {
@@ -703,6 +704,7 @@ private:
 public:
   Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateAdd(LC, RC, HasNUW, HasNSW), Name);
@@ -717,6 +719,7 @@ public:
   }
   Value *CreateFAdd(Value *LHS, Value *RHS, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFAdd(LC, RC), Name);
@@ -725,6 +728,7 @@ public:
   }
   Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateSub(LC, RC, HasNUW, HasNSW), Name);
@@ -739,6 +743,7 @@ public:
   }
   Value *CreateFSub(Value *LHS, Value *RHS, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFSub(LC, RC), Name);
@@ -747,6 +752,7 @@ public:
   }
   Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateMul(LC, RC, HasNUW, HasNSW), Name);
@@ -761,6 +767,7 @@ public:
   }
   Value *CreateFMul(Value *LHS, Value *RHS, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFMul(LC, RC), Name);
@@ -769,6 +776,7 @@ public:
   }
   Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateUDiv(LC, RC, isExact), Name);
@@ -781,6 +789,7 @@ public:
   }
   Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateSDiv(LC, RC, isExact), Name);
@@ -793,6 +802,7 @@ public:
   }
   Value *CreateFDiv(Value *LHS, Value *RHS, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFDiv(LC, RC), Name);
@@ -800,12 +810,14 @@ public:
                                       FPMathTag, FMF), Name);
   }
   Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateURem(LC, RC), Name);
     return Insert(BinaryOperator::CreateURem(LHS, RHS), Name);
   }
   Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateSRem(LC, RC), Name);
@@ -813,6 +825,7 @@ public:
   }
   Value *CreateFRem(Value *LHS, Value *RHS, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFRem(LC, RC), Name);
@@ -822,6 +835,7 @@ public:
 
   Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateShl(LC, RC, HasNUW, HasNSW), Name);
@@ -841,6 +855,7 @@ public:
 
   Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateLShr(LC, RC, isExact), Name);
@@ -859,6 +874,7 @@ public:
 
   Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateAShr(LC, RC, isExact), Name);
@@ -876,6 +892,7 @@ public:
   }
 
   Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *RC = dyn_cast<Constant>(RHS)) {
       if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isAllOnesValue())
         return LHS;  // LHS & -1 -> LHS
@@ -892,6 +909,7 @@ public:
   }
 
   Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *RC = dyn_cast<Constant>(RHS)) {
       if (RC->isNullValue())
         return LHS;  // LHS | 0 -> LHS
@@ -908,6 +926,7 @@ public:
   }
 
   Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateXor(LC, RC), Name);
@@ -923,6 +942,7 @@ public:
   Value *CreateBinOp(Instruction::BinaryOps Opc,
                      Value *LHS, Value *RHS, const Twine &Name = "",
                      MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateBinOp(Opc, LC, RC), Name);
@@ -934,6 +954,7 @@ public:
 
   Value *CreateNeg(Value *V, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateNeg(VC, HasNUW, HasNSW), Name);
     BinaryOperator *BO = Insert(BinaryOperator::CreateNeg(V), Name);
@@ -949,12 +970,14 @@ public:
   }
   Value *CreateFNeg(Value *V, const Twine &Name = "",
                     MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateFNeg(VC), Name);
     return Insert(AddFPMathAttributes(BinaryOperator::CreateFNeg(V),
                                       FPMathTag, FMF), Name);
   }
   Value *CreateNot(Value *V, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateNot(VC), Name);
     return Insert(BinaryOperator::CreateNot(V), Name);
@@ -1035,6 +1058,7 @@ public:
   }
   Value *CreateGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
                    const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr)) {
       // Every index must be constant.
       size_t i, e;
@@ -1052,6 +1076,7 @@ public:
   }
   Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
                            const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr)) {
       // Every index must be constant.
       size_t i, e;
@@ -1068,6 +1093,7 @@ public:
     return CreateGEP(nullptr, Ptr, Idx, Name);
   }
   Value *CreateGEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       if (Constant *IC = dyn_cast<Constant>(Idx))
         return Insert(Folder.CreateGetElementPtr(Ty, PC, IC), Name);
@@ -1075,6 +1101,7 @@ public:
   }
   Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, Value *Idx,
                            const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       if (Constant *IC = dyn_cast<Constant>(Idx))
         return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, IC), Name);
@@ -1087,6 +1114,7 @@ public:
                             const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(Ty, PC, Idx), Name);
 
@@ -1096,6 +1124,7 @@ public:
                                     const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idx), Name);
 
@@ -1108,6 +1137,7 @@ public:
       ConstantInt::get(Type::getInt32Ty(Context), Idx1)
     };
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(Ty, PC, Idxs), Name);
 
@@ -1120,6 +1150,7 @@ public:
       ConstantInt::get(Type::getInt32Ty(Context), Idx1)
     };
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idxs), Name);
 
@@ -1128,6 +1159,7 @@ public:
   Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idx), Name);
 
@@ -1137,6 +1169,7 @@ public:
                                     const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idx), Name);
 
@@ -1149,6 +1182,7 @@ public:
       ConstantInt::get(Type::getInt64Ty(Context), Idx1)
     };
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idxs), Name);
 
@@ -1161,6 +1195,7 @@ public:
       ConstantInt::get(Type::getInt64Ty(Context), Idx1)
     };
 
+    if (AllowFolding)
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idxs),
                     Name);
@@ -1262,6 +1297,7 @@ public:
                              const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateZExtOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name);
@@ -1270,6 +1306,7 @@ public:
                              const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateSExtOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name);
@@ -1278,6 +1315,7 @@ public:
                               const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateTruncOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateTruncOrBitCast(V, DestTy), Name);
@@ -1286,6 +1324,7 @@ public:
                     const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateCast(Op, VC, DestTy), Name);
     return Insert(CastInst::Create(Op, V, DestTy), Name);
@@ -1294,6 +1333,7 @@ public:
                            const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreatePointerCast(VC, DestTy), Name);
     return Insert(CastInst::CreatePointerCast(V, DestTy), Name);
@@ -1304,6 +1344,7 @@ public:
     if (V->getType() == DestTy)
       return V;
 
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V)) {
       return Insert(Folder.CreatePointerBitCastOrAddrSpaceCast(VC, DestTy),
                     Name);
@@ -1317,6 +1358,7 @@ public:
                        const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateIntCast(VC, DestTy, isSigned), Name);
     return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name);
@@ -1342,6 +1384,7 @@ public:
   Value *CreateFPCast(Value *V, Type *DestTy, const Twine &Name = "") {
     if (V->getType() == DestTy)
       return V;
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateFPCast(VC, DestTy), Name);
     return Insert(CastInst::CreateFPCast(V, DestTy), Name);
@@ -1441,6 +1484,7 @@ public:
 
   Value *CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
                     const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateICmp(P, LC, RC), Name);
@@ -1448,6 +1492,7 @@ public:
   }
   Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
                     const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+    if (AllowFolding)
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFCmp(P, LC, RC), Name);
@@ -1481,6 +1526,7 @@ public:
 
   Value *CreateSelect(Value *C, Value *True, Value *False,
                       const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *CC = dyn_cast<Constant>(C))
       if (Constant *TC = dyn_cast<Constant>(True))
         if (Constant *FC = dyn_cast<Constant>(False))
@@ -1494,6 +1540,7 @@ public:
 
   Value *CreateExtractElement(Value *Vec, Value *Idx,
                               const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(Vec))
       if (Constant *IC = dyn_cast<Constant>(Idx))
         return Insert(Folder.CreateExtractElement(VC, IC), Name);
@@ -1507,6 +1554,7 @@ public:
 
   Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx,
                              const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *VC = dyn_cast<Constant>(Vec))
       if (Constant *NC = dyn_cast<Constant>(NewElt))
         if (Constant *IC = dyn_cast<Constant>(Idx))
@@ -1521,6 +1569,7 @@ public:
 
   Value *CreateShuffleVector(Value *V1, Value *V2, Value *Mask,
                              const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *V1C = dyn_cast<Constant>(V1))
       if (Constant *V2C = dyn_cast<Constant>(V2))
         if (Constant *MC = dyn_cast<Constant>(Mask))
@@ -1541,6 +1590,7 @@ public:
   Value *CreateExtractValue(Value *Agg,
                             ArrayRef<unsigned> Idxs,
                             const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *AggC = dyn_cast<Constant>(Agg))
       return Insert(Folder.CreateExtractValue(AggC, Idxs), Name);
     return Insert(ExtractValueInst::Create(Agg, Idxs), Name);
@@ -1549,6 +1599,7 @@ public:
   Value *CreateInsertValue(Value *Agg, Value *Val,
                            ArrayRef<unsigned> Idxs,
                            const Twine &Name = "") {
+    if (AllowFolding)
     if (Constant *AggC = dyn_cast<Constant>(Agg))
       if (Constant *ValC = dyn_cast<Constant>(Val))
         return Insert(Folder.CreateInsertValue(AggC, ValC, Idxs), Name);

+ 3 - 0
include/llvm/InitializePasses.h

@@ -261,6 +261,9 @@ void initializeResourceToHandlePass(PassRegistry&);
 void initializeSROA_SSAUp_HLSLPass(PassRegistry&);
 void initializeHoistConstantArrayPass(PassRegistry&);
 void initializeDxilLoopUnrollPass(PassRegistry&);
+void initializeDxilInsertNoopsPass(PassRegistry&);
+void initializeDxilFinalizeNoopsPass(PassRegistry&);
+void initializeDxilEliminateVectorPass(PassRegistry&);
 void initializeDxilConditionalMem2RegPass(PassRegistry&);
 void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
 // HLSL Change Ends

+ 10 - 0
include/llvm/Transforms/Scalar.h

@@ -143,6 +143,15 @@ void initializeDxilLoopUnrollPass(PassRegistry&);
 Pass *createDxilEraseDeadRegionPass();
 void initializeDxilEraseDeadRegionPass(PassRegistry&);
 
+Pass *createDxilEliminateVectorPass();
+void initializeDxilEliminateVectorPass(PassRegistry&);
+
+Pass *createDxilInsertNoopsPass();
+void initializeDxilInsertNoopsPass(PassRegistry&);
+
+Pass *createDxilFinalizeNoopsPass();
+void initializeDxilFinalizeNoopsPass(PassRegistry&);
+
 //===----------------------------------------------------------------------===//
 //
 // LowerStaticGlobalIntoAlloca. Replace static globals with alloca if only used
@@ -494,6 +503,7 @@ FunctionPass *createSampleProfileLoaderPass(StringRef Name);
 // ScalarizerPass - Converts vector operations into scalar operations
 //
 FunctionPass *createScalarizerPass();
+FunctionPass *createScalarizerPass(bool NoOpt);
 
 //===----------------------------------------------------------------------===//
 //

+ 335 - 6
lib/Analysis/InstructionSimplify.cpp

@@ -3941,6 +3941,333 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
                         Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
 }
 
+// HLSL Change - Begin
+// Copied CastInst simplification from LLVM 8
+
+/// Packs the elements of the constant vector \p C into \p Result, one
+/// element-sized shift at a time.
+///
+/// Undef elements contribute zero bits. \returns nullptr once every element
+/// has been merged into \p Result; if an element is neither undef nor a
+/// ConstantInt, returns an unfolded bitcast expression of \p C to \p DestTy.
+static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
+                                        Constant *C, Type *SrcEltTy,
+                                        unsigned NumSrcElts,
+                                        const DataLayout &DL) {
+  const unsigned EltBits = DL.getTypeSizeInBits(SrcEltTy);
+  for (unsigned Pos = 0; Pos < NumSrcElts; ++Pos) {
+    // Little-endian layouts are consumed from the last element backwards,
+    // big-endian layouts from the first element forwards.
+    const unsigned SrcIdx = DL.isLittleEndian() ? NumSrcElts - Pos - 1 : Pos;
+    Constant *Elt = C->getAggregateElement(SrcIdx);
+
+    // Only undef and plain integer constants can be merged into the APInt.
+    const bool IsUndef = Elt && isa<UndefValue>(Elt);
+    ConstantInt *EltCI =
+        IsUndef ? nullptr : dyn_cast_or_null<ConstantInt>(Elt);
+    if (!IsUndef && !EltCI)
+      return ConstantExpr::getBitCast(C, DestTy);
+
+    // Make room for this element; an undef element leaves the new bits zero.
+    Result <<= EltBits;
+    if (EltCI)
+      Result |= EltCI->getValue().zextOrSelf(Result.getBitWidth());
+  }
+
+  return nullptr;
+}
+
+/// Constant-fold a bitcast of \p C to \p DestTy, symbolically evaluating it with the DataLayout \p DL.
+/// Handles vector->scalar, scalar->vector and vector->vector casts that change the element count.
+/// This always returns a non-null constant, but it may be an unfolded ConstantExpr bitcast.
+static
+Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
+  // Catch the obvious splat cases.
+  if (C->isNullValue() && !DestTy->isX86_MMXTy())
+    return Constant::getNullValue(DestTy);
+  if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() &&
+      !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
+    return Constant::getAllOnesValue(DestTy);
+
+  if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
+    // Handle a vector->scalar integer/fp cast.
+    if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
+      unsigned NumSrcElts = VTy->getNumElements();
+      Type *SrcEltTy = VTy->getElementType();
+
+      // If the vector is a vector of floating point, convert it to a vector of
+      // int to simplify things.
+      if (SrcEltTy->isFloatingPointTy()) {
+        unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+        Type *SrcIVTy =
+          VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
+        // Ask IR to do the conversion now that #elts line up.
+        C = ConstantExpr::getBitCast(C, SrcIVTy);
+      }
+
+      APInt Result(DL.getTypeSizeInBits(DestTy), 0);
+      if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
+                                                SrcEltTy, NumSrcElts, DL))
+        return CE;
+
+      if (isa<IntegerType>(DestTy))
+        return ConstantInt::get(DestTy, Result);
+
+      APFloat FP(DestTy->getFltSemantics(), Result);
+      return ConstantFP::get(DestTy->getContext(), FP);
+    }
+  }
+
+  // The code below only handles casts to vectors currently.
+  auto *DestVTy = dyn_cast<VectorType>(DestTy);
+  if (!DestVTy)
+    return ConstantExpr::getBitCast(C, DestTy);
+
+  // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
+  // vector so the code below can handle it uniformly.
+  if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
+    Constant *Ops = C; // don't take the address of C!
+    return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
+  }
+
+  // If this is a bitcast from constant vector -> vector, fold it.
+  if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
+    return ConstantExpr::getBitCast(C, DestTy);
+
+  // If the element counts match, IR can fold it.
+  unsigned NumDstElt = DestVTy->getNumElements();
+  unsigned NumSrcElt = C->getType()->getVectorNumElements();
+  if (NumDstElt == NumSrcElt)
+    return ConstantExpr::getBitCast(C, DestTy);
+
+  Type *SrcEltTy = C->getType()->getVectorElementType();
+  Type *DstEltTy = DestVTy->getElementType();
+
+  // Otherwise, we're changing the number of elements in a vector, which
+  // requires endianness information to do the right thing.  For example,
+  //    bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+  // folds to (little endian):
+  //    <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+  // and to (big endian):
+  //    <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+
+  // First things first.  We only want to think about integers here, so if
+  // we have something in FP form, recast it as integer.
+  if (DstEltTy->isFloatingPointTy()) {
+    // Fold to a vector of integers with the same size as our FP type.
+    unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
+    Type *DestIVTy =
+      VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
+    // Recursively handle this integer conversion, if possible.
+    C = FoldBitCast(C, DestIVTy, DL);
+
+    // Finally, IR can handle this now that #elts line up.
+    return ConstantExpr::getBitCast(C, DestTy);
+  }
+
+  // Okay, we know the destination is integer, if the input is FP, convert
+  // it to integer first.
+  if (SrcEltTy->isFloatingPointTy()) {
+    unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+    Type *SrcIVTy =
+      VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
+    // Ask IR to do the conversion now that #elts line up.
+    C = ConstantExpr::getBitCast(C, SrcIVTy);
+    // If IR wasn't able to fold it, bail out.
+    if (!isa<ConstantVector>(C) &&  // FIXME: Remove ConstantVector.
+        !isa<ConstantDataVector>(C))
+      return C;
+  }
+
+  // Now we know that the input and output vectors are both integer vectors
+  // of the same size, and that their #elements is not the same.  Do the
+  // conversion here, which depends on whether the input or output has
+  // more elements.
+  bool isLittleEndian = DL.isLittleEndian();
+
+  SmallVector<Constant*, 32> Result;
+  if (NumDstElt < NumSrcElt) {
+    // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
+    Constant *Zero = Constant::getNullValue(DstEltTy);
+    unsigned Ratio = NumSrcElt/NumDstElt;
+    unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+    unsigned SrcElt = 0;
+    for (unsigned i = 0; i != NumDstElt; ++i) {
+      // Build each element of the result.
+      Constant *Elt = Zero;
+      unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
+      for (unsigned j = 0; j != Ratio; ++j) {
+        Constant *Src = C->getAggregateElement(SrcElt++);
+        if (Src && isa<UndefValue>(Src))
+          Src = Constant::getNullValue(C->getType()->getVectorElementType());
+        else
+          Src = dyn_cast_or_null<ConstantInt>(Src);
+        if (!Src)  // Reject constantexpr elements.
+          return ConstantExpr::getBitCast(C, DestTy);
+
+        // Zero extend the element to the right size.
+        Src = ConstantExpr::getZExt(Src, Elt->getType());
+
+        // Shift it to the right place, depending on endianness.
+        Src = ConstantExpr::getShl(Src,
+                                   ConstantInt::get(Src->getType(), ShiftAmt));
+        ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+
+        // Mix it in.
+        Elt = ConstantExpr::getOr(Elt, Src);
+      }
+      Result.push_back(Elt);
+    }
+    return ConstantVector::get(Result);
+  }
+
+  // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+  unsigned Ratio = NumDstElt/NumSrcElt;
+  unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
+
+  // Loop over each source value, expanding into multiple results.
+  for (unsigned i = 0; i != NumSrcElt; ++i) {
+    auto *Element = C->getAggregateElement(i);
+
+    if (!Element) // Reject constantexpr elements.
+      return ConstantExpr::getBitCast(C, DestTy);
+
+    if (isa<UndefValue>(Element)) {
+      // Correctly propagate undef values.
+      Result.append(Ratio, UndefValue::get(DstEltTy));
+      continue;
+    }
+
+    auto *Src = dyn_cast<ConstantInt>(Element);
+    if (!Src)
+      return ConstantExpr::getBitCast(C, DestTy);
+
+    unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+    for (unsigned j = 0; j != Ratio; ++j) {
+      // Shift the piece of the value into the right place, depending on
+      // endianness.
+      Constant *Elt = ConstantExpr::getLShr(Src,
+                                  ConstantInt::get(Src->getType(), ShiftAmt));
+      ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+
+      // Truncate the element to an integer with the same pointer size and
+      // convert the element back to a pointer using an inttoptr.
+      if (DstEltTy->isPointerTy()) {
+        IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize);
+        Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy);
+        Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy));
+        continue;
+      }
+
+      // Truncate and remember this piece.
+      Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
+    }
+  }
+
+  return ConstantVector::get(Result);
+}
+
+/// Folds the cast \p Opcode applied to the constant \p C, producing a
+/// constant of type \p DestTy.
+///
+/// ptrtoint/inttoptr round trips and bitcasts need \p DL (pointer widths,
+/// endianness) and are evaluated here; every other cast opcode, and any
+/// pointer cast that is not a foldable round trip, is handed to
+/// ConstantExpr::getCast unchanged.
+static
+Constant *ConstantFoldCastOperand(unsigned Opcode, Constant *C,
+                                        Type *DestTy, const DataLayout &DL) {
+  assert(Instruction::isCast(Opcode));
+  switch (Opcode) {
+  case Instruction::BitCast:
+    return FoldBitCast(C, DestTy, DL);
+
+  case Instruction::PtrToInt: {
+    // ptrtoint(inttoptr(x)) can drop the pair.  This requires knowing the
+    // width of a pointer, so it can't be done in ConstantExpr::getCast.
+    auto *Inner = dyn_cast<ConstantExpr>(C);
+    if (!Inner || Inner->getOpcode() != Instruction::IntToPtr)
+      break;
+
+    Constant *IntVal = Inner->getOperand(0);
+    unsigned IntBits = IntVal->getType()->getScalarSizeInBits();
+    unsigned PtrBits = DL.getPointerTypeSizeInBits(Inner->getType());
+    if (PtrBits < IntBits) {
+      // Keep only the low bits that fit in the intermediate pointer.
+      Constant *LowBits =
+        ConstantInt::get(Inner->getContext(),
+                         APInt::getLowBitsSet(IntBits, PtrBits));
+      IntVal = ConstantExpr::getAnd(IntVal, LowBits);
+    }
+    // Do a zext or trunc to get to the dest size.
+    return ConstantExpr::getIntegerCast(IntVal, DestTy, false);
+  }
+
+  case Instruction::IntToPtr: {
+    // inttoptr(ptrtoint(p)) becomes a ptr-to-ptr bitcast when the integer is
+    // at least as wide as the pointer and the address spaces agree.  This
+    // requires knowing the width of a pointer, so it can't be done in
+    // ConstantExpr::getCast.
+    auto *Inner = dyn_cast<ConstantExpr>(C);
+    if (!Inner || Inner->getOpcode() != Instruction::PtrToInt)
+      break;
+
+    Constant *OrigPtr = Inner->getOperand(0);
+    unsigned OrigPtrBits = DL.getPointerTypeSizeInBits(OrigPtr->getType());
+    unsigned MidBits = Inner->getType()->getScalarSizeInBits();
+    if (MidBits < OrigPtrBits)
+      break;
+
+    unsigned OrigAS = OrigPtr->getType()->getPointerAddressSpace();
+    if (OrigAS != DestTy->getPointerAddressSpace())
+      break;
+
+    return FoldBitCast(OrigPtr, DestTy, DL);
+  }
+
+  case Instruction::Trunc:
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPTrunc:
+  case Instruction::FPExt:
+  case Instruction::UIToFP:
+  case Instruction::SIToFP:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+  case Instruction::AddrSpaceCast:
+    break;
+
+  default:
+    llvm_unreachable("Missing case");
+  }
+
+  // No DataLayout-aware fold applied; let the IR-level folder handle it.
+  return ConstantExpr::getCast(Opcode, C, DestTy);
+}
+
+/// Tries to simplify the cast \p CastOpc of \p Op to type \p Ty without
+/// creating any new instruction.
+///
+/// \returns the folded constant for a constant operand, the original value
+/// when a pair of casts cancels out or the cast is an identity bitcast, and
+/// nullptr when nothing applies.
+static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
+                               Type *Ty, const DataLayout &DL) {
+  // Constant operands are folded outright.
+  if (auto *ConstOp = dyn_cast<Constant>(Op))
+    return ConstantFoldCastOperand(CastOpc, ConstOp, Ty, DL);
+
+  // cast(cast(x)) landing back on x's own type: if the pair is equivalent to
+  // a bitcast, the result is simply x.
+  auto *InnerCast = dyn_cast<CastInst>(Op);
+  if (InnerCast && InnerCast->getOperand(0)->getType() == Ty) {
+    Value *Orig = InnerCast->getOperand(0);
+    Type *OrigTy = Orig->getType();
+    Type *InnerTy = InnerCast->getType();
+
+    // isEliminableCastPair takes the pointer-sized integer type for each
+    // pointer (or pointer vector) type involved, and null otherwise.
+    auto IntPtrTyOrNull = [&DL](Type *T) -> Type * {
+      return T->isPtrOrPtrVectorTy() ? DL.getIntPtrType(T) : nullptr;
+    };
+    Type *OrigIntPtrTy = IntPtrTyOrNull(OrigTy);
+    Type *InnerIntPtrTy = IntPtrTyOrNull(InnerTy);
+    Type *DestIntPtrTy = IntPtrTyOrNull(Ty);
+
+    auto InnerOpc = static_cast<Instruction::CastOps>(InnerCast->getOpcode());
+    auto OuterOpc = static_cast<Instruction::CastOps>(CastOpc);
+    unsigned Combined = CastInst::isEliminableCastPair(
+        InnerOpc, OuterOpc, OrigTy, InnerTy, Ty, OrigIntPtrTy, InnerIntPtrTy,
+        DestIntPtrTy);
+    if (Combined == Instruction::BitCast)
+      return Orig;
+  }
+
+  // bitcast x -> x
+  const bool IsIdentityBitCast =
+      CastOpc == Instruction::BitCast && Op->getType() == Ty;
+  return IsIdentityBitCast ? Op : nullptr;
+}
+
+/// Public entry point for cast simplification; forwards to the file-local
+/// helper of the same name at global scope.
+Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op,
+                              Type *Ty, const DataLayout &DL) {
+  Value *Simplified = ::SimplifyCastInst(CastOpc, Op, Ty, DL);
+  return Simplified;
+}
+
+// HLSL Change - End
+
 /// SimplifyInstruction - See if we can compute a simplified version of this
 /// instruction.  If not, this returns null.
 Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
@@ -4075,12 +4402,14 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
     break;
   case Instruction::Call: {
     CallSite CS(cast<CallInst>(I));
-    // HLSL Change Begin - simplify dxil call.
-    if (hlsl::CanSimplify(CS.getCalledFunction())) {
-      SmallVector<Value *, 4> Args(CS.arg_begin(), CS.arg_end());
-      if (Value *DxilResult = hlsl::SimplifyDxilCall(CS.getCalledFunction(), Args, I)) {
-        Result = DxilResult;
-        break;
+    // HLSL Change Begin - simplify dxil calls.
+    if (Function *Callee = CS.getCalledFunction()) {
+      if (hlsl::CanSimplify(Callee)) {
+        SmallVector<Value *, 4> Args(CS.arg_begin(), CS.arg_end());
+        if (Value *DxilResult = hlsl::SimplifyDxilCall(CS.getCalledFunction(), Args, I)) {
+          Result = DxilResult;
+          break;
+        }
       }
     }
     // HLSL Change End.

+ 36 - 0
lib/DXIL/DxilMetadataHelper.cpp

@@ -22,6 +22,7 @@
 
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
@@ -48,6 +49,7 @@ const char DxilMDHelper::kDxilTypeSystemMDName[]                      = "dx.type
 const char DxilMDHelper::kDxilTypeSystemHelperVariablePrefix[]        = "dx.typevar.";
 const char DxilMDHelper::kDxilControlFlowHintMDName[]                 = "dx.controlflow.hints";
 const char DxilMDHelper::kDxilPreciseAttributeMDName[]                = "dx.precise";
+const char DxilMDHelper::kDxilVariableDebugLayoutMDName[]             = "dx.dbg.varlayout";
 const char DxilMDHelper::kDxilNonUniformAttributeMDName[]             = "dx.nonuniform";
 const char DxilMDHelper::kHLDxilResourceAttributeMDName[]             = "dx.hl.resource.attribute";
 const char DxilMDHelper::kDxilValidatorVersionMDName[]                = "dx.valver";
@@ -2401,4 +2403,38 @@ void DxilMDHelper::MarkNonUniform(Instruction *I) {
   I->setMetadata(DxilMDHelper::kDxilNonUniformAttributeMDName, preciseNode);
 }
 
+/// Decodes the variable debug layout metadata attached to \p inst.
+///
+/// The tuple holds the start offset in operand 0 followed by
+/// (stride in bits, element count) pairs, one pair per array dimension.
+/// Decoded dimensions are appended to \p ArrayDims.
+/// \returns false when \p inst carries no such MDTuple, true otherwise. A
+/// tuple with an even operand count is rejected through IFTBOOL with
+/// DXC_E_INCORRECT_DXIL_METADATA.
+bool DxilMDHelper::GetVariableDebugLayout(llvm::DbgDeclareInst *inst,
+    unsigned &StartOffsetInBits, std::vector<DxilDIArrayDim> &ArrayDims) {
+  llvm::MDNode *LayoutMD =
+      inst->getMetadata(DxilMDHelper::kDxilVariableDebugLayoutMDName);
+  llvm::MDTuple *Layout = dyn_cast_or_null<MDTuple>(LayoutMD);
+  if (!Layout)
+    return false;
+
+  // One leading offset plus whole pairs means the operand count must be odd.
+  const unsigned NumOps = Layout->getNumOperands();
+  IFTBOOL(NumOps % 2 == 1, DXC_E_INCORRECT_DXIL_METADATA);
+
+  StartOffsetInBits = ConstMDToUint32(Layout->getOperand(0));
+
+  unsigned Op = 1;
+  while (Op < NumOps) {
+    DxilDIArrayDim Dim = {};
+    Dim.StrideInBits = ConstMDToUint32(Layout->getOperand(Op++));
+    Dim.NumElements = ConstMDToUint32(Layout->getOperand(Op++));
+    ArrayDims.push_back(Dim);
+  }
+
+  return true;
+}
+
+/// Attaches variable debug layout metadata to \p inst.
+///
+/// The emitted tuple is \p StartOffsetInBits followed by one
+/// (StrideInBits, NumElements) pair per entry of \p ArrayDims, which is the
+/// shape GetVariableDebugLayout reads back.
+void DxilMDHelper::SetVariableDebugLayout(llvm::DbgDeclareInst *inst,
+    unsigned StartOffsetInBits, const std::vector<DxilDIArrayDim> &ArrayDims) {
+  LLVMContext &Ctx = inst->getContext();
+
+  // One operand for the start offset plus two per array dimension, so reserve
+  // the exact final size instead of growing the vector while filling it.
+  std::vector<Metadata*> MDVals;
+  MDVals.reserve(ArrayDims.size() * 2 + 1);
+  MDVals.emplace_back(Uint32ToConstMD(StartOffsetInBits, Ctx));
+  for (const DxilDIArrayDim &ArrayDim : ArrayDims) {
+    MDVals.emplace_back(Uint32ToConstMD(ArrayDim.StrideInBits, Ctx));
+    MDVals.emplace_back(Uint32ToConstMD(ArrayDim.NumElements, Ctx));
+  }
+
+  inst->setMetadata(DxilMDHelper::kDxilVariableDebugLayoutMDName, MDNode::get(Ctx, MDVals));
+}
+
 } // namespace hlsl

+ 6 - 2
lib/DXIL/DxilModule.cpp

@@ -1009,6 +1009,7 @@ namespace {
 template <typename TResource>
 static void RemoveResourcesWithUnusedSymbolsHelper(std::vector<std::unique_ptr<TResource>> &vec) {
   unsigned resID = 0;
+  std::unordered_set<GlobalVariable *> eraseList; // Need in case of duplicate defs of lib resources
   for (auto p = vec.begin(); p != vec.end();) {
     auto c = p++;
     Constant *symbol = (*c)->GetGlobalSymbol();
@@ -1016,7 +1017,7 @@ static void RemoveResourcesWithUnusedSymbolsHelper(std::vector<std::unique_ptr<T
     if (symbol->user_empty()) {
       p = vec.erase(c);
       if (GlobalVariable *GV = dyn_cast<GlobalVariable>(symbol))
-        GV->eraseFromParent();
+        eraseList.insert(GV);
       continue;
     }
     if ((*c)->GetID() != resID) {
@@ -1024,6 +1025,9 @@ static void RemoveResourcesWithUnusedSymbolsHelper(std::vector<std::unique_ptr<T
     }
     resID++;
   }
+  for (auto gv : eraseList) {
+    gv->eraseFromParent();
+  }
 }
 }
 
@@ -1557,7 +1561,7 @@ static bool ResourceTypeRequiresTranslation(const StructType* Ty) {
         return true;
     }
     SequentialType *seqTy;
-    while (seqTy = dyn_cast<SequentialType>(eTy)) {
+    while ((seqTy = dyn_cast<SequentialType>(eTy))) {
       eTy = seqTy->getElementType();
     }
     if (eTy->getScalarSizeInBits() < 32) {

+ 2 - 1
lib/DXIL/DxilOperations.cpp

@@ -506,7 +506,8 @@ bool OP::IsDxilOpFuncName(StringRef name) {
 }
 
 bool OP::IsDxilOpFunc(const llvm::Function *F) {
-  if (!F->hasName())
+  // Treat a null F as "not a DXIL op" so IsDxilOpFunc(Call.getCalledFunction()) stays safe for indirect calls
+  if (F == nullptr || !F->hasName())
     return false;
   return IsDxilOpFuncName(F->getName());
 }

+ 58 - 33
lib/DXIL/DxilPDB.cpp

@@ -42,6 +42,7 @@ static const char kMsfMagic[] = {'M',  'i',  'c',    'r', 'o', 's',  'o',  'f',
                                  'M',  'S',  'F',    ' ', '7', '.',  '0',  '0',
                                  '\r', '\n', '\x1a', 'D', 'S', '\0', '\0', '\0'};
 
+static const uint32_t kPdbStreamIndex = 1; // This is the fixed stream index where the PDB stream header is
 static const uint32_t kDataStreamIndex = 5; // This is the fixed stream index where we will store our custom data.
 static const uint32_t kMsfBlockSize = 512;
 
@@ -154,7 +155,7 @@ struct MSFWriter {
     void WriteBlocks(uint32_t NumBlocks, const void *Data, uint32_t Size) {
       assert(NumBlocks >= GetNumBlocks(Size) && "Cannot fit data into the requested number of blocks!");
       uint32_t TotalSize = NumBlocks * kMsfBlockSize;
-      OS.write((char *)Data, Size);
+      OS.write(static_cast<char*>(const_cast<void *>(Data)), Size);
       WriteZeroPads(TotalSize - Size);
       BlocksWritten += NumBlocks;
     }
@@ -375,29 +376,7 @@ struct PDBReader {
     return m_pStream->Seek(Offset, STREAM_SEEK_CUR, &BytesMoved);
   }
 
-  HRESULT ReadU32ListFromBlocks(ArrayRef<uint32_t> Blocks, UINT32 uOffsetByU32, UINT32 uNumU32, SmallVectorImpl<uint32_t> &Output) {
-    if (Blocks.size() == 0) return E_FAIL;
-    Output.clear();
-
-    for (unsigned i = 0; i < uNumU32; i++) {
-      UINT32 uOffsetInBytes = (uOffsetByU32+i) * sizeof(UINT32);
-      UINT32 BlockIndex = uOffsetInBytes / m_SB.BlockSize;
-      UINT32 ByteOffset = uOffsetInBytes % m_SB.BlockSize;
-
-      UINT32 uBlock = Blocks[BlockIndex];
-      IFR(GoToBeginningOfBlock(uBlock));
-      IFR(OffsetByU32(ByteOffset / sizeof(UINT32)));
-
-      UINT32 uData = 0;
-      IFR(ReadU32(&uData));
-
-      Output.push_back(uData);
-    }
-
-    return S_OK;
-  }
-
-  HRESULT ReadContainedData(IDxcBlob **ppData) {
+  HRESULT ReadWholeStream(uint32_t StreamIndex, IDxcBlob **ppData) {
     if (FAILED(m_Status)) return m_Status;
 
     UINT32 uNumDirectoryBlocks =
@@ -418,21 +397,21 @@ struct PDBReader {
     IFR(ReadU32(&uNumStreams));
 
     // If we don't have enough streams, then give up.
-    if (uNumStreams <= kDataStreamIndex)
+    if (uNumStreams <= StreamIndex)
       return E_FAIL;
 
     llvm::SmallVector<uint32_t, 6> StreamSizes;
     IFR(ReadU32ListFromBlocks(DirectoryBlocks, 1, uNumStreams, StreamSizes));
 
     UINT32 uOffsets = 0;
-    for (unsigned i = 0; i <= kDataStreamIndex-1; i++) {
+    for (unsigned i = 0; i < StreamIndex; i++) {
       UINT32 uNumBlocks = CalculateNumBlocks(m_SB.BlockSize, StreamSizes[i]);
       uOffsets += uNumBlocks;
     }
 
     llvm::SmallVector<uint32_t, 12> DataBlocks;
     IFR(ReadU32ListFromBlocks(DirectoryBlocks, 1 + uNumStreams + uOffsets, 
-      CalculateNumBlocks(m_SB.BlockSize, StreamSizes[kDataStreamIndex]), DataBlocks));
+      CalculateNumBlocks(m_SB.BlockSize, StreamSizes[StreamIndex]), DataBlocks));
 
     if (DataBlocks.size() == 0)
       return E_FAIL;
@@ -457,20 +436,66 @@ struct PDBReader {
 
     return S_OK;
   }
-};
 
+  // Reads uNumU32 consecutive 32-bit values into Output, starting uOffsetByU32
+  // values into the logical byte range formed by the blocks listed in Blocks.
+  // Output is cleared first. Returns E_FAIL when Blocks is empty or when a
+  // requested value lies beyond the last listed block; a failing seek or read
+  // is passed through IFR; otherwise returns S_OK.
+  HRESULT ReadU32ListFromBlocks(ArrayRef<uint32_t> Blocks, UINT32 uOffsetByU32, UINT32 uNumU32, SmallVectorImpl<uint32_t> &Output) {
+    if (Blocks.size() == 0) return E_FAIL;
+    Output.clear();
 
-HRESULT hlsl::pdb::LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **ppContainer) {
+    for (unsigned i = 0; i < uNumU32; i++) {
+      UINT32 uOffsetInBytes = (uOffsetByU32+i) * sizeof(UINT32);
+      UINT32 BlockIndex = uOffsetInBytes / m_SB.BlockSize;
+      UINT32 ByteOffset = uOffsetInBytes % m_SB.BlockSize;
+
+      // Callers derive the offset and count from values read out of the file,
+      // so never index past the end of the block list.
+      if (BlockIndex >= Blocks.size())
+        return E_FAIL;
+
+      UINT32 uBlock = Blocks[BlockIndex];
+      IFR(GoToBeginningOfBlock(uBlock));
+      IFR(OffsetByU32(ByteOffset / sizeof(UINT32)));
+
+      UINT32 uData = 0;
+      IFR(ReadU32(&uData));
+
+      Output.push_back(uData);
+    }
+
+    return S_OK;
+  }
+};
+
+HRESULT hlsl::pdb::LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **ppHash, IDxcBlob **ppContainer) {
   PDBReader Reader(pMalloc, pIStream);
 
-  CComPtr<IDxcBlob> pDataBlob;
-  IFR(Reader.ReadContainedData(&pDataBlob));
+  if (ppHash) { // the hash output is optional: skipped entirely when ppHash is null
+    CComPtr<IDxcBlob> pPdbStream;
+    IFR(Reader.ReadWholeStream(kPdbStreamIndex, &pPdbStream));
+
+    if (pPdbStream->GetBufferSize() < sizeof(PdbStreamHeader)) // stream too small to contain a PdbStreamHeader
+      return E_FAIL;
+
+    PdbStreamHeader PdbHeader = {};
+    memcpy(&PdbHeader, pPdbStream->GetBufferPointer(), sizeof(PdbHeader));
+
+    CComPtr<hlsl::AbstractMemoryStream> pHash;
+    IFR(CreateMemoryStream(pMalloc, &pHash));
+    ULONG uBytesWritten = 0;
+    IFR(pHash->Write(PdbHeader.UniqueId, sizeof(PdbHeader.UniqueId), &uBytesWritten)); // the hash blob holds exactly the header's UniqueId bytes
+
+    if (uBytesWritten != sizeof(PdbHeader.UniqueId))
+      return E_FAIL;
+
+    IFR(pHash.QueryInterface(ppHash)); // NOTE(review): *ppHash stays set if reading the container below fails - confirm callers release it
+  }
+
+  CComPtr<IDxcBlob> pContainer;
+  IFR(Reader.ReadWholeStream(kDataStreamIndex, &pContainer)); // the custom data stream, validated below as a DXIL container
 
-  if (!hlsl::IsValidDxilContainer((hlsl::DxilContainerHeader *)pDataBlob->GetBufferPointer(), pDataBlob->GetBufferSize()))
+  if (!hlsl::IsValidDxilContainer((hlsl::DxilContainerHeader *)pContainer->GetBufferPointer(), pContainer->GetBufferSize()))
     return E_FAIL;
 
-  *ppContainer = pDataBlob.Detach();
+  *ppContainer = pContainer.Detach(); // hands the blob's reference over to the caller
 
   return S_OK;
 }
 
+// Convenience overload for callers that only want the container blob: it
+// forwards to the four-argument overload with a null hash output.
+HRESULT hlsl::pdb::LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **ppContainer) {
+  IDxcBlob **ppNoHash = nullptr;
+  return LoadDataFromStream(pMalloc, pIStream, ppNoHash, ppContainer);
+}
+

+ 1 - 1
lib/DXIL/DxilShaderFlags.cpp

@@ -47,8 +47,8 @@ ShaderFlags::ShaderFlags():
 , m_bBarycentrics(false)
 , m_bUseNativeLowPrecision(false)
 , m_bShadingRate(false)
-, m_bSamplerFeedback(false)
 , m_bRaytracingTier1_1(false)
+, m_bSamplerFeedback(false)
 , m_align0(0)
 , m_align1(0)
 {}

+ 5 - 3
lib/DXIL/DxilShaderModel.cpp

@@ -250,7 +250,10 @@ const char * ShaderModel::GetKindName() const {
   return GetKindName(m_Kind);
 }
 
-const char * ShaderModel::GetKindName(Kind kind) {
+const char *ShaderModel::GetKindName(Kind kind) {
+  static_assert(static_cast<unsigned>(Kind::Invalid) ==
+                    _countof(ShaderModelKindNames) - 1,
+                "Invalid kinds or names"); // compile-time check: Kind::Invalid must index the last entry of ShaderModelKindNames
   return ShaderModelKindNames[static_cast<unsigned int>(kind)]; // kind is used directly as the table index
 }
 
@@ -332,12 +335,11 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
 
   // lib_6_x is for offline linking only, and relaxes restrictions
   SM(Kind::Library,  6, kOfflineMinor, "lib_6_x",  32, 32,  true,  true,  UINT_MAX),
-
+  
   SM(Kind::Mesh,     6, 5, "ms_6_5",    0,  0,  true,  true,  UINT_MAX),
   SM(Kind::Amplification, 6, 5, "as_6_5", 0, 0, true,  true,  UINT_MAX),
 
   // Values before Invalid must remain sorted by Kind, then Major, then Minor.
-
   SM(Kind::Invalid,  0, 0, "invalid", 0,  0,   false, false, 0),
 };
 

+ 5 - 1
lib/DXIL/DxilUtil.cpp

@@ -538,6 +538,10 @@ bool IsHLSLResourceType(llvm::Type *Ty) {
 
 bool IsHLSLObjectType(llvm::Type *Ty) {
   if (llvm::StructType *ST = dyn_cast<llvm::StructType>(Ty)) {
+    if (!ST->hasName()) {
+      return false;
+    }
+
     StringRef name = ST->getName();
     // TODO: don't check names.
     if (name.startswith("dx.types.wave_t"))
@@ -585,7 +589,7 @@ bool ContainsHLSLObjectType(llvm::Type *Ty) {
     Ty = llvm::cast<llvm::ArrayType>(Ty)->getArrayElementType();
 
   if (llvm::StructType *ST = llvm::dyn_cast<llvm::StructType>(Ty)) {
-    if (ST->getName().startswith("dx.types."))
+    if (ST->hasName() && ST->getName().startswith("dx.types."))
       return true;
     // TODO: How is this suppoed to check for Input/OutputPatch types if
     // these have already been eliminated in function arguments during CG?

Những thay đổi đã bị hủy bỏ vì nó quá lớn
+ 523 - 271
lib/DxcSupport/FileIOHelper.cpp


+ 65 - 28
lib/DxcSupport/HLSLOptions.cpp

@@ -296,10 +296,27 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   const MainArgs &argStrings, DxcOpts &opts,
   llvm::raw_ostream &errors) {
   DXASSERT_NOMSG(optionTable != nullptr);
+  opts.DefaultTextCodePage = DXC_CP_UTF8;
 
   unsigned missingArgIndex = 0, missingArgCount = 0;
   InputArgList Args = optionTable->ParseArgs(
     argStrings.getArrayRef(), missingArgIndex, missingArgCount, flagsToInclude);
+
+  // Set DefaultTextCodePage early so it may influence error buffer
+  // Default to UTF8 for compatibility
+  llvm::StringRef encoding = Args.getLastArgValue(OPT_encoding);
+  if (!encoding.empty()) {
+    if (encoding.equals_lower("utf8")) {
+      opts.DefaultTextCodePage = DXC_CP_UTF8;
+    } else if (encoding.equals_lower("utf16")) {
+      opts.DefaultTextCodePage = DXC_CP_UTF16;
+    } else {
+      errors << "Unsupported value '" << encoding
+        << "for -encoding option.  Allowed values: utf8, utf16.";
+      return 1;
+    }
+  }
+
   // Verify consistency for external library support.
   opts.ExternalLib = Args.getLastArgValue(OPT_external_lib);
   opts.ExternalFn = Args.getLastArgValue(OPT_external_fn);
@@ -359,13 +376,10 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   }
 
   if (opts.IsLibraryProfile()) {
-    if (Args.getLastArg(OPT_entrypoint)) {
-      errors << "cannot specify entry point for a library";
-      return 1;
-    } else {
-      // Set entry point to impossible name.
-      opts.EntryPoint = "lib.no::entry";
-    }
+    // Don't bother erroring out when entry is specified.  We weren't always
+    // doing this before, so doing so will break existing code.
+    // Set entry point to impossible name.
+    opts.EntryPoint = "lib.no::entry";
   } else {
     if (Args.getLastArg(OPT_exports)) {
       errors << "library profile required when using -exports option";
@@ -427,6 +441,9 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.OutputObject = Args.getLastArgValue(OPT_Fo);
   opts.OutputHeader = Args.getLastArgValue(OPT_Fh);
   opts.OutputWarningsFile = Args.getLastArgValue(OPT_Fe);
+  opts.OutputReflectionFile = Args.getLastArgValue(OPT_Fre);
+  opts.OutputRootSigFile = Args.getLastArgValue(OPT_Frs);
+  opts.OutputShaderHashFile = Args.getLastArgValue(OPT_Fsh);
   opts.UseColor = Args.hasFlag(OPT_Cc, OPT_INVALID, false);
   opts.UseInstructionNumbers = Args.hasFlag(OPT_Ni, OPT_INVALID, false);
   opts.UseInstructionByteOffsets = Args.hasFlag(OPT_No, OPT_INVALID, false);
@@ -440,6 +457,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.VariableName = Args.getLastArgValue(OPT_Vn);
   opts.InputFile = Args.getLastArgValue(OPT_INPUT);
   opts.ForceRootSigVer = Args.getLastArgValue(OPT_force_rootsig_ver);
+  if (opts.ForceRootSigVer.empty())
+    opts.ForceRootSigVer = Args.getLastArgValue(OPT_force_rootsig_ver_);
   opts.PrivateSource = Args.getLastArgValue(OPT_setprivate);
   opts.RootSignatureSource = Args.getLastArgValue(OPT_setrootsignature);
   opts.VerifyRootSignatureSource = Args.getLastArgValue(OPT_verifyrootsignature);
@@ -536,13 +555,17 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.DisableValidation = Args.hasFlag(OPT_VD, OPT_INVALID, false);
 
   opts.AllResourcesBound = Args.hasFlag(OPT_all_resources_bound, OPT_INVALID, false);
+  opts.AllResourcesBound = Args.hasFlag(OPT_all_resources_bound_, OPT_INVALID, opts.AllResourcesBound);
   opts.ColorCodeAssembly = Args.hasFlag(OPT_Cc, OPT_INVALID, false);
   opts.DefaultRowMajor = Args.hasFlag(OPT_Zpr, OPT_INVALID, false);
   opts.DefaultColMajor = Args.hasFlag(OPT_Zpc, OPT_INVALID, false);
   opts.DumpBin = Args.hasFlag(OPT_dumpbin, OPT_INVALID, false);
-  opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_not_use_legacy_cbuf_load, OPT_INVALID, false);
+  opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_no_legacy_cbuf_layout, OPT_INVALID, false);
+  opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_not_use_legacy_cbuf_load_, OPT_INVALID, opts.NotUseLegacyCBufLoad);
   opts.PackPrefixStable = Args.hasFlag(OPT_pack_prefix_stable, OPT_INVALID, false);
+  opts.PackPrefixStable = Args.hasFlag(OPT_pack_prefix_stable_, OPT_INVALID, opts.PackPrefixStable);
   opts.PackOptimized = Args.hasFlag(OPT_pack_optimized, OPT_INVALID, false);
+  opts.PackOptimized = Args.hasFlag(OPT_pack_optimized_, OPT_INVALID, opts.PackOptimized);
   opts.DisplayIncludeProcess = Args.hasFlag(OPT_H, OPT_INVALID, false);
   opts.WarningAsError = Args.hasFlag(OPT__SLASH_WX, OPT_INVALID, false);
   opts.AvoidFlowControl = Args.hasFlag(OPT_Gfa, OPT_INVALID, false);
@@ -564,6 +587,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.LegacyResourceReservation = Args.hasFlag(OPT_flegacy_resource_reservation, OPT_INVALID, false);
   opts.ExportShadersOnly = Args.hasFlag(OPT_export_shaders_only, OPT_INVALID, false);
   opts.ResMayAlias = Args.hasFlag(OPT_res_may_alias, OPT_INVALID, false);
+  opts.ResMayAlias = Args.hasFlag(OPT_res_may_alias_, OPT_INVALID, opts.ResMayAlias);
 
   if (opts.DefaultColMajor && opts.DefaultRowMajor) {
     errors << "Cannot specify /Zpr and /Zpc together, use /? to get usage information";
@@ -599,9 +623,18 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 
   if (!opts.Preprocess.empty() &&
       (!opts.OutputHeader.empty() || !opts.OutputObject.empty() ||
-       !opts.OutputWarnings || !opts.OutputWarningsFile.empty())) {
-    errors << "Preprocess cannot be specified with other options.";
-    return 1;
+       !opts.OutputWarnings || !opts.OutputWarningsFile.empty() ||
+       !opts.OutputReflectionFile.empty() ||
+       !opts.OutputRootSigFile.empty() ||
+       !opts.OutputShaderHashFile.empty())) {
+    opts.OutputHeader = "";
+    opts.OutputObject = "";
+    opts.OutputWarnings = true;
+    opts.OutputWarningsFile = "";
+    opts.OutputReflectionFile = "";
+    opts.OutputRootSigFile = "";
+    opts.OutputShaderHashFile = "";
+    errors << "Warning: compiler options ignored with Preprocess.";
   }
 
   if (opts.DumpBin) {
@@ -620,6 +653,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
     }
   }
 
+  // XXX TODO: Sort this out, since it's required for new API, but a separate argument for old APIs.
   if ((flagsToInclude & hlsl::options::DriverOption) &&
       opts.TargetProfile.empty() && !opts.DumpBin && opts.Preprocess.empty() && !opts.RecompileFromBinary) {
     // Target profile is required in arguments only for drivers when compiling;
@@ -628,23 +662,6 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
     return 1;
   }
 
-  if (opts.EmbedDebug && !opts.DebugInfo) {
-    errors << "Must enable debug info with /Zi for /Qembed_debug";
-    return 1;
-  }
-
-  if (opts.DebugInfo && !opts.DebugNameForBinary && !opts.DebugNameForSource) {
-    opts.DebugNameForBinary = true;
-  } else if (opts.DebugNameForBinary && opts.DebugNameForSource) {
-    errors << "Cannot specify both /Zss and /Zsb";
-    return 1;
-  }
-
-  if (opts.DebugNameForSource && !opts.DebugInfo) {
-    errors << "/Zss requires debug info (/Zi)";
-    return 1;
-  }
-
   llvm::StringRef valVersionStr = Args.getLastArgValue(OPT_validator_version);
   if (!valVersionStr.empty()) {
     // Parse "major.minor" version string
@@ -815,6 +832,26 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 #endif // ENABLE_SPIRV_CODEGEN
   // SPIRV Change Ends
 
+  // Validation for DebugInfo here because spirv uses same DebugInfo opt,
+  // and legacy wrappers will add EmbedDebug in this case, leading to this
+  // failing if placed before spirv path sets DebugInfo to true.
+  if (opts.EmbedDebug && !opts.DebugInfo) {
+    errors << "Must enable debug info with /Zi for /Qembed_debug";
+    return 1;
+  }
+
+  if (opts.DebugInfo && !opts.DebugNameForBinary && !opts.DebugNameForSource) {
+    opts.DebugNameForBinary = true;
+  } else if (opts.DebugNameForBinary && opts.DebugNameForSource) {
+    errors << "Cannot specify both /Zss and /Zsb";
+    return 1;
+  }
+
+  if (opts.DebugNameForSource && !opts.DebugInfo) {
+    errors << "/Zss requires debug info (/Zi)";
+    return 1;
+  }
+
   opts.Args = std::move(Args);
   return 0;
 }

+ 26 - 10
lib/DxcSupport/Unicode.cpp

@@ -53,7 +53,7 @@ int MultiByteToWideChar(uint32_t CodePage, uint32_t /*dwFlags*/,
   size_t rv;
   const char *locale = CPToLocale(CodePage);
   locale = setlocale(LC_ALL, locale);
-  if (lpMultiByteStr[cbMultiByte] != '\0') {
+  if (lpMultiByteStr[cbMultiByte - 1] != '\0') {
     char *srcStr = (char *)malloc((cbMultiByte +1) * sizeof(char));
     strncpy(srcStr, lpMultiByteStr, cbMultiByte);
     srcStr[cbMultiByte]='\0';
@@ -102,7 +102,7 @@ int WideCharToMultiByte(uint32_t CodePage, uint32_t /*dwFlags*/,
   size_t rv;
   const char *locale = CPToLocale(CodePage);
   locale = setlocale(LC_ALL, locale);
-  if (lpWideCharStr[cchWideChar] != L'\0') {
+  if (lpWideCharStr[cchWideChar - 1] != L'\0') {
     wchar_t *srcStr = (wchar_t *)malloc((cchWideChar+1) * sizeof(wchar_t));
     wcsncpy(srcStr, lpWideCharStr, cchWideChar);
     srcStr[cchWideChar] = L'\0';
@@ -120,10 +120,9 @@ int WideCharToMultiByte(uint32_t CodePage, uint32_t /*dwFlags*/,
 namespace Unicode {
 
 _Success_(return != false)
-bool UTF16ToEncodedString(_In_z_ const wchar_t* text, DWORD cp, DWORD flags, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
+bool UTF16ToEncodedString(_In_z_ const wchar_t* text, size_t cUTF16, DWORD cp, DWORD flags, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
   BOOL usedDefaultChar;
   LPBOOL pUsedDefaultChar = (lossy == nullptr) ? nullptr : &usedDefaultChar;
-  size_t cUTF16 = wcslen(text);
   if (lossy != nullptr) *lossy = false;
 
   // Handle zero-length as a special case; it's a special value to indicate errors in WideCharToMultiByte.
@@ -188,30 +187,47 @@ std::wstring UTF8ToUTF16StringOrThrow(_In_z_ const char *pUTF8) {
 }
 
 _Use_decl_annotations_
-bool UTF8ToConsoleString(_In_z_ const char* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
+bool UTF8ToConsoleString(_In_z_ const char* text, _In_ size_t textLen, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
   DXASSERT_NOMSG(text != nullptr);
   DXASSERT_NOMSG(pValue != nullptr);
   std::wstring text16;
   if (lossy != nullptr) *lossy = false;
-  if (!UTF8ToUTF16String(text, &text16)) {
+  if (!UTF8ToUTF16String(text, textLen, &text16)) {
     return false;
   }
-  return UTF16ToConsoleString(text16.c_str(), pValue, lossy);
+  return UTF16ToConsoleString(text16.c_str(), text16.length(), pValue, lossy);
 }
 
 _Use_decl_annotations_
-bool UTF16ToConsoleString(const wchar_t* text, std::string* pValue, bool* lossy) {
+bool UTF8ToConsoleString(_In_z_ const char* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
+  return UTF8ToConsoleString(text, strlen(text), pValue, lossy);
+}
+
+_Use_decl_annotations_
+bool UTF16ToConsoleString(const wchar_t* text, _In_ size_t textLen, std::string* pValue, bool* lossy) {
   DXASSERT_NOMSG(text != nullptr);
   DXASSERT_NOMSG(pValue != nullptr);
   UINT cp = GetConsoleOutputCP();
-  return UTF16ToEncodedString(text, cp, 0, pValue, lossy);
+  return UTF16ToEncodedString(text, textLen, cp, 0, pValue, lossy);
+}
+
+_Use_decl_annotations_
+bool UTF16ToConsoleString(const wchar_t* text, std::string* pValue, bool* lossy) {
+  return UTF16ToConsoleString(text, wcslen(text), pValue, lossy);
+}
+
+_Use_decl_annotations_
+bool UTF16ToUTF8String(const wchar_t *pUTF16, size_t cUTF16, std::string *pUTF8) {
+  DXASSERT_NOMSG(pUTF16 != nullptr);
+  DXASSERT_NOMSG(pUTF8 != nullptr);
+  return UTF16ToEncodedString(pUTF16, cUTF16, CP_UTF8, 0, pUTF8, nullptr);
 }
 
 _Use_decl_annotations_
 bool UTF16ToUTF8String(const wchar_t *pUTF16, std::string *pUTF8) {
   DXASSERT_NOMSG(pUTF16 != nullptr);
   DXASSERT_NOMSG(pUTF8 != nullptr);
-  return UTF16ToEncodedString(pUTF16, CP_UTF8, 0, pUTF8, nullptr);
+  return UTF16ToEncodedString(pUTF16, wcslen(pUTF16), CP_UTF8, 0, pUTF8, nullptr);
 }
 
 std::string UTF16ToUTF8StringOrThrow(_In_z_ const wchar_t *pUTF16) {

+ 102 - 20
lib/DxcSupport/dxcapi.use.cpp

@@ -13,6 +13,7 @@
 #include "dxc/Support/dxcapi.use.h"
 #include "dxc/Support/Global.h"
 #include "dxc/Support/Unicode.h"
+#include "dxc/Support/FileIOHelper.h"
 #include "dxc/Support/WinFunctions.h"
 
 namespace dxc {
@@ -99,36 +100,128 @@ void WriteOperationResultToConsole(_In_ IDxcOperationResult *pRewriteResult,
   WriteBlobToConsole(pBlob, STD_OUTPUT_HANDLE);
 }
 
+static void WriteUtf16NullTermToConsole(_In_opt_z_ const wchar_t *pText,
+                                        DWORD streamType) {
+  if (pText == nullptr) {
+    return; // Nothing to print.
+  }
+  // Converts pText for the console and prints it plus '\n'; pText must be null-terminated (no length is passed on).
+  bool lossy = false; // Note: even if there was loss, print anyway
+  std::string consoleMessage;
+  Unicode::UTF16ToConsoleString(pText, &consoleMessage, &lossy);
+  if (streamType == STD_OUTPUT_HANDLE) {
+    fprintf(stdout, "%s\n", consoleMessage.c_str());
+  }
+  else if (streamType == STD_ERROR_HANDLE) {
+    fprintf(stderr, "%s\n", consoleMessage.c_str());
+  }
+  else {
+    throw hlsl::Exception(E_INVALIDARG); // Only stdout/stderr stream types are supported.
+  }
+}
+
+static HRESULT BlobToUtf8IfText(_In_opt_ IDxcBlob *pBlob, IDxcBlobUtf8 **ppBlobUtf8) {
+  CComPtr<IDxcBlobEncoding> pBlobEncoding; // Converts pBlob to UTF-8 only when it reports a known encoding.
+  if (pBlob != nullptr && SUCCEEDED(pBlob->QueryInterface(&pBlobEncoding))) { // pBlob is _In_opt_: tolerate null
+    BOOL known = FALSE;
+    UINT32 cp = 0;
+    IFT(pBlobEncoding->GetEncoding(&known, &cp));
+    if (known) { // A known code page is treated as text.
+      return hlsl::DxcGetBlobAsUtf8(pBlob, nullptr, ppBlobUtf8);
+    }
+  }
+  return S_OK; // Not text (or no blob): *ppBlobUtf8 is left untouched.
+}
+
+static HRESULT BlobToUtf16IfText(_In_opt_ IDxcBlob *pBlob, IDxcBlobUtf16 **ppBlobUtf16) {
+  CComPtr<IDxcBlobEncoding> pBlobEncoding; // Converts pBlob to UTF-16 only when it reports a known encoding.
+  if (pBlob != nullptr && SUCCEEDED(pBlob->QueryInterface(&pBlobEncoding))) { // pBlob is _In_opt_: tolerate null
+    BOOL known = FALSE;
+    UINT32 cp = 0;
+    IFT(pBlobEncoding->GetEncoding(&known, &cp));
+    if (known) { // A known code page is treated as text.
+      return hlsl::DxcGetBlobAsUtf16(pBlob, nullptr, ppBlobUtf16);
+    }
+  }
+  return S_OK; // Not text (or no blob): *ppBlobUtf16 is left untouched.
+}
+
 void WriteBlobToConsole(_In_opt_ IDxcBlob *pBlob, DWORD streamType) {
   if (pBlob == nullptr) {
     return;
   }
 
-  // Assume UTF-8 for now, which is typically the case for dxcompiler ouput.
-  WriteUtf8ToConsoleSizeT((char *)pBlob->GetBufferPointer(), pBlob->GetBufferSize(), streamType);
+  // Try to get as UTF-16 or UTF-8
+  BOOL known;
+  UINT32 cp = 0;
+  CComPtr<IDxcBlobEncoding> pBlobEncoding;
+  IFT(pBlob->QueryInterface(&pBlobEncoding));
+  IFT(pBlobEncoding->GetEncoding(&known, &cp));
+
+  if (cp == DXC_CP_UTF16) {
+    CComPtr<IDxcBlobUtf16> pUtf16;
+    IFT(hlsl::DxcGetBlobAsUtf16(pBlob, nullptr, &pUtf16));
+    WriteUtf16NullTermToConsole(pUtf16->GetStringPointer(), streamType);
+  } else if (cp == CP_UTF8) {
+    CComPtr<IDxcBlobUtf8> pUtf8;
+    IFT(hlsl::DxcGetBlobAsUtf8(pBlob, nullptr, &pUtf8));
+    WriteUtf8ToConsoleSizeT(pUtf8->GetStringPointer(), pUtf8->GetStringLength(), streamType);
+  }
 }
 
-void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, _In_ LPCWSTR pFileName) {
+void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, _In_ LPCWSTR pFileName, _In_ UINT32 textCodePage) {
   if (pBlob == nullptr) {
     return;
   }
 
   CHandle file(CreateFileW(pFileName, GENERIC_WRITE, FILE_SHARE_READ, nullptr,
-                           CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr));
+    CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr));
   if (file == INVALID_HANDLE_VALUE) {
     IFT_Data(HRESULT_FROM_WIN32(GetLastError()), pFileName);
   }
-  WriteBlobToHandle(pBlob, file, pFileName);
+
+  WriteBlobToHandle(pBlob, file, pFileName, textCodePage);
 }
 
-void WriteBlobToHandle(_In_opt_ IDxcBlob *pBlob, _In_ HANDLE hFile, _In_opt_ LPCWSTR pFileName) {
+void WriteBlobToHandle(_In_opt_ IDxcBlob *pBlob, _In_ HANDLE hFile, _In_opt_ LPCWSTR pFileName, _In_ UINT32 textCodePage) {
   if (pBlob == nullptr) {
     return;
   }
 
+  LPCVOID pPtr = pBlob->GetBufferPointer();
+  SIZE_T size = pBlob->GetBufferSize();
+
+  std::string BOM;
+  CComPtr<IDxcBlobUtf8> pBlobUtf8;
+  CComPtr<IDxcBlobUtf16> pBlobUtf16;
+  if (textCodePage == DXC_CP_UTF8) {
+    IFT_Data(BlobToUtf8IfText(pBlob, &pBlobUtf8), pFileName);
+    if (pBlobUtf8) {
+      pPtr = pBlobUtf8->GetStringPointer();
+      size = pBlobUtf8->GetStringLength();
+      // TBD: Should we write UTF-8 BOM?
+      //BOM = "\xef\xbb\xbf"; // UTF-8
+    }
+  } else if (textCodePage == DXC_CP_UTF16) {
+    IFT_Data(BlobToUtf16IfText(pBlob, &pBlobUtf16), pFileName);
+    if (pBlobUtf16) {
+      pPtr = pBlobUtf16->GetStringPointer();
+      size = pBlobUtf16->GetStringLength() * sizeof(wchar_t);
+      BOM = "\xff\xfe"; // UTF-16 LE
+    }
+  }
+
+  IFT_Data(size > (SIZE_T)UINT32_MAX ? E_OUTOFMEMORY : S_OK , pFileName);
+
   DWORD written;
-  if (FALSE == WriteFile(hFile, pBlob->GetBufferPointer(),
-    pBlob->GetBufferSize(), &written, nullptr)) {
+
+  if (!BOM.empty()) {
+    if (FALSE == WriteFile(hFile, BOM.data(), BOM.length(), &written, nullptr)) {
+      IFT_Data(HRESULT_FROM_WIN32(GetLastError()), pFileName);
+    }
+  }
+
+  if (FALSE == WriteFile(hFile, pPtr, (DWORD)size, &written, nullptr)) {
     IFT_Data(HRESULT_FROM_WIN32(GetLastError()), pFileName);
   }
 }
@@ -142,21 +235,10 @@ void WriteUtf8ToConsole(_In_opt_count_(charCount) const char *pText,
   std::string resultToPrint;
   wchar_t *utf16Message = nullptr;
   size_t utf16MessageLen;
-  bool lossy; // Note: even if there was loss,  print anyway
   Unicode::UTF8BufferToUTF16Buffer(pText, charCount, &utf16Message,
                                    &utf16MessageLen);
 
-  std::string consoleMessage;
-  Unicode::UTF16ToConsoleString(utf16Message, &consoleMessage, &lossy);
-  if (streamType == STD_OUTPUT_HANDLE) {
-    fprintf(stdout, "%s\n", consoleMessage.c_str());
-  }
-  else if (streamType == STD_ERROR_HANDLE) {
-    fprintf(stderr, "%s\n", consoleMessage.c_str());
-  }
-  else {
-    throw hlsl::Exception(E_INVALIDARG);
-  }
+  WriteUtf16NullTermToConsole(utf16Message, streamType);
 
   delete[] utf16Message;
 }

+ 5 - 4
lib/DxcSupport/dxcmem.cpp

@@ -42,17 +42,18 @@ HRESULT DxcInitThreadMalloc() throw() {
 
 void DxcCleanupThreadMalloc() throw() {
   if (g_ThreadMallocTls) {
+    DXASSERT(g_pDefaultMalloc, "else DxcInitThreadMalloc didn't work/fail atomically");
     g_ThreadMallocTls->llvm::sys::ThreadLocal<IMalloc>::~ThreadLocal();
     g_pDefaultMalloc->Free(g_ThreadMallocTls);
     g_ThreadMallocTls = nullptr;
-    DXASSERT(g_pDefaultMalloc, "else DxcInitThreadMalloc didn't work/fail atomically");
-    g_pDefaultMalloc->Release();
-    g_pDefaultMalloc = nullptr;
   }
 }
 
 IMalloc *DxcGetThreadMallocNoRef() throw() {
-  DXASSERT(g_ThreadMallocTls != nullptr, "else prior to DxcInitThreadMalloc or after DxcCleanupThreadMalloc");
+  if (g_ThreadMallocTls == nullptr) {
+    return g_pDefaultMalloc;
+  }
+
   return g_ThreadMallocTls->get();
 }
 

+ 17 - 4
lib/DxilContainer/DxilContainerAssembler.cpp

@@ -1548,7 +1548,8 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
                                            llvm::StringRef DebugName,
                                            SerializeDxilFlags Flags,
                                            DxilShaderHash *pShaderHashOut,
-                                           AbstractMemoryStream *pReflectionStreamOut) {
+                                           AbstractMemoryStream *pReflectionStreamOut,
+                                           AbstractMemoryStream *pRootSigStreamOut) {
   // TODO: add a flag to update the module and remove information that is not part
   // of DXIL proper and is used only to assemble the container.
 
@@ -1637,11 +1638,23 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
     writer.AddPart(
         DFCC_PipelineStateValidation, pPSVWriter->size(),
         [&](AbstractMemoryStream *pStream) { pPSVWriter->write(pStream); });
+
     // Write the root signature (RTS0) part.
     if (rootSigWriter.size()) {
-      writer.AddPart(
-        DFCC_RootSignature, rootSigWriter.size(),
-        [&](AbstractMemoryStream *pStream) { rootSigWriter.write(pStream); });
+      if (pRootSigStreamOut) {
+        // Write root signature wrapped in container for separate output
+        DxilContainerWriter_impl rootSigContainerWriter;
+        rootSigContainerWriter.AddPart(
+          DFCC_RootSignature, rootSigWriter.size(),
+          [&](AbstractMemoryStream *pStream) { rootSigWriter.write(pStream); });
+        rootSigContainerWriter.write(pRootSigStreamOut);
+      }
+      if ((Flags & SerializeDxilFlags::StripRootSignature) == 0) {
+        // Write embedded root signature
+        writer.AddPart(
+          DFCC_RootSignature, rootSigWriter.size(),
+          [&](AbstractMemoryStream *pStream) { rootSigWriter.write(pStream); });
+      }
       bMetadataStripped |= pModule->StripRootSignatureFromMetadata();
     }
   }

+ 5 - 0
lib/DxilDia/DxilDiaSymbolManager.cpp

@@ -1889,6 +1889,7 @@ HRESULT dxil_dia::hlsl_symbols::SymbolManagerInit::IsDbgDeclareCall(
 
   std::vector<dxil_dia::Session::RVA> usesRVAs;
 
+  bool HasRegister = false;
   if (auto *RegMV = llvm::dyn_cast<llvm::MetadataAsValue>(CI->getArgOperand(0))) {
     if (auto *RegVM = llvm::dyn_cast<llvm::ValueAsMetadata>(RegMV->getMetadata())) {
       if (auto *Reg = llvm::dyn_cast<llvm::Instruction>(RegVM->getValue())) {
@@ -1897,6 +1898,7 @@ HRESULT dxil_dia::hlsl_symbols::SymbolManagerInit::IsDbgDeclareCall(
         if (hr != S_OK) {
           return hr;
         }
+        HasRegister = true;
         llvm::iterator_range<llvm::Value::user_iterator> users = Reg->users();
         for (llvm::User *user : users) {
           auto *inst = llvm::dyn_cast<llvm::Instruction>(user);
@@ -1908,6 +1910,9 @@ HRESULT dxil_dia::hlsl_symbols::SymbolManagerInit::IsDbgDeclareCall(
       }
     }
   }
+  if (!HasRegister) {
+    return E_FAIL;
+  }
 
   if (!usesRVAs.empty()) {
     *pLowestUserRVA = *std::min_element(usesRVAs.begin(), usesRVAs.end());

+ 1 - 1
lib/DxilDia/DxilDiaSymbolManager.h

@@ -60,7 +60,7 @@ public:
 
 
   SymbolManager();
-  SymbolManager(SymbolManager&&) = default;
+  SymbolManager(SymbolManager &&) = default;
   SymbolManager &operator =(SymbolManager &&) = default;
   ~SymbolManager();
 

+ 1 - 1
lib/DxrFallback/StateFunctionTransform.cpp

@@ -45,7 +45,7 @@ inline std::string stringf(const char* fmt, ...)
   {
     ret.resize(size);
     va_start(args, fmt);
-    vsnprintf((char*)ret.data(), size + 1, fmt, args);
+    vsnprintf(const_cast<char*>(ret.data()), size + 1, fmt, args);
     va_end(args);
   }
   return ret;

+ 2 - 0
lib/HLSL/CMakeLists.txt

@@ -18,6 +18,7 @@ add_llvm_library(LLVMHLSL
   DxilPromoteResourcePasses.cpp
   DxilPackSignatureElement.cpp
   DxilPatchShaderRecordBindings.cpp
+  DxilNoops.cpp
   DxilPreserveAllOutputs.cpp
   DxilSimpleGVNHoist.cpp
   DxilSignatureValidation.cpp
@@ -26,6 +27,7 @@ add_llvm_library(LLVMHLSL
   DxilTranslateRawBuffer.cpp
   DxilExportMap.cpp
   DxilValidation.cpp
+  DxilValueCache.cpp
   DxcOptimizer.cpp
   HLDeadFunctionElimination.cpp
   HLExpandStoreIntrinsics.cpp

+ 1 - 1
lib/HLSL/ComputeViewIdStateBuilder.cpp

@@ -840,7 +840,7 @@ void DxilViewIdStateBuilder::CreateViewIdSets(const std::unordered_map<unsigned,
 unsigned DxilViewIdStateBuilder::GetLinearIndex(DxilSignatureElement &SigElem, int row, unsigned col) const {
   DXASSERT_NOMSG(row >= 0 && col < kNumComps && SigElem.GetStartRow() != Semantic::kUndefinedRow);
   unsigned idx = (((unsigned)row) + SigElem.GetStartRow())*kNumComps + col + SigElem.GetStartCol();
-  DXASSERT_NOMSG(idx < kMaxSigScalars);
+  DXASSERT_NOMSG(idx < kMaxSigScalars); (void)kMaxSigScalars;
   return idx;
 }
 

+ 5 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -20,6 +20,7 @@
 #include "dxc/HLSL/HLMatrixLowerPass.h"
 #include "dxc/HLSL/DxilGenerationPass.h"
 #include "dxc/HLSL/ComputeViewIdState.h"
+#include "dxc/HLSL/DxilValueCache.h"
 #include "dxc/DXIL/DxilUtil.h"
 #include "dxc/Support/dxcapi.impl.h"
 
@@ -92,12 +93,15 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilConvergentMarkPass(Registry);
     initializeDxilDeadFunctionEliminationPass(Registry);
     initializeDxilEliminateOutputDynamicIndexingPass(Registry);
+    initializeDxilEliminateVectorPass(Registry);
     initializeDxilEmitMetadataPass(Registry);
     initializeDxilEraseDeadRegionPass(Registry);
     initializeDxilExpandTrigIntrinsicsPass(Registry);
     initializeDxilFinalizeModulePass(Registry);
+    initializeDxilFinalizeNoopsPass(Registry);
     initializeDxilFixConstArrayInitializerPass(Registry);
     initializeDxilGenerationPassPass(Registry);
+    initializeDxilInsertNoopsPass(Registry);
     initializeDxilLegalizeEvalOperationsPass(Registry);
     initializeDxilLegalizeResourcesPass(Registry);
     initializeDxilLegalizeSampleOffsetPassPass(Registry);
@@ -110,6 +114,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilPromoteStaticResourcesPass(Registry);
     initializeDxilSimpleGVNHoistPass(Registry);
     initializeDxilTranslateRawBufferPass(Registry);
+    initializeDxilValueCachePass(Registry);
     initializeDynamicIndexingVectorToArrayPass(Registry);
     initializeEarlyCSELegacyPassPass(Registry);
     initializeEliminateAvailableExternallyPass(Registry);

+ 174 - 4
lib/HLSL/DxilCondenseResources.cpp

@@ -21,6 +21,7 @@
 #include "dxc/DXIL/DxilUtil.h"
 #include "dxc/HLSL/HLMatrixType.h"
 #include "dxc/HLSL/HLModule.h"
+#include "dxc/HLSL/DxilValueCache.h"
 
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -422,6 +423,168 @@ ModulePass *llvm::createDxilCondenseResourcesPass() {
 
 INITIALIZE_PASS(DxilCondenseResources, "hlsl-dxil-condense", "DXIL Condense Resources", false, false)
 
+static
+bool LegalizeResourcesPHIs(Module &M, DxilValueCache *DVC) {
+  // Simplifies PHIs of HLSL object type in M using DVC; returns true if anything was changed.
+  // Simple pass to collect resource PHI's
+  SmallVector<PHINode *, 8> PHIs;
+  for (Function &F : M) {
+    for (BasicBlock &BB : F) {
+      for (Instruction &I : BB) {
+        if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+          if (hlsl::dxilutil::IsHLSLObjectType(PN->getType())) {
+            PHIs.push_back(PN);
+          }
+        }
+        else {
+          break; // PHIs are grouped at the start of a block; stop at the first non-PHI.
+        }
+
+      }
+    }
+  }
+
+  if (PHIs.empty())
+    return false;
+
+  // Do a very simple CFG simplification of removing diamond graphs.
+  std::vector<BasicBlock *> DeadBlocks;
+  std::unordered_set<BasicBlock *> DeadBlocksSet;
+  for (PHINode *PN : PHIs) {
+    for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+      BasicBlock *BB = PN->getIncomingBlock(i);
+      if (DeadBlocksSet.count(BB)) continue;
+      if (DVC->IsNeverReachable(BB)) {
+        DeadBlocksSet.insert(BB);
+        DeadBlocks.push_back(BB);
+      }
+    }
+  }
+
+  bool Changed = false;
+  SmallVector<Value *, 3> CleanupValues;
+  SmallPtrSet<Value *, 3> CleanupValuesSet;
+  auto AddCleanupValues = [&CleanupValues, &CleanupValuesSet](Value *V) {
+    if (!CleanupValuesSet.count(V)) {
+      CleanupValuesSet.insert(V);
+      CleanupValues.push_back(V);
+    }
+  };
+
+  for (unsigned i = 0; i < DeadBlocks.size(); i++) {
+    BasicBlock *BB = DeadBlocks[i];
+    BasicBlock *Pred = BB->getSinglePredecessor();
+    BasicBlock *Succ = BB->getSingleSuccessor();
+
+    if (!Pred || !Succ)
+      continue;
+
+    // A very simple folding of diamond graph.
+    BranchInst *Br = dyn_cast<BranchInst>(Pred->getTerminator()); // null for non-branch terminators (e.g. switch)
+    BasicBlock *Peer = nullptr;
+    if (Br && Br->isConditional())
+      Peer = Br->getSuccessor(0) == BB ?
+          Br->getSuccessor(1) : Br->getSuccessor(0);
+
+    if (Peer && Peer->getSingleSuccessor() == Succ) {
+      Changed = true;
+
+      BranchInst::Create(Peer, Pred);
+
+      Br->dropAllReferences();
+      Br->eraseFromParent();
+
+      for (Instruction &I : *Succ)
+        if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+          if (Instruction *IncomingI = dyn_cast<Instruction>(PN->getIncomingValueForBlock(BB))) {
+            if (!DeadBlocksSet.count(IncomingI->getParent()))
+              AddCleanupValues(IncomingI); // Mark the incoming value for deletion
+          }
+          PN->removeIncomingValue(BB);
+
+          if (PN->getNumIncomingValues() == 1) {
+            PN->replaceAllUsesWith(PN->getIncomingValue(0));
+            PHIs.erase(std::remove(PHIs.begin(), PHIs.end(), PN), PHIs.end()); // erase-remove: std::remove alone drops nothing
+            AddCleanupValues(PN); // Mark for deletion
+          }
+        }
+        else
+          break;
+
+      BB->dropAllReferences();
+      while (!BB->empty()){
+        Instruction *ChildI = &*BB->rbegin();
+        if (PHINode *PN = dyn_cast<PHINode>(ChildI))
+          PHIs.erase(std::remove(PHIs.begin(), PHIs.end(), PN), PHIs.end()); // PN is erased next; never revisit it
+        ChildI->eraseFromParent();
+      }
+      BB->eraseFromParent();
+    }
+  }
+
+  unsigned Attempts = PHIs.size();
+  for (unsigned AttemptIdx = 0; AttemptIdx < Attempts; AttemptIdx++) {
+    bool LocalChanged = false;
+    for (auto It = PHIs.begin(); It != PHIs.end();) {
+      PHINode *PN = *It;
+      if (Value *V = DVC->GetValue(PN)) {
+
+        It = PHIs.erase(It); // Continue from the element that followed PN.
+        AddCleanupValues(PN); // Mark for deletion later
+        PN->replaceAllUsesWith(V);
+        Changed = true;
+        LocalChanged = true;
+
+        for (unsigned i = 0, C = PN->getNumIncomingValues(); i < C; i++) {
+          Value *IncomingV = PN->getIncomingValue(i);
+          if (IncomingV != V)
+            AddCleanupValues(IncomingV); // Mark the incoming value for deletion later
+        }
+      }
+      else {
+        It++;
+      }
+    }
+
+    if (!LocalChanged)
+      break;
+  }
+
+  // Simple DCE to remove all dependencies of the resource PHI nodes we removed.
+  // This may be a little too aggressive
+  for (;;) {
+    bool LocalChanged = false;
+    // Must use a numeric idx instead of an iterator, because
+    // we're modifying the array as we go. Iterator gets invalidated
+    // because they're just pointers.
+    for (unsigned Idx = 0; Idx < CleanupValues.size();) {
+      Value *V = CleanupValues[Idx];
+      if (Instruction *I = dyn_cast<Instruction>(V)) {
+        if (I->user_empty()) {
+          // Add dependencies to process
+          for (Value *Op : I->operands()) {
+            AddCleanupValues(Op);
+          }
+          LocalChanged = true;
+          I->eraseFromParent();
+          CleanupValues.erase(CleanupValues.begin() + Idx);
+        }
+        else {
+          Idx++;
+        }
+      }
+      else {
+        CleanupValues.erase(CleanupValues.begin() + Idx); // Non-instructions are never erased; just drop them.
+      }
+    }
+
+    Changed |= LocalChanged;
+    if (!LocalChanged)
+      break;
+  }
+  return Changed;
+}
+
 namespace {
 class DxilLowerCreateHandleForLib : public ModulePass {
 private:
@@ -434,6 +597,10 @@ public:
   static char ID; // Pass identification, replacement for typeid
   explicit DxilLowerCreateHandleForLib() : ModulePass(ID) {}
 
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DxilValueCache>();
+  }
+
   const char *getPassName() const override {
     return "DXIL Lower createHandleForLib";
   }
@@ -483,6 +650,9 @@ public:
     if (m_bIsLib && DM.GetShaderModel()->GetMinor() == ShaderModel::kOfflineMinor)
       return bChanged;
 
+    DxilValueCache *DVC = &getAnalysis<DxilValueCache>();
+    bChanged |= LegalizeResourcesPHIs(M, DVC);
+
     // Make sure no select on resource.
     bChanged |= RemovePhiOnResource();
 
@@ -1731,9 +1901,7 @@ void DxilLowerCreateHandleForLib::UpdateStructTypeForLegacyLayout() {
 
 // Change ResourceSymbol to undef if don't need.
 void DxilLowerCreateHandleForLib::UpdateResourceSymbols() {
-  std::vector<GlobalVariable *> &LLVMUsed = m_DM->GetLLVMUsed();
-
-  auto UpdateResourceSymbol = [&LLVMUsed, this](DxilResourceBase *res) {
+  auto UpdateResourceSymbol = [](DxilResourceBase *res) {
     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(res->GetGlobalSymbol())) {
       GV->removeDeadConstantUsers();
       DXASSERT(GV->user_empty(), "else resource not lowered");
@@ -2254,7 +2422,9 @@ ModulePass *llvm::createDxilLowerCreateHandleForLibPass() {
   return new DxilLowerCreateHandleForLib();
 }
 
-INITIALIZE_PASS(DxilLowerCreateHandleForLib, "hlsl-dxil-lower-handle-for-lib", "DXIL Lower createHandleForLib", false, false)
+INITIALIZE_PASS_BEGIN(DxilLowerCreateHandleForLib, "hlsl-dxil-lower-handle-for-lib", "DXIL Lower createHandleForLib", false, false)
+INITIALIZE_PASS_DEPENDENCY(DxilValueCache)
+INITIALIZE_PASS_END(DxilLowerCreateHandleForLib, "hlsl-dxil-lower-handle-for-lib", "DXIL Lower createHandleForLib", false, false)
 
 
 class DxilAllocateResourcesForLib : public ModulePass {

+ 27 - 5
lib/HLSL/DxilContainerReflection.cpp

@@ -415,6 +415,7 @@ protected:
 
 public:
   // Internal
+  HRESULT InitializeEmpty();
   HRESULT Initialize(
     DxilModule              &M,
     llvm::Type              *type,
@@ -795,6 +796,12 @@ static bool IsObjectType(
   return TryToDetectObjectType(structType, &ignored);
 }
 
+HRESULT CShaderReflectionType::InitializeEmpty()
+{
+  ZeroMemory(&m_Desc, sizeof(m_Desc)); // Clear the whole type description; no other member is touched here.
+  return S_OK; // Always succeeds.
+}
+
 // Main logic for translating an LLVM type and associated
 // annotations into a D3D shader reflection type.
 HRESULT CShaderReflectionType::Initialize(
@@ -1282,7 +1289,9 @@ void CShaderReflectionConstantBuffer::InitializeStructuredBuffer(
   VarDesc.StartSampler = UINT_MAX;
   VarDesc.uFlags |= D3D_SVF_USED;
   CShaderReflectionVariable Var;
-  CShaderReflectionType *pVarType = nullptr;
+
+  // First type is an empty type: returned if no annotation available.
+  CShaderReflectionType *pVarType = allTypes[0].get();
 
   // Create reflection type, if we have the necessary annotation info
 
@@ -1685,6 +1694,14 @@ void DxilShaderReflection::SetCBufferUsage() {
 void DxilModuleReflection::CreateReflectionObjects() {
   DXASSERT_NOMSG(m_pDxilModule != nullptr);
 
+  {
+    // Add empty type for when no type info is available, instead of returning nullptr.
+    DXASSERT_NOMSG(m_Types.empty());
+    CShaderReflectionType *pEmptyType = new CShaderReflectionType();
+    m_Types.push_back(std::unique_ptr<CShaderReflectionType>(pEmptyType));
+    pEmptyType->InitializeEmpty();
+  }
+
   // Create constant buffers, resources and signatures.
   for (auto && cb : m_pDxilModule->GetCBuffers()) {
     std::unique_ptr<CShaderReflectionConstantBuffer> rcb(new CShaderReflectionConstantBuffer());
@@ -1908,7 +1925,8 @@ HRESULT DxilModuleReflection::LoadRDAT(const DxilPartHeader *pPart) {
 }
 
 HRESULT DxilModuleReflection::LoadModule(const DxilPartHeader *pShaderPart) {
-  DXASSERT_NOMSG(pShaderPart != nullptr);
+  if (pShaderPart == nullptr)
+    return E_INVALIDARG;
   const char *pData = GetDxilPartData(pShaderPart);
   try {
     const char *pBitcode;
@@ -1916,14 +1934,18 @@ HRESULT DxilModuleReflection::LoadModule(const DxilPartHeader *pShaderPart) {
     GetDxilProgramBitcode((DxilProgramHeader *)pData, &pBitcode, &bitcodeLength);
     std::unique_ptr<MemoryBuffer> pMemBuffer =
         MemoryBuffer::getMemBufferCopy(StringRef(pBitcode, bitcodeLength));
+    bool bBitcodeLoadError = false;
+    auto errorHandler = [&bBitcodeLoadError](const DiagnosticInfo &diagInfo) {
+        bBitcodeLoadError |= diagInfo.getSeverity() == DS_Error;
+      };
 #if 0 // We materialize eagerly, because we'll need to walk instructions to look for usage information.
     ErrorOr<std::unique_ptr<Module>> module =
-        getLazyBitcodeModule(std::move(pMemBuffer), Context);
+        getLazyBitcodeModule(std::move(pMemBuffer), Context, errorHandler);
 #else
     ErrorOr<std::unique_ptr<Module>> module =
-      parseBitcodeFile(pMemBuffer->getMemBufferRef(), Context, nullptr);
+      parseBitcodeFile(pMemBuffer->getMemBufferRef(), Context, errorHandler);
 #endif
-    if (!module) {
+    if (!module || bBitcodeLoadError) {
       return E_INVALIDARG;
     }
     std::swap(m_pModule, module.get());

+ 164 - 0
lib/HLSL/DxilNoops.cpp

@@ -0,0 +1,164 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilNoops.cpp                                                             //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Passes to insert dx.noop() and replace them with llvm.donothing()         //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "llvm/Pass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+namespace {
+// Name of the placeholder function: the insertion pass creates/calls a
+// function with this name and the finalize pass looks it up by this name.
+StringRef kNoopName = "dx.noop";
+}
+
+//==========================================================
+// Insertion pass
+//
+
+namespace {
+
+// Fetches the module's `void dx.noop()` function via getOrInsertFunction and
+// tags it with the Convergent function attribute before returning it.
+Function *GetOrCreateNoopF(Module &M) {
+  Type *VoidTy = Type::getVoidTy(M.getContext());
+  FunctionType *NoopTy = FunctionType::get(VoidTy, /*isVarArg*/ false);
+  Constant *Callee = M.getOrInsertFunction(::kNoopName, NoopTy);
+  Function *Result = cast<Function>(Callee);
+  Result->addFnAttr(Attribute::AttrKind::Convergent);
+  return Result;
+}
+
+// Function pass that inserts calls to the dx.noop placeholder in front of
+// selected instructions (see runOnFunction for the selection rules).
+class DxilInsertNoops : public FunctionPass {
+public:
+  static char ID;
+  // Runs the pass's generated initializer against the global PassRegistry.
+  DxilInsertNoops() : FunctionPass(ID) {
+    initializeDxilInsertNoopsPass(*PassRegistry::getPassRegistry());
+  }
+
+  // Returns true when at least one noop call was inserted into F.
+  bool runOnFunction(Function &F) override;
+  const char *getPassName() const override { return "Dxil Insert Noops"; }
+};
+
+char DxilInsertNoops::ID;
+}
+
+// Inserts a call to dx.noop immediately before each instruction of interest
+// in F, copying that instruction's debug location onto the new call.
+//
+// Instructions of interest are:
+//   - memcpy intrinsic calls,
+//   - calls to functions that have a body in this module,
+//   - stores whose stored value is a load or a constant (plain copies),
+//   - returns.
+//
+// Returns true if at least one noop call was inserted.
+bool DxilInsertNoops::runOnFunction(Function &F) {
+  Module &M = *F.getParent();
+  Function *NoopF = nullptr; // Looked up lazily, on the first insertion.
+  bool Changed = false;
+
+  // Find instructions where we want to insert nops
+  for (BasicBlock &BB : F) {
+    for (BasicBlock::iterator It = BB.begin(), E = BB.end(); It != E;) {
+      // Step the iterator first: the noop is inserted *before* I, so the
+      // walk never revisits the newly created call.
+      Instruction &I = *(It++);
+      bool InsertNop = false;
+
+      // MemCpyInst is a kind of CallInst, so it has to be tested before the
+      // generic call case; otherwise the memcpy branch can never be taken
+      // (the intrinsic callee is only a declaration).
+      if (isa<MemCpyInst>(&I)) {
+        InsertNop = true;
+      }
+      // If we are calling a real function, insert one
+      // at the callsite.
+      else if (CallInst *Call = dyn_cast<CallInst>(&I)) {
+        Function *Callee = Call->getCalledFunction();
+        InsertNop = Callee && !Callee->isDeclaration();
+      }
+      // If we have a copy, e.g:
+      //     float x = 0;
+      //     float y = x;    <---- copy
+      // insert a nop there.
+      else if (StoreInst *Store = dyn_cast<StoreInst>(&I)) {
+        Value *V = Store->getValueOperand();
+        InsertNop = isa<LoadInst>(V) || isa<Constant>(V);
+      }
+      // If we have a return, just to be safe.
+      else if (isa<ReturnInst>(&I)) {
+        InsertNop = true;
+      }
+
+      // Do the insertion
+      if (InsertNop) {
+        if (!NoopF)
+          NoopF = GetOrCreateNoopF(M);
+        CallInst *Noop = CallInst::Create(NoopF, {}, &I);
+        Noop->setDebugLoc(I.getDebugLoc());
+        Changed = true;
+      }
+    }
+  }
+
+  return Changed;
+}
+
+// Factory for the noop insertion pass; returns a newly allocated instance.
+Pass *llvm::createDxilInsertNoopsPass() {
+  return new DxilInsertNoops();
+}
+
+INITIALIZE_PASS(DxilInsertNoops, "dxil-insert-noops", "Dxil Insert Noops", false, false)
+
+
+//==========================================================
+// Finalize pass
+//
+
+namespace {
+
+// Module pass that retargets dx.noop calls to llvm.donothing and removes the
+// dx.noop declaration (see runOnModule).
+class DxilFinalizeNoops : public ModulePass {
+public:
+  static char ID;
+  // Runs the pass's generated initializer against the global PassRegistry.
+  DxilFinalizeNoops() : ModulePass(ID) {
+    initializeDxilFinalizeNoopsPass(*PassRegistry::getPassRegistry());
+  }
+
+  // Returns true when a dx.noop declaration was found and removed.
+  bool runOnModule(Module &M) override;
+  const char *getPassName() const override { return "Dxil Finalize Noops"; }
+};
+
+char DxilFinalizeNoops::ID;
+}
+
+// Retargets every call of the @dx.noop declaration to @llvm.donothing and
+// then deletes @dx.noop. Returns false (module untouched) when no @dx.noop
+// declaration exists, true otherwise.
+bool DxilFinalizeNoops::runOnModule(Module &M) {
+  // Locate the dx.noop declaration, if the module has one.
+  Function *NoopDecl = nullptr;
+  for (Function &Candidate : M) {
+    if (Candidate.isDeclaration() && Candidate.getName() == ::kNoopName) {
+      NoopDecl = &Candidate;
+      break;
+    }
+  }
+  if (NoopDecl == nullptr)
+    return false;
+
+  if (!NoopDecl->user_empty()) {
+    Function *DoNothingF = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
+    // Changing the callee edits dx.noop's use list, so move to the next user
+    // before rewriting the current one.
+    auto UserIt = NoopDecl->user_begin();
+    auto UserEnd = NoopDecl->user_end();
+    while (UserIt != UserEnd) {
+      CallInst *Call = cast<CallInst>(*UserIt);
+      ++UserIt;
+      Call->setCalledFunction(DoNothingF);
+    }
+  }
+
+  assert(NoopDecl->user_empty() && "dx.noop calls must be all removed now");
+  NoopDecl->eraseFromParent();
+  return true;
+}
+
+// Factory for the noop finalization pass; returns a newly allocated instance.
+Pass *llvm::createDxilFinalizeNoopsPass() {
+  return new DxilFinalizeNoops();
+}
+
+INITIALIZE_PASS(DxilFinalizeNoops, "dxil-finalize-noops", "Dxil Finalize Noops", false, false)
+

+ 7 - 14
lib/HLSL/DxilPatchShaderRecordBindings.cpp

@@ -91,7 +91,7 @@ struct ShaderRecordEntry {
   unsigned int RecordOffsetInBytes;
   unsigned int OffsetInDescriptors; // Only valid for descriptor tables
 
-  static ShaderRecordEntry InvalidEntry() { return { (DxilRootParameterType)-1, (unsigned int)-1 }; }
+  static ShaderRecordEntry InvalidEntry() { return { (DxilRootParameterType)-1, (unsigned int)-1, 0 }; }
   bool IsInvalid() { return (unsigned int)ParameterType == (unsigned int)-1; }
 };
 
@@ -176,7 +176,7 @@ private:
   llvm::Function *EntryPointFunction;
 
   ShaderInfo *pInputShaderInfo;
-  DxilVersionedRootSignatureDesc *pRootSignatureDesc;
+  const DxilVersionedRootSignatureDesc *pRootSignatureDesc;
   DXIL::ShaderKind ShaderKind;
 };
 
@@ -222,7 +222,7 @@ void DxilPatchShaderRecordBindings::applyOptions(PassOptions O) {
     if (0 == option.first.compare("root-signature")) {
       unsigned int cHexRadix = 16;
       pInputShaderInfo = (ShaderInfo*)strtoull(option.second.data(), nullptr, cHexRadix);
-      pRootSignatureDesc = (DxilVersionedRootSignatureDesc*)pInputShaderInfo->pRootSignatureDesc;
+      pRootSignatureDesc = (const DxilVersionedRootSignatureDesc*)pInputShaderInfo->pRootSignatureDesc;
     }
   }
 }
@@ -386,11 +386,6 @@ DXIL::ShaderKind GetRayShaderKindCopy(Function* F)
     return DXIL::ShaderKind::Invalid;
 }
 
-static std::string ws2s(const std::wstring& wide)
-{
-    return std::string(wide.begin(), wide.end());
-}
-
 bool DxilPatchShaderRecordBindings::runOnModule(Module &M) {
   DxilModule &DM = M.GetOrCreateDxilModule();
   EntryPointFunction = pInputShaderInfo->ExportName ? getFunctionFromName(M, pInputShaderInfo->ExportName) : DM.GetEntryFunction();
@@ -524,7 +519,6 @@ llvm::Value *DxilPatchShaderRecordBindings::GetAliasedDescriptorHeapHandle(Modul
 
 
         llvm::ArrayType *descriptorHeapType = ArrayType::get(type, 0);
-        static unsigned int i = 0;
         unsigned int id = AddAliasedHandle(M, FallbackLayerDescriptorHeapTable, FallbackLayerRegisterSpace + FallbackLayerDescriptorHeapSpaceOffset + registerSpaceOffset, resClass, resKind, HandleName, descriptorHeapType);
         
         TypeToAliasedDescriptorHeap[resClassIndex][key] = GetResourceFromID(DM, resClass, id).GetGlobalSymbol();
@@ -695,7 +689,7 @@ bool DxilPatchShaderRecordBindings::IsCBufferLoad(llvm::Instruction *instruction
   return cbufferLoad || cbufferLoadLegacy;
 }
 
-const unsigned int GetResolvedRangeID(DXIL::ResourceClass resClass, Value *rangeIdVal)
+unsigned int GetResolvedRangeID(DXIL::ResourceClass resClass, Value *rangeIdVal)
 {
   if (auto CI = dyn_cast<ConstantInt>(rangeIdVal))
   {
@@ -854,7 +848,6 @@ void DxilPatchShaderRecordBindings::PatchShaderBindings(Module &M) {
   std::vector<llvm::Instruction *> instructionsToRemove;
   for (BasicBlock &block : EntryPointFunction->getBasicBlockList()) {
     auto & Instructions = block.getInstList();
-    auto It = Instructions.begin();
 
     for (auto &instr : Instructions) {
       DxilInst_CreateHandleForLib createHandleForLib(&instr);
@@ -1083,7 +1076,7 @@ ShaderRecordEntry FindRootSignatureDescriptorHelper(
                                                        dxilParamType) &&
             baseRegisterIndex == rootParam.Constants.ShaderRegister &&
             registerSpace == rootParam.Constants.RegisterSpace) {
-          return {dxilParamType, recordOffset};
+          return {dxilParamType, recordOffset, 0};
         }
         recordOffset += rootParam.Constants.Num32BitValues * sizeof(uint32_t);
         break;
@@ -1094,7 +1087,7 @@ ShaderRecordEntry FindRootSignatureDescriptorHelper(
         for (unsigned int rangeIndex = 0;
              rangeIndex < descriptorTable.NumDescriptorRanges; rangeIndex++) {
           auto &range = descriptorTable.pDescriptorRanges[rangeIndex];
-          if (range.OffsetInDescriptorsFromTableStart != -1) {
+          if (range.OffsetInDescriptorsFromTableStart != (unsigned)-1) {
             rangeOffsetInDescriptors = range.OffsetInDescriptorsFromTableStart;
           }
 
@@ -1121,7 +1114,7 @@ ShaderRecordEntry FindRootSignatureDescriptorHelper(
                                                        dxilParamType) &&
             baseRegisterIndex == rootParam.Descriptor.ShaderRegister &&
             registerSpace == rootParam.Descriptor.RegisterSpace) {
-          return {dxilParamType, recordOffset};
+          return {dxilParamType, recordOffset, 0};
         }
 
         recordOffset += SizeofD3D12GpuVA;

+ 25 - 5
lib/HLSL/DxilPreparePasses.cpp

@@ -256,11 +256,6 @@ static bool GetUnsignedVal(Value *V, uint32_t *pValue) {
   return true;
 }
 
-static uint8_t NegMask(uint8_t V) {
-  V ^= 0xF;
-  return V & 0xF;
-}
-
 static void MarkUsedSignatureElements(Function *F, DxilModule &DM) {
   DXASSERT_NOMSG(F != nullptr);
   // For every loadInput/storeOutput, update the corresponding ReadWriteMask.
@@ -336,6 +331,27 @@ public:
 
   const char *getPassName() const override { return "HLSL DXIL Finalize Module"; }
 
+  // Erases every instruction that uses @llvm.donothing and then the intrinsic
+  // declaration itself. A module that does not contain the intrinsic is left
+  // untouched.
+  void patchValidation_1_5(Module &M) {
+    // Look for the llvm.donothing intrinsic among the module's functions.
+    Function *DoNothingF = nullptr;
+    for (Function &Candidate : M) {
+      if (!Candidate.isIntrinsic())
+        continue;
+      if (Candidate.getIntrinsicID() != Intrinsic::donothing)
+        continue;
+      DoNothingF = &Candidate;
+      break;
+    }
+    if (DoNothingF == nullptr)
+      return;
+
+    // Step the iterator before erasing, since erasing the user edits the
+    // use list being walked.
+    auto UserIt = DoNothingF->user_begin();
+    auto UserEnd = DoNothingF->user_end();
+    while (UserIt != UserEnd) {
+      Instruction *UserInst = cast<Instruction>(*UserIt);
+      ++UserIt;
+      UserInst->eraseFromParent();
+    }
+
+    assert(DoNothingF->user_empty() && "Not all users removed from @llvm.donothing");
+    DoNothingF->eraseFromParent();
+  }
+
   void patchValidation_1_1(Module &M) {
     for (iplist<Function>::iterator F : M.getFunctionList()) {
       for (Function::iterator BBI = F->begin(), BBE = F->end(); BBI != BBE;
@@ -383,6 +399,10 @@ public:
           MarkUsedSignatureElements(DM.GetPatchConstantFunction(), DM);
       }
 
+      if (ValMajor == 1 && ValMinor <= 5) {
+        patchValidation_1_5(M);
+      }
+
       // Remove store undef output.
       hlsl::OP *hlslOP = M.GetDxilModule().GetOP();
       RemoveStoreUndefOutput(M, hlslOP);

+ 451 - 0
lib/HLSL/DxilValueCache.cpp

@@ -0,0 +1,451 @@
+//===---------- DxilValueCache.cpp - Dxil Constant Value Cache ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility to compute and cache constant values for instructions.
+//
+
+
+#include "llvm/Pass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/ADT/Statistic.h"
+
+#include "dxc/HLSL/DxilValueCache.h"
+#include <unordered_set>
+
+#define DEBUG_TYPE "dxil-value-cache"
+
+using namespace llvm;
+
+// True only when V is a ConstantInt whose limited value is non-zero.
+static
+bool IsConstantTrue(const Value *V) {
+  const ConstantInt *AsInt = dyn_cast<ConstantInt>(V);
+  return AsInt && AsInt->getLimitedValue() != 0;
+}
+// True only when V is a ConstantInt whose limited value is zero.
+static
+bool IsConstantFalse(const Value *V) {
+  const ConstantInt *AsInt = dyn_cast<ConstantInt>(V);
+  return AsInt && AsInt->getLimitedValue() == 0;
+}
+
+// True when BB is the entry block of the function that contains it.
+static
+bool IsEntryBlock(const BasicBlock *BB) {
+  const Function *Owner = BB->getParent();
+  return &Owner->getEntryBlock() == BB;
+}
+
+// Records BB as always reachable by caching the i1 constant `true` for it.
+void DxilValueCache::MarkAlwaysReachable(BasicBlock *BB) {
+  Value *TrueVal = ConstantInt::getTrue(BB->getContext());
+  ValueMap.Set(BB, TrueVal);
+}
+// Records BB as never reachable by caching the i1 constant `false` for it.
+void DxilValueCache::MarkNeverReachable(BasicBlock *BB) {
+  Value *FalseVal = ConstantInt::getFalse(BB->getContext());
+  ValueMap.Set(BB, FalseVal);
+}
+
+// Cache-only query: true when the value cached for BB is a constant true.
+// Does not trigger any processing.
+bool DxilValueCache::IsAlwaysReachable_(BasicBlock *BB) {
+  Value *Cached = ValueMap.Get(BB);
+  return Cached != nullptr && IsConstantTrue(Cached);
+}
+
+// Cache-only query: true when the value cached for BB is a constant false.
+// Does not trigger any processing.
+bool DxilValueCache::IsNeverReachable_(BasicBlock *BB) {
+  Value *Cached = ValueMap.Get(BB);
+  return Cached != nullptr && IsConstantFalse(Cached);
+}
+
+// Tries to simplify the PHI node I using cached block reachability.
+//
+// If exactly one incoming block can be singled out, the PHI is replaced by
+// that block's incoming value (after a cache lookup), provided the value is a
+// constant or an instruction known to dominate the PHI. Otherwise LLVM's
+// stock SimplifyInstruction is tried. Returns the simplified value, or
+// nullptr when nothing could be deduced. DT may be null.
+Value *DxilValueCache::ProcessAndSimplify_PHI(Instruction *I, DominatorTree *DT) {
+  PHINode *PN = cast<PHINode>(I);
+  BasicBlock *SoleIncoming = nullptr;
+
+  Value *Simplified = nullptr;
+  // Pick the single incoming block, if there is one: either the first
+  // predecessor cached as always reachable, or the only predecessor that is
+  // not cached as never reachable.
+  // NOTE(review): an always-reachable predecessor ends the search without
+  // looking at the remaining predecessors -- confirm this is intended for
+  // PHIs that also have other possibly-reachable incoming edges.
+  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+    BasicBlock *PredBB = PN->getIncomingBlock(i);
+    if (IsAlwaysReachable_(PredBB)) {
+      SoleIncoming = PredBB;
+      break;
+    }
+    else if (!IsNeverReachable_(PredBB)) {
+      // A second candidate means there is no sole incoming block.
+      if (SoleIncoming) {
+        SoleIncoming = nullptr;
+        break;
+      }
+      SoleIncoming = PredBB;
+    }
+  }
+
+  if (SoleIncoming) {
+    Value *V = OptionallyGetValue(PN->getIncomingValueForBlock(SoleIncoming));
+    if (isa<Constant>(V))
+      Simplified = V;
+    else if (Instruction *I = dyn_cast<Instruction>(V)) {
+      // If this is an instruction, we have to make sure it
+      // dominates this PHI.
+      // There are several conditions that qualify:
+      //   1. There's only one predecessor
+      //   2. If the instruction is in the entry block, then it must dominate
+      //   3. If we are provided with a Dominator tree, and it decides that
+      //      it dominates.
+      if (PN->getNumIncomingValues() == 1 ||
+        IsEntryBlock(I->getParent()) ||
+        (DT && DT->dominates(I, PN)))
+      {
+        Simplified = I;
+      }
+    }
+  }
+
+  // If we couldn't deduce it, run the LLVM stock simplification to see
+  // if we could do anything.
+  if (!Simplified)
+    Simplified = llvm::SimplifyInstruction(I, I->getModule()->getDataLayout());
+
+  // One last step, to check if we have anything cached for whatever we
+  // simplified to.
+  if (Simplified)
+    Simplified = OptionallyGetValue(Simplified);
+
+  return Simplified;
+}
+
+// Propagates reachability from a branch's parent block to its successors.
+// A branch never simplifies to a value, so this always returns nullptr; its
+// sole effect is marking successor blocks as always or never reachable.
+Value *DxilValueCache::ProcessAndSimpilfy_Br(Instruction *I, DominatorTree *DT) {
+  BranchInst *Branch = cast<BranchInst>(I);
+  BasicBlock *Parent = Branch->getParent();
+
+  // Unconditional branch: the lone successor inherits the parent's state
+  // (the "never" case only when the parent is its single predecessor).
+  if (!Branch->isConditional()) {
+    BasicBlock *Target = Branch->getSuccessor(0);
+    if (IsAlwaysReachable_(Parent))
+      MarkAlwaysReachable(Target);
+    else if (Target->getSinglePredecessor() && IsNeverReachable_(Parent))
+      MarkNeverReachable(Target);
+    return nullptr;
+  }
+
+  BasicBlock *OnTrue = Branch->getSuccessor(0);
+  BasicBlock *OnFalse = Branch->getSuccessor(1);
+  Value *Cond = OptionallyGetValue(Branch->getCondition());
+
+  // A dead parent makes both successors dead.
+  if (IsNeverReachable_(Parent)) {
+    MarkNeverReachable(OnFalse);
+    MarkNeverReachable(OnTrue);
+    return nullptr;
+  }
+
+  // Only a condition known to be constant tells us anything further.
+  const bool CondIsTrue = IsConstantTrue(Cond);
+  if (!CondIsTrue && !IsConstantFalse(Cond))
+    return nullptr;
+
+  BasicBlock *Taken = CondIsTrue ? OnTrue : OnFalse;
+  BasicBlock *NotTaken = CondIsTrue ? OnFalse : OnTrue;
+  if (IsAlwaysReachable_(Parent))
+    MarkAlwaysReachable(Taken);
+  // The untaken side is dead only if nothing else can branch to it.
+  if (NotTaken->getSinglePredecessor())
+    MarkNeverReachable(NotTaken);
+
+  return nullptr;
+}
+
+
+
+// Simplifies instruction I, substituting cached values for its operands, and
+// stores the result in ValueMap when that result is a Constant.
+//
+// Branches and PHIs go through the dedicated helpers; binary operators,
+// compares, selects, extractelement and casts use the LLVM stock
+// simplifiers. Other instruction kinds are not simplified.
+// Returns the simplified value (which may be non-constant and therefore not
+// cached), or nullptr if no simplification was found.
+Value *DxilValueCache::SimplifyAndCacheResult(Instruction *I, DominatorTree *DT) {
+
+  const DataLayout &DL = I->getModule()->getDataLayout();
+
+  Value *Simplified = nullptr;
+  if (Instruction::Br == I->getOpcode()) {
+    Simplified = ProcessAndSimpilfy_Br(I, DT);
+  }
+  else if (Instruction::PHI == I->getOpcode()) {
+    Simplified = ProcessAndSimplify_PHI(I, DT);
+  }
+  // The rest of the checks use LLVM stock simplifications
+  else if (I->isBinaryOp()) {
+    Simplified =
+      llvm::SimplifyBinOp(
+        I->getOpcode(),
+        OptionallyGetValue(I->getOperand(0)),
+        OptionallyGetValue(I->getOperand(1)),
+        DL);
+  }
+  else if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+    Simplified =
+      llvm::SimplifyCmpInst(Cmp->getPredicate(),
+        OptionallyGetValue(I->getOperand(0)),
+        OptionallyGetValue(I->getOperand(1)),
+        DL);
+  }
+  else if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+    Simplified = 
+      llvm::SimplifySelectInst(
+        OptionallyGetValue(Select->getCondition()),
+        OptionallyGetValue(Select->getTrueValue()),
+        OptionallyGetValue(Select->getFalseValue()),
+        DL
+      );
+  }
+  else if (ExtractElementInst *IE = dyn_cast<ExtractElementInst>(I)) {
+    Simplified =
+      llvm::SimplifyExtractElementInst(
+        OptionallyGetValue(IE->getVectorOperand()),
+        OptionallyGetValue(IE->getIndexOperand()),
+        DL, nullptr, DT);
+  }
+  else if (CastInst *Cast = dyn_cast<CastInst>(I)) {
+    Simplified =
+      llvm::SimplifyCastInst(
+        Cast->getOpcode(),
+        OptionallyGetValue(Cast->getOperand(0)),
+        Cast->getType(), DL);
+  }
+
+  // Only constants are cached; a non-constant result is just handed back.
+  if (Simplified && isa<Constant>(Simplified))
+    ValueMap.Set(I, Simplified);
+
+  return Simplified;
+}
+
+STATISTIC(StaleValuesEncountered, "Stale Values Encountered");
+
+// True when V has a live (non-stale) map entry holding a non-null value.
+// The sentinel counts as a value here, so a key that has only been marked
+// with SetSentinel is reported as seen.
+bool DxilValueCache::WeakValueMap::Seen(Value *V) {
+  auto Found = Map.find(V);
+  if (Found == Map.end() || Found->second.IsStale())
+    return false;
+  return Found->second.Value;
+}
+
+// Returns the value cached for V, or nullptr when V has no entry, its entry
+// is stale, or the entry only holds the sentinel placeholder.
+Value *DxilValueCache::WeakValueMap::Get(Value *V) {
+  auto Found = Map.find(V);
+  if (Found == Map.end() || Found->second.IsStale())
+    return nullptr;
+
+  Value *Cached = Found->second.Value;
+  return Cached == GetSentinel(V->getContext()) ? nullptr : Cached;
+}
+
+// Stores the sentinel placeholder as Key's entry. Seen(Key) then reports true
+// while Get(Key) still returns nullptr.
+void DxilValueCache::WeakValueMap::SetSentinel(Value *Key) {
+  Map[Key].Set(Key, GetSentinel(Key->getContext()));
+}
+
+// Returns the map's sentinel value, creating it on first use as an i1 PHI
+// node with no reserved incoming values.
+Value *DxilValueCache::WeakValueMap::GetSentinel(LLVMContext &Ctx) {
+  if (Sentinel)
+    return Sentinel.get();
+
+  Sentinel.reset(PHINode::Create(Type::getInt1Ty(Ctx), 0));
+  return Sentinel.get();
+}
+
+// Debug helper: prints one "key -> value" line to dbgs() for every non-stale
+// entry. Basic-block keys are prefixed with "[BB]" and show their cached
+// reachability; sentinel entries print as NO_VALUE.
+LLVM_DUMP_METHOD
+void DxilValueCache::WeakValueMap::dump() const {
+  for (auto It = Map.begin(), E = Map.end(); It != E; ++It) {
+    if (It->second.IsStale())
+      continue;
+
+    const Value *Key = It->first;
+    const Value *Cached = It->second.Value;
+    const bool NoValue = Sentinel && Cached == Sentinel.get();
+    const BasicBlock *BB = dyn_cast<BasicBlock>(Key);
+
+    // Left-hand side of the arrow.
+    if (BB)
+      dbgs() << "[BB]" << BB->getName() << " -> ";
+    else
+      dbgs() << Key->getName() << " -> ";
+
+    // Right-hand side of the arrow.
+    if (NoValue)
+      dbgs() << "NO_VALUE";
+    else if (!BB)
+      dbgs() << *Cached;
+    else if (IsConstantTrue(Cached))
+      dbgs() << "Always Reachable!";
+    else if (IsConstantFalse(Cached))
+      dbgs() << "Never Reachable!";
+
+    dbgs() << "\n";
+  }
+}
+
+// Stores V as the value for Key, creating the map entry if it is missing.
+void DxilValueCache::WeakValueMap::Set(Value *Key, Value *V) {
+  Map[Key].Set(Key, V);
+}
+
+// Cache lookup with fallback: yields the value cached for V when one exists,
+// and V itself otherwise. Never returns nullptr for a non-null V.
+Value *DxilValueCache::OptionallyGetValue(Value *V) {
+  Value *Cached = ValueMap.Get(V);
+  return Cached ? Cached : V;
+}
+
+// Constructs the pass and runs its generated initializer against the global
+// PassRegistry.
+DxilValueCache::DxilValueCache() : ModulePass(ID) {
+  initializeDxilValueCachePass(*PassRegistry::getPassRegistry());
+}
+
+// Human-readable pass name.
+const char *DxilValueCache::getPassName() const {
+  return "Dxil Value Cache";
+}
+
+// Returns the cached value for V when there is one; otherwise runs
+// ProcessValue on V and returns its result (which may be nullptr).
+Value *DxilValueCache::GetValue(Value *V, DominatorTree *DT) {
+  Value *Cached = ValueMap.Get(V);
+  return Cached ? Cached : ProcessValue(V, DT);
+}
+
+// Runs ProcessValue on BB, then reports whether the cache marks BB as always
+// reachable.
+bool DxilValueCache::IsAlwaysReachable(BasicBlock *BB, DominatorTree *DT) {
+  ProcessValue(BB, DT);
+  return IsAlwaysReachable_(BB);
+}
+
+// Runs ProcessValue on BB, then reports whether the cache marks BB as never
+// reachable.
+bool DxilValueCache::IsNeverReachable(BasicBlock *BB, DominatorTree *DT) {
+  ProcessValue(BB, DT);
+  return IsNeverReachable_(BB);
+}
+
+// Debug helper: forwards to the underlying value map's dump().
+LLVM_DUMP_METHOD
+void DxilValueCache::dump() const {
+  ValueMap.dump();
+}
+
+// Simplifies NewV together with everything it depends on, caching results.
+//
+// Only Instructions and BasicBlocks are processed; any other value returns
+// nullptr right away. Dependencies are walked depth-first with an explicit
+// worklist: the first visit of a value marks it with the sentinel and pushes
+// its unseen dependencies; the second visit pops it and simplifies it.
+//
+// Returns the simplified value of NewV when NewV is an instruction and a
+// simplification was found, nullptr otherwise (always nullptr for a basic
+// block; query its reachability through the cache instead). DT may be null.
+Value *DxilValueCache::ProcessValue(Value *NewV, DominatorTree *DT) {
+
+  Value *Result = nullptr;
+
+  SmallVector<Value *, 16> WorkList;
+
+  // Although we accept all values for convenience, we only process
+  // Instructions.
+  if (Instruction *I = dyn_cast<Instruction>(NewV)) {
+    WorkList.push_back(I);
+  }
+  else if (BasicBlock *BB = dyn_cast<BasicBlock>(NewV)) {
+    WorkList.push_back(BB->getTerminator());
+    WorkList.push_back(BB);
+  }
+  else {
+    return nullptr;
+  }
+
+  // Unconditionally process this one instruction, whether we've seen
+  // it or not. The simplification might be able to do something to
+  // simplify it even when we don't have its value cached.
+
+
+  // This is a basic DFS setup.
+  while (WorkList.size()) {
+    Value *V = WorkList.back();
+
+    // If we haven't seen this value, go in and push things it depends on
+    // into the worklist.
+    if (!ValueMap.Seen(V)) {
+      // The sentinel marks V as visited so cycles do not push it again.
+      ValueMap.SetSentinel(V);
+      if (Instruction *I = dyn_cast<Instruction>(V)) {
+
+        for (Use &U : I->operands()) {
+          Instruction *UseI = dyn_cast<Instruction>(U.get());
+          if (!UseI)
+            continue;
+          if (!ValueMap.Seen(UseI))
+            WorkList.push_back(UseI);
+        }
+
+        // A PHI also depends on the reachability of its incoming blocks.
+        if (PHINode *PN = dyn_cast<PHINode>(I)) {
+          for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+            BasicBlock *BB = PN->getIncomingBlock(i);
+            TerminatorInst *Term = BB->getTerminator();
+            if (!ValueMap.Seen(Term))
+              WorkList.push_back(Term);
+            if (!ValueMap.Seen(BB))
+              WorkList.push_back(BB);
+          }
+        }
+      }
+      else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+        if (IsEntryBlock(BB)) {
+          MarkAlwaysReachable(BB);
+        }
+        // A block's reachability depends on its predecessors and on the
+        // terminators that branch to it.
+        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; PI++) {
+          BasicBlock *PredBB = *PI;
+          TerminatorInst *Term = PredBB->getTerminator();
+          if (!ValueMap.Seen(Term))
+            WorkList.push_back(Term);
+          if (!ValueMap.Seen(PredBB))
+            WorkList.push_back(PredBB);
+        }
+      }
+    }
+    // If we've seen this value, all its dependencies must have been processed
+    // as well.
+    else {
+      WorkList.pop_back();
+      if (Instruction *I = dyn_cast<Instruction>(V)) {
+        Value *SimplifiedValue = SimplifyAndCacheResult(I, DT);
+        // Set the result if this is the input inst.
+        // SimplifyInst may not have cached the value
+        // so we return it directly.
+        if (I == NewV)
+          Result = SimplifiedValue;
+      }
+      else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+        // Deduce the basic block's reachability based on
+        // other analysis.
+        if (!IsEntryBlock(BB)) {
+          bool AllNeverReachable = true;
+          for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; PI++) {
+            // Look at the predecessor, not at BB itself: BB is dead only
+            // when every block that can branch to it is dead.
+            if (!IsNeverReachable_(*PI)) {
+              AllNeverReachable = false;
+              break;
+            }
+          }
+          if (AllNeverReachable)
+            MarkNeverReachable(BB);
+        }
+
+      }
+    }
+  }
+
+  return Result;
+}
+
+char DxilValueCache::ID;
+
+// Factory for the value cache pass; returns a newly allocated instance.
+ModulePass *llvm::createDxilValueCachePass() {
+  return new DxilValueCache();
+}
+
+INITIALIZE_PASS(DxilValueCache, DEBUG_TYPE, "Dxil Value Cache", false, false)
+

+ 1 - 1
lib/HLSL/HLExpandStoreIntrinsics.cpp

@@ -99,7 +99,7 @@ void HLExpandStoreIntrinsics::emitElementStores(CallInst &OriginalCall,
   }
   else if (ArrayType *ArrayTy = dyn_cast<ArrayType>(StackTopTy)) {
     unsigned ElemSize = (unsigned)Module.getDataLayout().getTypeAllocSize(ArrayTy->getElementType());
-    for (int i = 0; i < ArrayTy->getNumElements(); ++i) {
+    for (int i = 0; i < (int)ArrayTy->getNumElements(); ++i) {
       unsigned ElemOffsetFromBase = OffsetFromBase + ElemSize * i;
       GEPIndicesStack.emplace_back(Builder.getInt32(i));
       emitElementStores(OriginalCall, GEPIndicesStack, ArrayTy->getElementType(), ElemOffsetFromBase);

+ 4 - 3
lib/HLSL/HLModule.cpp

@@ -945,11 +945,12 @@ unsigned HLModule::GetBindingForResourceInCB(GetElementPtrInst *CbPtr,
 // TODO: Don't check names.
 bool HLModule::IsStreamOutputType(llvm::Type *Ty) {
   if (StructType *ST = dyn_cast<StructType>(Ty)) {
-    if (ST->getName().startswith("class.PointStream"))
+    StringRef name = ST->getName();
+    if (name.startswith("class.PointStream"))
       return true;
-    if (ST->getName().startswith("class.LineStream"))
+    if (name.startswith("class.LineStream"))
       return true;
-    if (ST->getName().startswith("class.TriangleStream"))
+    if (name.startswith("class.TriangleStream"))
       return true;
   }
   return false;

+ 1 - 21
lib/HLSL/HLOperationLower.cpp

@@ -827,24 +827,6 @@ bool IsValidLoadInput(Value *V) {
   return true;
 }
 
-// Apply current shuffle vector mask on top of previous shuffle mask.
-// For example, if previous mask is (12,11,10,13) and current mask is (3,1,0,2)
-// new mask would be (13,11,12,10)
-Constant *AccumulateMask(Constant *curMask, Constant *prevMask) {
-  if (curMask == nullptr) {
-    return prevMask;
-  }
-  unsigned size = cast<VectorType>(curMask->getType())->getNumElements();
-  SmallVector<uint32_t, 16> Elts;
-  for (unsigned i = 0; i != size; ++i) {
-    ConstantInt *Index = cast<ConstantInt>(curMask->getAggregateElement(i));
-    ConstantInt *IVal =
-        cast<ConstantInt>(prevMask->getAggregateElement(Index->getSExtValue()));
-    Elts.emplace_back(IVal->getSExtValue());
-  }
-  return ConstantDataVector::get(curMask->getContext(), Elts);
-}
-
 // Tunnel through insert/extract element and shuffle to find original source
 // of scalar value, or specified element (vecIdx) of vector value.
 Value *FindScalarSource(Value *src, unsigned vecIdx = 0) {
@@ -2745,7 +2727,7 @@ struct SampleHelper {
     DXASSERT_NOMSG(compareValue);
   }
   void SetClamp(CallInst *CI, unsigned clampIdx) {
-    if (clamp = ReadHLOperand(CI, clampIdx)) {
+    if ((clamp = ReadHLOperand(CI, clampIdx))) {
       if (clamp->getType()->isVectorTy()) {
         IRBuilder<> Builder(CI);
         clamp = Builder.CreateExtractElement(clamp, (uint64_t)0);
@@ -4915,8 +4897,6 @@ Value *TranslateGenericRayQueryMethod(CallInst *CI, IntrinsicOp IOP, OP::OpCode
   Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
   Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
 
-  Value *Args[] = {opArg, handle};
-
   IRBuilder<> Builder(CI);
   Function *F = hlslOP->GetOpFunc(opcode, CI->getType());
 

+ 2 - 2
lib/HLSL/HLSignatureLower.cpp

@@ -1064,7 +1064,7 @@ void HLSignatureLower::GenerateDxilInputsOutputs(DXIL::SignatureKind SK) {
   DxilFunctionProps &props = HLM.GetDxilFunctionProps(Entry);
   Module &M = *(HLM.GetModule());
 
-  OP::OpCode opcode;
+  OP::OpCode opcode = (OP::OpCode)-1;
   switch (SK) {
   case DXIL::SignatureKind::Input:
     opcode = OP::OpCode::LoadInput;
@@ -1614,7 +1614,7 @@ void HLSignatureLower::GenerateEmitIndicesOperation(Value *indicesOutput) {
     // Skip first pointer idx which must be 0.
     GEPIt++;
     Value *primIdx = GEPIt.getOperand();
-    DXASSERT(++GEPIt == E, "invalid GEP here");
+    DXASSERT(++GEPIt == E, "invalid GEP here"); (void)E;
 
     auto GepUser = GEP->user_begin();
     auto GepUserE = GEP->user_end();

+ 28 - 17
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -29,6 +29,7 @@
 #include "dxc/HLSL/DxilGenerationPass.h" // HLSL Change
 #include "dxc/HLSL/HLMatrixLowerPass.h" // HLSL Change
 #include "dxc/HLSL/ComputeViewIdState.h" // HLSL Change
+#include "dxc/HLSL/DxilValueCache.h" // HLSL Change
 
 using namespace llvm;
 
@@ -207,6 +208,7 @@ void PassManagerBuilder::populateFunctionPassManager(
 
 // HLSL Change Starts
 static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExtensionsCodegenHelper *ExtHelper, legacy::PassManagerBase &MPM) {
+
   // Don't do any lowering if we're targeting high-level.
   if (HLSLHighLevel) {
     MPM.add(createHLEmitMetadataPass());
@@ -241,30 +243,31 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
     // Do this before change vector to array.
     MPM.add(createDxilLegalizeEvalOperationsPass());
   }
-  else {
-    // This should go between matrix lower and dynamic indexing vector to array,
-    // because matrix lower may create dynamically indexed global vectors,
-    // which should become locals. If they are turned into arrays first,
-    // this pass will ignore them as it only works on scalars and vectors.
-    MPM.add(createLowerStaticGlobalIntoAlloca());
-  }
+  // This should go between matrix lower and dynamic indexing vector to array,
+  // because matrix lower may create dynamically indexed global vectors,
+  // which should become locals. If they are turned into arrays first,
+  // this pass will ignore them as it only works on scalars and vectors.
+  MPM.add(createLowerStaticGlobalIntoAlloca());
 
   // Change dynamic indexing vector to array.
-  MPM.add(createDynamicIndexingVectorToArrayPass(NoOpt));
+  MPM.add(createDynamicIndexingVectorToArrayPass(false /* ReplaceAllVector */));
+
+  // Rotate the loops before, mem2reg, since it messes up dbg.value's
+  MPM.add(createLoopRotatePass());
 
   // mem2reg
-  // Special Mem2Reg pass that only happens if optimization is
-  // enabled or loop unroll is needed.
-  MPM.add(createLoopRotatePass()); // Rotate the loops before, mem2reg, since it messes up dbg.value's
+  // Special Mem2Reg pass that skips precise marker.
   MPM.add(createDxilConditionalMem2RegPass(NoOpt));
 
   if (!NoOpt) {
     MPM.add(createDxilConvergentMarkPass());
   }
 
-  MPM.add(createSimplifyInstPass());
+  if (!NoOpt)
+    MPM.add(createSimplifyInstPass());
 
-  MPM.add(createCFGSimplificationPass());
+  if (!NoOpt)
+    MPM.add(createCFGSimplificationPass());
 
   // Passes to handle [unroll]
   // Needs to happen after SROA since loop count may depend on
@@ -289,14 +292,20 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   // Propagate precise attribute.
   MPM.add(createDxilPrecisePropagatePass());
 
-  MPM.add(createSimplifyInstPass());
+  if (!NoOpt)
+    MPM.add(createSimplifyInstPass());
 
   // scalarize vector to scalar
-  MPM.add(createScalarizerPass());
+  MPM.add(createScalarizerPass(!NoOpt /* AllowFolding */));
 
-  MPM.add(createSimplifyInstPass());
+  if (!NoOpt)
+    MPM.add(createSimplifyInstPass());
 
-  MPM.add(createCFGSimplificationPass());
+  if (!NoOpt)
+    MPM.add(createCFGSimplificationPass());
+
+  // Remove vector instructions
+  MPM.add(createDxilEliminateVectorPass());
 
   MPM.add(createDeadCodeEliminationPass());
 
@@ -313,6 +322,7 @@ void PassManagerBuilder::populateModulePassManager(
   if (OptLevel == 0) {
     if (!HLSLHighLevel) {
       MPM.add(createHLEnsureMetadataPass()); // HLSL Change - rehydrate metadata from high-level codegen
+      MPM.add(createDxilInsertNoopsPass()); // HLSL Change - insert noop instructions
     }
 
     if (Inliner) {
@@ -339,6 +349,7 @@ void PassManagerBuilder::populateModulePassManager(
       MPM.add(createDxilLowerCreateHandleForLibPass());
       MPM.add(createDxilTranslateRawBuffer());
       MPM.add(createDxilLegalizeSampleOffsetPass());
+      MPM.add(createDxilFinalizeNoopsPass());
       MPM.add(createDxilFinalizeModulePass());
       MPM.add(createComputeViewIdStatePass());
       MPM.add(createDxilDeadFunctionEliminationPass());

+ 1 - 0
lib/Transforms/Scalar/CMakeLists.txt

@@ -48,6 +48,7 @@ add_llvm_library(LLVMScalarOpts
   DxilLoopUnroll.cpp # HLSL Change
   DxilEraseDeadRegion.cpp # HLSL Change
   DxilFixConstArrayInitializer.cpp # HLSL Change
+  DxilEliminateVector.cpp # HLSL Change
   Scalarizer.cpp
   SeparateConstOffsetFromGEP.cpp
   SimplifyCFGPass.cpp

+ 239 - 0
lib/Transforms/Scalar/DxilEliminateVector.cpp

@@ -0,0 +1,239 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilEliminateVector.cpp                                                   //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// A pass to remove vector instructions, especially in situations where      //
+// optimizations are turned off.                                             //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "llvm/Pass.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/DIBuilder.h"
+
+#include "dxc/HLSL/DxilValueCache.h"
+
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+
+// Function pass that removes insertelement/extractelement instructions,
+// replacing them with values supplied by DxilValueCache and rewriting the
+// debug info attached to the vectors along the way.
+class DxilEliminateVector : public FunctionPass {
+public:
+  static char ID;
+  DxilEliminateVector() : FunctionPass(ID) {
+    initializeDxilEliminateVectorPass(*PassRegistry::getPassRegistry());
+  }
+
+  // Requires the value cache and the dominator tree, and declares every
+  // analysis as preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DxilValueCache>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.setPreservesAll(); // DxilValueCache is safe. CFG is not changed, so DT is okay.
+  }
+
+  // Splits the @llvm.dbg.value users of a vector built by IE into
+  // per-element @llvm.dbg.value calls.
+  bool TryRewriteDebugInfoForVector(InsertElementInst *IE);
+  // Pass entry point; returns whether F was modified.
+  bool runOnFunction(Function &F) override;
+  const char *getPassName() const override { return "Dxil Eliminate Vector"; }
+};
+
+char DxilEliminateVector::ID;
+}
+
+// Looks up the MetadataAsValue wrapper for instruction I without creating one.
+// Returns nullptr when I has never been referenced through metadata.
+static
+MetadataAsValue *GetAsMetadata(Instruction *I) {
+  auto *Local = LocalAsMetadata::getIfExists(I);
+  if (!Local)
+    return nullptr;
+
+  // getIfExists already yields nullptr when no wrapper is present.
+  return MetadataAsValue::getIfExists(I->getContext(), Local);
+}
+
+// Walks a chain of insertelement instructions ending at V and records, per
+// lane, the scalar that was last inserted. Lanes that are never written stay
+// null. Returns false if V is not an insertelement, if the chain bottoms out
+// in anything other than undef, or if an insert uses a non-constant index.
+static
+bool CollectVectorElements(Value *V, SmallVector<Value *, 4> &Elements) {
+  auto *Insert = dyn_cast<InsertElementInst>(V);
+  if (!Insert)
+    return false;
+
+  // Gather the lanes of the vector being inserted into first, so that this
+  // insert overrides whatever an earlier one wrote to the same lane.
+  Value *BaseVec = Insert->getOperand(0);
+  if (!isa<UndefValue>(BaseVec) && !CollectVectorElements(BaseVec, Elements))
+    return false;
+
+  auto *LaneIndex = dyn_cast<ConstantInt>(Insert->getOperand(2));
+  if (!LaneIndex)
+    return false;
+
+  // Only the first four lanes are tracked; higher indices are ignored.
+  const uint64_t Lane = LaneIndex->getLimitedValue();
+  if (Lane < 4) {
+    if (Elements.size() <= Lane)
+      Elements.resize(Lane + 1);
+    Elements[Lane] = Insert->getOperand(1);
+  }
+
+  return true;
+}
+
+// Returns true when V is an instruction that is already referenced by at
+// least one @llvm.dbg.value call.
+static bool HasDebugValue(Value *V) {
+  if (auto *Inst = dyn_cast<Instruction>(V)) {
+    if (MetadataAsValue *Wrapped = GetAsMetadata(Inst)) {
+      for (User *MDUser : Wrapped->users()) {
+        if (isa<DbgValueInst>(MDUser))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+// Replaces every @llvm.dbg.value that refers to the vector produced by IE
+// with per-element @llvm.dbg.value calls that use bit-piece expressions.
+//
+// Returns true if the function was modified, i.e. if any debug intrinsic was
+// inserted or erased. Returns false without touching the IR when IE has no
+// @llvm.dbg.value users or when its elements cannot be collected.
+bool DxilEliminateVector::TryRewriteDebugInfoForVector(InsertElementInst *IE) {
+
+  // If this is not ever used as meta-data, there's no debug
+  MetadataAsValue *DebugI = GetAsMetadata(IE);
+  if (!DebugI)
+    return false;
+
+  // Collect @dbg.value instructions
+  SmallVector<DbgValueInst *, 4> DbgValueInsts;
+  for (User *U : DebugI->users()) {
+    if (DbgValueInst *DbgValueI = dyn_cast<DbgValueInst>(U)) {
+      DbgValueInsts.push_back(DbgValueI);
+    }
+  }
+
+  if (DbgValueInsts.empty())
+    return false;
+
+  SmallVector<Value *, 4> Elements;
+  if (!CollectVectorElements(IE, Elements))
+    return false;
+
+  DIBuilder DIB(*IE->getModule());
+  const DataLayout &DL = IE->getModule()->getDataLayout();
+
+  // Go through the elements and create @dbg.value with bit-piece
+  // expressions for them.
+  bool Changed = false;
+  for (DbgValueInst *DVI : DbgValueInsts) {
+
+    // The vector itself may already be a piece of a larger variable; element
+    // offsets are then relative to that piece.
+    DIExpression *ParentExpr = DVI->getExpression();
+    unsigned BitpieceOffset = 0;
+    if (ParentExpr->isBitPiece())
+      BitpieceOffset = ParentExpr->getBitPieceOffset();
+
+    for (unsigned i = 0; i < Elements.size(); i++) {
+      // Lane never written by the insertelement chain.
+      if (!Elements[i])
+        continue;
+
+      // Leave elements alone that already carry their own @dbg.value.
+      if (HasDebugValue(Elements[i]))
+        continue;
+
+      unsigned ElementSize = DL.getTypeAllocSizeInBits(Elements[i]->getType());
+      DIExpression *Expr = DIB.createBitPieceExpression(BitpieceOffset + i * ElementSize, ElementSize);
+      DIB.insertDbgValueIntrinsic(Elements[i], 0, DVI->getVariable(), Expr, DVI->getDebugLoc(), DVI);
+    }
+
+    // Erasing the vector-level @dbg.value modifies the function even when no
+    // per-element intrinsic was inserted above, so it must be reported.
+    DVI->eraseFromParent();
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+// Removes insertelement/extractelement instructions from F.
+//
+// Promotable vector allocas are first promoted to registers. Then each
+// collected vector instruction is either replaced by the value DxilValueCache
+// reports for it or erased once it has no users, repeating until a full sweep
+// makes no progress. Debug info on insertelement chains is rewritten to
+// per-element form along the way.
+//
+// Returns true if F was modified in any way, including debug intrinsics.
+bool DxilEliminateVector::runOnFunction(Function &F) {
+
+  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  DxilValueCache *DVC = &getAnalysis<DxilValueCache>();
+
+  std::vector<Instruction *> VectorInsts;
+  std::vector<AllocaInst *> VectorAllocas;
+
+  // Collect the vector insts and allocas.
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      if (isa<InsertElementInst>(&I) || isa<ExtractElementInst>(&I)) {
+        VectorInsts.push_back(&I);
+      }
+      else if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+        if (AI->getAllocatedType()->isVectorTy() && llvm::isAllocaPromotable(AI))
+          VectorAllocas.push_back(AI);
+      }
+    }
+  }
+
+  if (VectorInsts.empty())
+    return false;
+
+  bool Changed = false;
+
+  // Promote the allocas if they exist. They could very well exist
+  // because of precise.
+  if (!VectorAllocas.empty()) {
+    PromoteMemToReg(VectorAllocas, *DT);
+    Changed = true;
+  }
+
+  // Iteratively try to remove them, until all gone or unable to
+  // do it anymore. Each productive sweep removes at least one instruction,
+  // so the initial count bounds the number of sweeps.
+  unsigned Attempts = VectorInsts.size();
+  for (unsigned i = 0; i < Attempts; i++) {
+    bool LocalChange = false;
+
+    for (unsigned j = 0; j < VectorInsts.size();) {
+      auto *I = VectorInsts[j];
+      bool Remove = false;
+
+      if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+        // A debug-info rewrite modifies the function but does not count as
+        // progress for the removal loop, so it only feeds the final result.
+        Changed |= TryRewriteDebugInfoForVector(IE);
+      }
+
+      if (Value *V = DVC->GetValue(I, DT)) {
+        I->replaceAllUsesWith(V);
+        Remove = true;
+      }
+      else if (I->user_empty()) {
+        Remove = true;
+      }
+
+      // Do the remove
+      if (Remove) {
+        LocalChange = true;
+        I->eraseFromParent();
+        // Erasing shifts the next candidate into slot j, so j is not advanced.
+        VectorInsts.erase(VectorInsts.begin() + j);
+      }
+      else {
+        j++;
+      }
+    }
+
+    Changed |= LocalChange;
+    if (!LocalChange)
+      break;
+  }
+
+  return Changed;
+}
+
+// Factory for the DxilEliminateVector pass; returns a newly allocated
+// instance.
+Pass *llvm::createDxilEliminateVectorPass() {
+  return new DxilEliminateVector();
+}
+
+INITIALIZE_PASS(DxilEliminateVector, "dxil-elim-vector", "Dxil Eliminate Vectors", false, false)

+ 3 - 3
lib/Transforms/Scalar/DxilEraseDeadRegion.cpp

@@ -56,9 +56,9 @@ struct DxilEraseDeadRegion : public FunctionPass {
     return false;
   }
 
-  bool FindDeadRegion(PostDominatorTree *PDT, BasicBlock *Begin, BasicBlock *End, std::set<BasicBlock *> &Region) {
+  bool FindDeadRegion(BasicBlock *Begin, BasicBlock *End, std::set<BasicBlock *> &Region) {
     std::vector<BasicBlock *> WorkList;
-    auto ProcessSuccessors = [this, &WorkList, Begin, End, &Region, PDT](BasicBlock *BB) {
+    auto ProcessSuccessors = [this, &WorkList, Begin, End, &Region](BasicBlock *BB) {
       for (BasicBlock *Succ : successors(BB)) {
         if (Succ == End) continue;
         if (Succ == Begin) return false; // If goes back to the beginning, there's a loop, give up.
@@ -115,7 +115,7 @@ struct DxilEraseDeadRegion : public FunctionPass {
       return false;
 
     std::set<BasicBlock *> Region;
-    if (!this->FindDeadRegion(PDT, Common, BB, Region))
+    if (!this->FindDeadRegion(Common, BB, Region))
       return false;
 
     // If BB branches INTO the region, forming a loop give up.

+ 4 - 23
lib/Transforms/Scalar/DxilLoopUnroll.cpp

@@ -1129,10 +1129,10 @@ public:
   static char ID;
 
   // Function overrides that resolve options when used for DxOpt
-  void applyOptions(PassOptions O) {
+  void applyOptions(PassOptions O) override {
     GetPassOptionBool(O, "NoOpt", &NoOpt, false);
   }
-  void dumpConfig(raw_ostream &OS) {
+  void dumpConfig(raw_ostream &OS) override {
     FunctionPass::dumpConfig(OS);
     OS << ",NoOpt=" << NoOpt;
   }
@@ -1144,10 +1144,8 @@ public:
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<LoopInfoWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequiredID(LoopSimplifyID);
     AU.setPreservesCFG();
   }
 
@@ -1224,33 +1222,16 @@ public:
     return Changed;
   }
 
-  bool runOnFunction(Function &F) {
+  bool runOnFunction(Function &F) override {
 
 
-    LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     AssumptionCache *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
 
-    bool NeedPromote = false;
     bool Changed = false;
     
     Changed |= RemoveAllUnusedAllocas(F);
-
-    if (NoOpt) {
-      // If any of the functions are marked as full unroll.
-      for (Loop *L : *LI) {
-        if (HasLoopsMarkedUnrollRecursive(L)) {
-          NeedPromote = true;
-          break;
-        }
-      }
-    }
-    else {
-      NeedPromote = true;
-    }
-
-    if (NeedPromote)
-      Changed |= Mem2Reg(F, *DT, *AC);
+    Changed |= Mem2Reg(F, *DT, *AC);
 
     return Changed;
   }

+ 2 - 9
lib/Transforms/Scalar/LowerTypePasses.cpp

@@ -110,16 +110,9 @@ bool LowerTypePass::runOnFunction(Function &F, bool HasDbgInfo) {
   for (AllocaInst *A : workList) {
     AllocaInst *NewA = lowerAlloca(A);
     if (HasDbgInfo) {
-      // Add debug info.
+      // Migrate debug info.
       DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(A);
-      if (DDI) {
-        Value *DDIVar = MetadataAsValue::get(Context, DDI->getRawVariable());
-        Value *DDIExp = MetadataAsValue::get(Context, DDI->getRawExpression());
-        Value *VMD = MetadataAsValue::get(Context, ValueAsMetadata::get(NewA));
-        IRBuilder<> debugBuilder(DDI);
-        debugBuilder.CreateCall(DDI->getCalledFunction(),
-                                {VMD, DDIVar, DDIExp});
-      }
+      if (DDI) DDI->setOperand(0, MetadataAsValue::get(Context, LocalAsMetadata::get(NewA)));
     }
     // Replace users.
     lowerUseWithNewValue(A, NewA);

+ 35 - 0
lib/Transforms/Scalar/SROA.cpp

@@ -57,6 +57,7 @@
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "dxc/DXIL/DxilUtil.h"  // HLSL Change - don't sroa resource type.
+#include "dxc/DXIL/DxilMetadataHelper.h"  // HLSL Change - support strided debug variables
 #include "dxc/HLSL/HLMatrixType.h"  // HLSL Change - don't sroa matrix types.
 
 #if __cplusplus >= 201103L && !defined(NDEBUG)
@@ -4310,11 +4311,29 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
     DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
                   /*AllowUnresolved*/ false);
     bool IsSplit = Pieces.size() > 1;
+
+    // HLSL Change Begins
+    // Take into account debug stride in extra metadata
+    std::vector<hlsl::DxilDIArrayDim> ArrayDims;
+    unsigned FirstFragmentOffsetInBits = 0;
+    if (!hlsl::DxilMDHelper::GetVariableDebugLayout(DbgDecl, FirstFragmentOffsetInBits, ArrayDims)
+      && Expr->isBitPiece()) {
+      FirstFragmentOffsetInBits = Expr->getBitPieceOffset();
+    }
+
+    unsigned FragmentSizeInBits = DL.getTypeAllocSizeInBits(AI.getAllocatedType());
+    for (const hlsl::DxilDIArrayDim& ArrayDim : ArrayDims) {
+      assert(FragmentSizeInBits % ArrayDim.NumElements == 0);
+      FragmentSizeInBits /= ArrayDim.NumElements;
+    }
+    // HLSL Change Ends
+
     for (auto Piece : Pieces) {
       // Create a piece expression describing the new partition or reuse AI's
       // expression if there is only one partition.
       auto *PieceExpr = Expr;
       if (IsSplit || Expr->isBitPiece()) {
+#if 0 // HLSL Change - Handle Strides
         // If this alloca is already a scalar replacement of a larger aggregate,
         // Piece.Offset describes the offset inside the scalar.
         uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
@@ -4327,6 +4346,22 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
             continue;
           Size = std::min(Size, AbsEnd - Start);
         }
+// HLSL Change Begins
+#else
+        // Find the fragment from the original user variable in which this piece falls
+        uint64_t PieceFragmentIndex = Piece.Offset / FragmentSizeInBits;
+
+        // Compute the offset in the original user variable
+        uint64_t StartInFragment = Piece.Offset % FragmentSizeInBits;
+        uint64_t Start = FirstFragmentOffsetInBits + Piece.Offset % FragmentSizeInBits;
+        for (auto ArrayDimIter = ArrayDims.rbegin(); ArrayDimIter != ArrayDims.rend(); ++ArrayDimIter) {
+          Start += ArrayDimIter->StrideInBits * (PieceFragmentIndex % ArrayDimIter->NumElements);
+          PieceFragmentIndex /= ArrayDimIter->NumElements;
+        }
+
+        uint64_t Size = std::min<uint64_t>(Piece.Size, FragmentSizeInBits - StartInFragment);
+#endif
+// HLSL Change Ends
         PieceExpr = DIB.createBitPieceExpression(Start, Size);
       }
 

+ 194 - 50
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -79,6 +79,7 @@ public:
   // Split V into AllocaInsts with Builder and save the new AllocaInsts into Elts.
   // Then do SROA on V.
   static bool DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
+                                  Type *&BrokenUpTy, uint64_t &NumInstances,
                                   IRBuilder<> &Builder, bool bFlatVector,
                                   bool hasPrecise, DxilTypeSystem &typeSys,
                                   const DataLayout &DL,
@@ -317,6 +318,10 @@ bool SROA_HLSL::runOnFunction(Function &F) {
   HLModule &HLM = M->GetOrCreateHLModule();
   DxilTypeSystem &typeSys = HLM.GetTypeSystem();
 
+  // Establish debug metadata layout name in the context in advance so the name
+  // is serialized in both debug and non-debug compilations.
+  (void)M->getContext().getMDKindID(DxilMDHelper::kDxilVariableDebugLayoutMDName);
+
   bool Changed = performScalarRepl(F, typeSys);
   // change rest memcpy into ld/st.
   MemcpySplitter splitter(F.getContext(), typeSys);
@@ -778,6 +783,124 @@ static unsigned getNestedLevelInStruct(const Type *ty) {
   return lvl;
 }
 
+// After SROA'ing a given value into a series of elements,
+// creates the debug info for the storage of the individual elements.
+//
+// ParentVal    - the alloca or argument that was broken up.
+// BrokenUpTy   - the struct/vector/array type whose members became Elems.
+// NumInstances - how many instances of BrokenUpTy ParentVal held (more than
+//                one when BrokenUpTy was nested in arrays).
+// Elems        - the replacement values, one per member of BrokenUpTy.
+// DatLayout    - data layout used for offsets and sizes.
+// DbgBuilder   - builder used to emit the new declares; must not be null.
+//
+// Does nothing when ParentVal has no @llvm.dbg.declare.
+static void addDebugInfoForElements(Value *ParentVal,
+    Type *BrokenUpTy, uint64_t NumInstances,
+    ArrayRef<Value*> Elems, const DataLayout &DatLayout,
+    DIBuilder *DbgBuilder) {
+
+  // Extract the data we need from the parent value,
+  // depending on whether it is an alloca, argument or global variable.
+  Type *ParentTy;
+  unsigned ParentBitPieceOffset = 0;
+  std::vector<DxilDIArrayDim> DIArrayDims;
+  DIVariable *ParentDbgVariable;
+  DIExpression *ParentDbgExpr;
+  DILocation *ParentDbgLocation;
+  Instruction *DbgDeclareInsertPt = nullptr;
+  if (isa<GlobalVariable>(ParentVal)) {
+    llvm_unreachable("Not implemented: sroa debug info propagation for global vars.");
+  }
+  else {
+    if (AllocaInst *ParentAlloca = dyn_cast<AllocaInst>(ParentVal))
+      ParentTy = ParentAlloca->getAllocatedType();
+    else
+      ParentTy = cast<Argument>(ParentVal)->getType();
+
+    DbgDeclareInst *ParentDbgDeclare = llvm::FindAllocaDbgDeclare(ParentVal);
+    if (ParentDbgDeclare == nullptr) return;
+
+    // Get the bit piece offset
+    if ((ParentDbgExpr = ParentDbgDeclare->getExpression())) {
+      if (ParentDbgExpr->isBitPiece()) {
+        ParentBitPieceOffset = ParentDbgExpr->getBitPieceOffset();
+      }
+    }
+
+    ParentDbgVariable = ParentDbgDeclare->getVariable();
+    ParentDbgLocation = ParentDbgDeclare->getDebugLoc();
+    DbgDeclareInsertPt = ParentDbgDeclare;
+
+    // Read the extra layout metadata, if any
+    unsigned ParentBitPieceOffsetFromMD = 0;
+    if (DxilMDHelper::GetVariableDebugLayout(ParentDbgDeclare, ParentBitPieceOffsetFromMD, DIArrayDims)) {
+      // The offset is redundant for local variables and only necessary for global variables.
+      DXASSERT(ParentBitPieceOffsetFromMD == ParentBitPieceOffset,
+        "Bit piece offset mismatch between llvm.dbg.declare and DXIL metadata.");
+    }
+  }
+
+  // If the type that was broken up is nested in arrays,
+  // then each element will also be an array,
+  // but the continuity between successive elements of the original aggregate
+  // will have been broken, such that we must store the stride to rebuild it.
+  // For example: [2 x {i32, float}] => [2 x i32], [2 x float], each with stride 64 bits
+  if (NumInstances > 1 && Elems.size() > 1) {
+    // Existing dimensions already account for part of the stride
+    uint64_t NewDimNumElements = NumInstances;
+    for (const DxilDIArrayDim& ArrayDim : DIArrayDims) {
+      DXASSERT(NewDimNumElements % ArrayDim.NumElements == 0,
+        "Debug array stride is inconsistent with the number of elements.");
+      NewDimNumElements /= ArrayDim.NumElements;
+    }
+
+    // Add a stride dimension
+    DxilDIArrayDim NewDIArrayDim = {};
+    NewDIArrayDim.StrideInBits = (unsigned)DatLayout.getTypeAllocSizeInBits(BrokenUpTy);
+    NewDIArrayDim.NumElements = (unsigned)NewDimNumElements;
+    DIArrayDims.emplace_back(NewDIArrayDim);
+  }
+  else {
+    DIArrayDims.clear();
+  }
+
+  // The builder is dereferenced for every element below, so validate it
+  // before the first use rather than after.
+  DXASSERT_NOMSG(DbgBuilder != nullptr);
+
+  // Create the debug info for each element
+  for (unsigned ElemIdx = 0; ElemIdx < Elems.size(); ++ElemIdx) {
+    // Figure out the offset of the element in the broken up type
+    unsigned ElemBitPieceOffset = ParentBitPieceOffset;
+    if (StructType *ParentStructTy = dyn_cast<StructType>(BrokenUpTy)) {
+      DXASSERT_NOMSG(Elems.size() == ParentStructTy->getNumElements());
+      ElemBitPieceOffset += (unsigned)DatLayout.getStructLayout(ParentStructTy)->getElementOffsetInBits(ElemIdx);
+    }
+    else if (VectorType *ParentVecTy = dyn_cast<VectorType>(BrokenUpTy)) {
+      DXASSERT_NOMSG(Elems.size() == ParentVecTy->getNumElements());
+      ElemBitPieceOffset += (unsigned)DatLayout.getTypeStoreSizeInBits(ParentVecTy->getElementType()) * ElemIdx;
+    }
+    else if (ArrayType *ParentArrayTy = dyn_cast<ArrayType>(BrokenUpTy)) {
+      DXASSERT_NOMSG(Elems.size() == ParentArrayTy->getNumElements());
+      ElemBitPieceOffset += (unsigned)DatLayout.getTypeStoreSizeInBits(ParentArrayTy->getElementType()) * ElemIdx;
+    }
+
+    // The bit_piece can only represent the leading contiguous bytes.
+    // If strides are involved, we'll need additional metadata.
+    Type *ElemTy = Elems[ElemIdx]->getType()->getPointerElementType();
+    unsigned ElemBitPieceSize = (unsigned)DatLayout.getTypeAllocSizeInBits(ElemTy);
+    for (const DxilDIArrayDim& ArrayDim : DIArrayDims)
+      ElemBitPieceSize /= ArrayDim.NumElements;
+
+    if (AllocaInst *ElemAlloca = dyn_cast<AllocaInst>(Elems[ElemIdx])) {
+      // Local variables get an @llvm.dbg.declare plus optional metadata for layout stride information.
+      DIExpression *ElemDbgExpr = nullptr;
+      if (ElemBitPieceOffset == 0 && DatLayout.getTypeAllocSizeInBits(ParentTy) == ElemBitPieceSize) {
+        // The element covers the whole parent, so no bit piece is needed.
+        ElemDbgExpr = DbgBuilder->createExpression();
+      }
+      else {
+        ElemDbgExpr = DbgBuilder->createBitPieceExpression(ElemBitPieceOffset, ElemBitPieceSize);
+      }
+
+      DbgDeclareInst *EltDDI = cast<DbgDeclareInst>(DbgBuilder->insertDeclare(
+        ElemAlloca, cast<DILocalVariable>(ParentDbgVariable), ElemDbgExpr, ParentDbgLocation, DbgDeclareInsertPt));
+
+      if (!DIArrayDims.empty()) DxilMDHelper::SetVariableDebugLayout(EltDDI, ElemBitPieceOffset, DIArrayDims);
+    }
+    else {
+      llvm_unreachable("Non-AllocaInst SROA'd elements.");
+    }
+  }
+}
+
 /// Returns first GEP index that indexes a struct member, or 0 otherwise.
 /// Ignores initial ptr index.
 static unsigned FindFirstStructMemberIdxInGEP(GEPOperator *GEP) {
@@ -993,11 +1116,6 @@ bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
   std::priority_queue<AllocaInst *, std::vector<AllocaInst *>,
                       std::function<bool(AllocaInst *, AllocaInst *)>>
       WorkList(size_cmp);
-  std::unordered_map<AllocaInst*, DbgDeclareInst*> DDIMap;
-  // HLSL Change - Begin
-  std::unordered_map<AllocaInst*, unsigned> OffsetMap; // Map to keep track the offset of an alloca
-                                                       // in the variable that it's a part of.
-  // HLSL Change - End
   // Scan the entry basic block, adding allocas to the worklist.
   BasicBlock &BB = F.getEntryBlock();
   for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
@@ -1006,9 +1124,6 @@ bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
         WorkList.push(A);
         // merge GEP use for the allocs
         HLModule::MergeGepUse(A);
-        if (DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(A)) {
-          DDIMap[A] = DDI;
-        }
       }
     }
 
@@ -1074,9 +1189,11 @@ bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
       IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(AI));
       bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
 
+      Type *BrokenUpTy = nullptr;
+      uint64_t NumInstances = 1;
       bool SROAed = SROA_Helper::DoScalarReplacement(
-          AI, Elts, Builder, /*bFlatVector*/ true, hasPrecise, typeSys, DL,
-          DeadInsts);
+        AI, Elts, BrokenUpTy, NumInstances, Builder,
+        /*bFlatVector*/ true, hasPrecise, typeSys, DL, DeadInsts);
 
       if (SROAed) {
         Type *Ty = AI->getAllocatedType();
@@ -1096,45 +1213,13 @@ bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
             }
           }
         }
-// HLSL Change - Begin
-        unsigned parentOffset = 0;
-        auto offsetIt = OffsetMap.find(AI);
-        if (offsetIt != OffsetMap.end())
-          parentOffset = offsetIt->second;
-// HLSL Change - End
-
-        DbgDeclareInst *DDI = nullptr;
-        unsigned debugOffset = 0;
-        auto iter = DDIMap.find(AI);
-        if (iter != DDIMap.end()) {
-          DDI = iter->second;
-        }
-        // Push Elts into workList.
-        for (auto iter = Elts.begin(); iter != Elts.end(); iter++) {
-          AllocaInst *Elt = cast<AllocaInst>(*iter);
-          WorkList.push(Elt);
-          if (DDI) {
-            Type *Ty = Elt->getAllocatedType();
-            unsigned size = DL.getTypeAllocSize(Ty);
-#if 0 // HLSL Change
-            DIExpression *DDIExp =
-                DIB.createBitPieceExpression(debugOffset, size);
-#else // HLSL Change
 
-            DIExpression *DDIExp = nullptr;
-            if (parentOffset+debugOffset == 0 && DL.getTypeAllocSize(AI->getAllocatedType()) == size) {
-              DDIExp = DIB.createExpression();
-            }
-            else {
-              DDIExp = DIB.createBitPieceExpression((parentOffset+debugOffset) * 8, size * 8);
-            }
-            OffsetMap[Elt] = parentOffset+debugOffset;
-#endif // HLSL Change
-            debugOffset += size;
-            DbgDeclareInst *EltDDI = cast<DbgDeclareInst>(DIB.insertDeclare(
-                Elt, DDI->getVariable(), DDIExp, DDI->getDebugLoc(), DDI));
-            DDIMap[Elt] = EltDDI;
-          }
+        addDebugInfoForElements(AI, BrokenUpTy, NumInstances, Elts, DL, &DIB);
+
+        // Push Elts into workList.
+        for (unsigned EltIdx = 0; EltIdx < Elts.size(); ++EltIdx) {
+          AllocaInst *EltAlloca = cast<AllocaInst>(Elts[EltIdx]);
+          WorkList.push(EltAlloca);
         }
 
         // Now erase any instructions that were made dead while rewriting the
@@ -2927,6 +3012,7 @@ static ArrayType *CreateNestArrayTy(Type *FinalEltTy,
 /// DoScalarReplacement - Split V into AllocaInsts with Builder and save the new AllocaInsts into Elts.
 /// Then do SROA on V.
 bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
+                                      Type *&BrokenUpTy, uint64_t &NumInstances,
                                       IRBuilder<> &Builder, bool bFlatVector,
                                       bool hasPrecise, DxilTypeSystem &typeSys,
                                       const DataLayout &DL,
@@ -2953,6 +3039,9 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
       return false;
     }
 
+    BrokenUpTy = ST;
+    NumInstances = 1;
+
     unsigned numTypes = ST->getNumContainedTypes();
     Elts.reserve(numTypes);
     DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
@@ -2978,14 +3067,16 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
     }
     Type *ElTy = AT->getElementType();
     SmallVector<ArrayType *, 4> nestArrayTys;
-
     nestArrayTys.emplace_back(AT);
+    NumInstances = AT->getNumElements();
     // support multi level of array
     while (ElTy->isArrayTy()) {
       ArrayType *ElAT = cast<ArrayType>(ElTy);
       nestArrayTys.emplace_back(ElAT);
+      NumInstances *= ElAT->getNumElements();
       ElTy = ElAT->getElementType();
     }
+    BrokenUpTy = ElTy;
 
     if (ElTy->isStructTy() &&
         // Skip Matrix type.
@@ -3020,6 +3111,8 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
             // Only support 1 dim split.
             nestArrayTys.size() > 1)
           return false;
+        BrokenUpTy = AT;
+        NumInstances = 1;
         for (int i = 0, e = AT->getNumElements(); i != e; ++i) {
           AllocaInst *NA = AllocaBuilder.CreateAlloca(ElTy, nullptr,
                                                 V->getName() + "." + Twine(i));
@@ -3034,6 +3127,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
       // for array of vector
       // split into arrays of scalar
       VectorType *ElVT = cast<VectorType>(ElTy);
+      BrokenUpTy = ElVT;
       Elts.reserve(ElVT->getNumElements());
 
       ArrayType *scalarArrayTy = CreateNestArrayTy(ElVT->getElementType(), nestArrayTys);
@@ -5096,9 +5190,12 @@ void SROA_Parameter_HLSL::flattenArgument(
 
     // Not flat vector for entry function currently.
     bool SROAed = false;
+    Type *BrokenUpTy = nullptr;
+    uint64_t NumInstances = 1;
     if (inputQual != DxilParamInputQual::InPayload) {
       SROAed = SROA_Helper::DoScalarReplacement(
-        V, Elts, Builder, /*bFlatVector*/ false, annotation.IsPrecise(),
+        V, Elts, BrokenUpTy, NumInstances, Builder, 
+        /*bFlatVector*/ false, annotation.IsPrecise(),
         dxilTypeSys, DL, DeadInsts);
     }
 
@@ -6073,6 +6170,7 @@ ModulePass *llvm::createSROA_Parameter_HLSL() {
 namespace {
 class LowerStaticGlobalIntoAlloca : public ModulePass {
   HLModule *m_pHLModule;
+  DebugInfoFinder m_DbgFinder;
 
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -6081,6 +6179,7 @@ public:
 
   bool runOnModule(Module &M) override {
     m_pHLModule = &M.GetOrCreateHLModule();
+    m_DbgFinder.processModule(M);
 
     // Lower static global into allocas.
     std::vector<GlobalVariable *> staticGVs;
@@ -6109,6 +6208,49 @@ private:
 };
 }
 
+// Searches the debug global variables known to DbgFinder for the one that
+// describes GV. Returns nullptr when GV has no debug entry.
+static
+DIGlobalVariable *FindGlobalVariableFor(const DebugInfoFinder &DbgFinder, GlobalVariable *GV) {
+  DIGlobalVariable *Match = nullptr;
+  for (auto *Candidate : DbgFinder.global_variables()) {
+    if (Candidate->getVariable() != GV)
+      continue;
+    // First hit wins.
+    Match = Candidate;
+    break;
+  }
+  return Match;
+}
+
+// Emits an @llvm.dbg.declare that describes AI as a local variable named
+// "global.<name>", where <name> comes from the debug entry of GV.
+//
+// DbgFinder - debug info collected from the module.
+// F         - function whose subprogram becomes the scope of the new variable.
+// GV        - the global variable whose debug entry is reused.
+// AI        - the alloca the declare is attached to; the declare is inserted
+//             before the instruction following AI.
+//
+// Does nothing when the module has no compile unit, when GV has no debug
+// entry, or when F has no matching subprogram.
+static
+void PatchDebugInfo(const DebugInfoFinder &DbgFinder, Function *F, GlobalVariable *GV, AllocaInst *AI) {
+  if (!DbgFinder.compile_unit_count())
+    return;
+
+  DIGlobalVariable *DGV = FindGlobalVariableFor(DbgFinder, GV);
+  if (!DGV)
+    return;
+
+  // Find the subprogram for function
+  DISubprogram *Subprogram = nullptr;
+  for (DISubprogram *SP : DbgFinder.subprograms()) {
+    if (SP->getFunction() == F) {
+      Subprogram = SP;
+      break;
+    }
+  }
+
+  // Without a subprogram the lexical block created below would have no
+  // parent scope, which is not a valid scope for a local variable.
+  if (!Subprogram)
+    return;
+
+  DITypeIdentifierMap EmptyMap;
+  DIBuilder DIB(*GV->getParent());
+
+  // Prefer the file of the global's scope; fall back to the global's own file
+  // if it has no scope recorded.
+  DIScope *ParentScope = DGV->getScope();
+  DIFile *File = ParentScope ? ParentScope->getFile() : DGV->getFile();
+
+  DIScope *Scope = DIB.createLexicalBlock(Subprogram, File, 0, 0);
+  DebugLoc Loc = DebugLoc::get(0, 0, Scope);
+
+  std::string Name = "global.";
+  Name += DGV->getName();
+
+  DIType *Ty = DGV->getType().resolve(EmptyMap);
+  DILocalVariable *ConvertedLocalVar = DIB.createLocalVariable(llvm::dwarf::Tag::DW_TAG_variable, Scope, Name, DGV->getFile(), DGV->getLine(), Ty);
+  DIB.insertDeclare(AI, ConvertedLocalVar, DIB.createExpression(ArrayRef<int64_t>()), Loc, AI->getNextNode());
+}
+
 bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV, const DataLayout &DL) {
   DxilTypeSystem &typeSys = m_pHLModule->GetTypeSystem();
   unsigned size = DL.getTypeAllocSize(GV->getType()->getElementType());
@@ -6134,6 +6276,8 @@ bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV
   }
 
   ReplaceConstantWithInst(GV, AI, Builder);
+  PatchDebugInfo(m_DbgFinder, F, GV, AI);
+
   GV->eraseFromParent();
   return true;
 }

+ 45 - 3
lib/Transforms/Scalar/Scalarizer.cpp

@@ -45,13 +45,19 @@ typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList;
 // component of a scattered vector or vector pointer.
 class Scatterer {
 public:
+  bool AllowFolding = false; // HLSL Change
   Scatterer() {}
 
   // Scatter V into Size components.  If new instructions are needed,
   // insert them before BBI in BB.  If Cache is nonnull, use it to cache
   // the results.
+#if 0 // HLSL Change
   Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
             ValueVector *cachePtr = nullptr);
+#else // HLSL Change
+  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, bool AllowFolding,
+            ValueVector *cachePtr = nullptr);
+#endif // HLSL Change
 
   // Return component I, creating a new Value for it if necessary.
   Value *operator[](unsigned I);
@@ -143,6 +149,14 @@ class Scalarizer : public FunctionPass,
 public:
   static char ID;
 
+// HLSL Change Begin
+  bool AllowFolding = false;
+  Scalarizer(bool AllowFolding) :
+    FunctionPass(ID),
+    AllowFolding(AllowFolding) {
+    initializeScalarizerPass(*PassRegistry::getPassRegistry());
+  }
+// HLSL Change End
   Scalarizer() :
     FunctionPass(ID) {
     initializeScalarizerPass(*PassRegistry::getPassRegistry());
@@ -197,10 +211,16 @@ char Scalarizer::ID = 0;
 
 INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
                              "Scalarize vector operations", false, false)
-
+#if 0 // HLSL Change
 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                      ValueVector *cachePtr)
   : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
+#else // HLSL Change
+Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+                     bool AllowFolding,
+                     ValueVector *cachePtr)
+  : AllowFolding(AllowFolding), BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
+#endif // HLSL Change
   Type *Ty = V->getType();
   PtrTy = dyn_cast<PointerType>(Ty);
   if (PtrTy)
@@ -221,6 +241,7 @@ Value *Scatterer::operator[](unsigned I) {
   if (CV[I])
     return CV[I];
   IRBuilder<> Builder(BB, BBI);
+  Builder.AllowFolding = AllowFolding; // HLSL Change
   if (PtrTy) {
     if (!CV[0]) {
       Type *Ty =
@@ -295,19 +316,25 @@ Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
     auto InsertPoint = BB->begin();
     while (InsertPoint != BB->end() && isa<DbgInfoIntrinsic>(InsertPoint))
       InsertPoint++;
-    return Scatterer(BB, InsertPoint, V, &Scattered[V]);
+    return Scatterer(BB, InsertPoint, V, AllowFolding, &Scattered[V]);
     // HLSL Change - End
   }
   if (Instruction *VOp = dyn_cast<Instruction>(V)) {
     // Put the scattered form of an instruction directly after the
     // instruction.
     BasicBlock *BB = VOp->getParent();
+#if 0 // HLSL Change
     return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
                      V, &Scattered[V]);
+#else // HLSL Change
+    return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
+                     V, AllowFolding, &Scattered[V]);
+#endif // HLSL Change
   }
   // In the fallback case, just put the scattered before Point and
   // keep the result local to Point.
-  return Scatterer(Point->getParent(), Point, V);
+  // return Scatterer(Point->getParent(), Point, V); // HLSL Change
+  return Scatterer(Point->getParent(), Point, V, AllowFolding);
 }
 
 // Replace Op with the gathered form of the components in CV.  Defer the
@@ -404,6 +431,7 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
 
   unsigned NumElems = VT->getNumElements();
   IRBuilder<> Builder(I.getParent(), &I);
+  Builder.AllowFolding = AllowFolding; // HLSL Change
   Scatterer Op0 = scatter(&I, I.getOperand(0));
   Scatterer Op1 = scatter(&I, I.getOperand(1));
   assert(Op0.size() == NumElems && "Mismatched binary operation");
@@ -424,6 +452,7 @@ bool Scalarizer::visitSelectInst(SelectInst &SI) {
 
   unsigned NumElems = VT->getNumElements();
   IRBuilder<> Builder(SI.getParent(), &SI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Op1 = scatter(&SI, SI.getOperand(1));
   Scatterer Op2 = scatter(&SI, SI.getOperand(2));
   assert(Op1.size() == NumElems && "Mismatched select");
@@ -465,6 +494,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
     return false;
 
   IRBuilder<> Builder(GEPI.getParent(), &GEPI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   unsigned NumElems = VT->getNumElements();
   unsigned NumIndices = GEPI.getNumIndices();
 
@@ -499,6 +529,7 @@ bool Scalarizer::visitCastInst(CastInst &CI) {
 
   unsigned NumElems = VT->getNumElements();
   IRBuilder<> Builder(CI.getParent(), &CI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Op0 = scatter(&CI, CI.getOperand(0));
   assert(Op0.size() == NumElems && "Mismatched cast");
   ValueVector Res;
@@ -519,6 +550,7 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
   unsigned DstNumElems = DstVT->getNumElements();
   unsigned SrcNumElems = SrcVT->getNumElements();
   IRBuilder<> Builder(BCI.getParent(), &BCI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
   ValueVector Res;
   Res.resize(DstNumElems);
@@ -606,6 +638,7 @@ bool Scalarizer::visitPHINode(PHINode &PHI) {
 
   unsigned NumElems = VT->getNumElements();
   IRBuilder<> Builder(PHI.getParent(), &PHI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   ValueVector Res;
   Res.resize(NumElems);
 
@@ -637,6 +670,7 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
 
   unsigned NumElems = Layout.VecTy->getNumElements();
   IRBuilder<> Builder(LI.getParent(), &LI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
   ValueVector Res;
   Res.resize(NumElems);
@@ -662,6 +696,7 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
 
   unsigned NumElems = Layout.VecTy->getNumElements();
   IRBuilder<> Builder(SI.getParent(), &SI);
+  Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
   Scatterer Val = scatter(&SI, FullValue);
 
@@ -762,6 +797,7 @@ bool Scalarizer::finish() {
       BasicBlock *BB = Op->getParent();
       unsigned Count = Ty->getVectorNumElements();
       IRBuilder<> Builder(BB, Op);
+      Builder.AllowFolding = this->AllowFolding; // HLSL Change
       if (isa<PHINode>(Op))
         Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
       for (unsigned I = 0; I < Count; ++I)
@@ -786,6 +822,12 @@ bool Scalarizer::finish() {
   return true;
 }
 
+// HLSL Change Begin
+// Factory overload: builds a Scalarizer with the requested AllowFolding value.
+FunctionPass *llvm::createScalarizerPass(bool AllowFolding) {
+  return new Scalarizer(AllowFolding);
+}
+// HLSL Change End
 FunctionPass *llvm::createScalarizerPass() {
   return new Scalarizer();
 }

+ 1 - 1
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -1418,7 +1418,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
       funcProps->ShaderProps.GS.instanceCount = 1;
   }
 
-  // Computer shader.
+  // Compute shader
   if (const HLSLNumThreadsAttr *Attr = FD->getAttr<HLSLNumThreadsAttr>()) {
     if (isMS) {
       funcProps->ShaderProps.MS.numThreads[0] = Attr->getX();

+ 1 - 0
tools/clang/lib/Frontend/CMakeLists.txt

@@ -45,6 +45,7 @@ add_clang_library(clangFrontend
 
   DEPENDS
   ClangDriverOptions
+  TablegenHLSLOptions
 
   LINK_LIBS
   clangAST

+ 39 - 0
tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_nested_noopt.hlsl

@@ -0,0 +1,39 @@
+// RUN: %dxc -E main -T vs_6_0 -Zi -Od %s | FileCheck %s
+
+// Test that SROA for local nested arrays of structs/vectors
+// produces and preserves the extra metadata to express strides
+// in the original user variable.
+
+// CHECK-DAG: alloca [6 x float]
+// CHECK-DAG: alloca [6 x float]
+// CHECK-DAG: %[[a:.*]] = alloca [12 x i32]
+
+// CHECK-DAG: call void @llvm.dbg.declare(metadata [12 x i32]* %[[a]], metadata !{{.*}}, metadata ![[aexpr:.*]]), !dbg !{{.*}}, !dx.dbg.varlayout ![[alayout:.*]]
+// CHECK-DAG: call void @llvm.dbg.declare(metadata [6 x float]* %{{.*}}, metadata !{{.*}}, metadata !{{.*}}), !dbg !{{.*}}, !dx.dbg.varlayout !{{.*}}
+// CHECK-DAG: call void @llvm.dbg.declare(metadata [6 x float]* %{{.*}}, metadata !{{.*}}, metadata !{{.*}}), !dbg !{{.*}}, !dx.dbg.varlayout !{{.*}}
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK: !DILocalVariable(tag: DW_TAG_auto_variable, name: "var"
+
+// Debug info for field a should include a contiguous chunk of 32*4=128 bits at offset 0,
+// and the rest expressed as array stride metadata:
+// CHECK-DAG: ![[aexpr]] = !DIExpression(DW_OP_bit_piece, 0, 128)
+// CHECK-DAG: ![[alayout]] = !{i32 0, i32 256, i32 3}
+
+// Debug info for b should be in two parts (b.x and b.y),
+// it should have bit pieces for the first float,
+// and have associated array stride metadata.
+
+// CHECK-DAG: !DIExpression(DW_OP_bit_piece, 128, 32)
+// CHECK-DAG: !{i32 128, i32 256, i32 3, i32 64, i32 2}
+// CHECK-DAG: !DIExpression(DW_OP_bit_piece, 160, 32)
+// CHECK-DAG: !{i32 160, i32 256, i32 3, i32 64, i32 2}
+
+typedef struct { int a[4]; float2 b[2]; } type[3];
+
+int main() : OUT {
+  type var = (type)0;
+  return var[0].a[0];
+}

+ 28 - 0
tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_noopt.hlsl

@@ -0,0 +1,28 @@
+// RUN: %dxc -E main -T vs_6_0 -Zi -Od %s | FileCheck %s
+
+// Check that debug info is preserved with stride information
+// for arrays of structs getting SROA'd down into arrays of struct elements,
+// when compiling without optimizations.
+
+// CHECK-DAG: %[[intalloca:.*]] = alloca [2 x i32]
+// CHECK-DAG: %[[floatalloca:.*]] = alloca [2 x float]
+
+// CHECK-DAG: call void @llvm.dbg.declare(metadata [2 x i32]* %[[intalloca]], metadata !{{.*}}, metadata ![[intdiexpr:.*]]), !dbg !{{.*}}, !dx.dbg.varlayout ![[intlayout:.*]]
+// CHECK-DAG: call void @llvm.dbg.declare(metadata [2 x float]* %[[floatalloca]], metadata !{{.*}}, metadata ![[floatdiexpr:.*]]), !dbg !{{.*}}, !dx.dbg.varlayout ![[floatlayout:.*]]
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK-DAG: !DILocalVariable(tag: DW_TAG_auto_variable, name: "var"
+
+// CHECK-DAG: ![[intdiexpr]] = !DIExpression(DW_OP_bit_piece, 0, 32)
+// CHECK-DAG: ![[intlayout]] = !{i32 0, i32 64, i32 2}
+// CHECK-DAG: ![[floatdiexpr]] = !DIExpression(DW_OP_bit_piece, 32, 32)
+// CHECK-DAG: ![[floatlayout]] = !{i32 32, i32 64, i32 2}
+
+struct intfloat { int i; float f; };
+float4 main(int i : IN) : OUT
+{
+  intfloat var[2] = (intfloat[2])i;
+  return float4(var[0].i, var[0].f, var[1].i, var[1].f);
+}

+ 41 - 0
tools/clang/test/CodeGenHLSL/debug/locals/array_of_structs_opt.hlsl

@@ -0,0 +1,41 @@
+// RUN: %dxc -E main -T vs_6_0 -Zi %s | FileCheck %s
+
+// Check that debug info is preserved for arrays of structs
+// getting SROA'd down into arrays of struct elements,
+// then SROA'd into individual allocas and promoted to registers,
+// when compiling with optimizations.
+
+// CHECK-DAG: %[[i1:.*]] = extractvalue %dx.types.CBufRet.i32 %{{.*}}, 0
+// CHECK-DAG: %[[f1:.*]] = extractvalue %dx.types.CBufRet.f32 %{{.*}}, 1
+// CHECK-DAG: %[[i2:.*]] = extractvalue %dx.types.CBufRet.i32 %{{.*}}, 2
+// CHECK-DAG: %[[f2:.*]] = extractvalue %dx.types.CBufRet.f32 %{{.*}}, 3
+
+// CHECK-DAG: call void @llvm.dbg.value(metadata i32 %[[i1]], i64 0, metadata !{{.*}}, metadata ![[i1expr:.*]])
+// CHECK-DAG: call void @llvm.dbg.value(metadata float %[[f1]], i64 0, metadata !{{.*}}, metadata ![[f1expr:.*]])
+// CHECK-DAG: call void @llvm.dbg.value(metadata i32 %[[i2]], i64 0, metadata !{{.*}}, metadata ![[i2expr:.*]])
+// CHECK-DAG: call void @llvm.dbg.value(metadata float %[[f2]], i64 0, metadata !{{.*}}, metadata ![[f2expr:.*]])
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK-DAG: !DILocalVariable(tag: DW_TAG_auto_variable, name: "var"
+
+// CHECK-DAG: ![[i1expr]] = !DIExpression(DW_OP_bit_piece, 0, 32)
+// CHECK-DAG: ![[f1expr]] = !DIExpression(DW_OP_bit_piece, 32, 32)
+// CHECK-DAG: ![[i2expr]] = !DIExpression(DW_OP_bit_piece, 64, 32)
+// CHECK-DAG: ![[f2expr]] = !DIExpression(DW_OP_bit_piece, 96, 32)
+
+struct intfloat { int i; float f; };
+
+int cb_i1; float cb_f1;
+int cb_i2; float cb_f2;
+
+void main(
+    out int o_i1 : I1, out float o_f1 : F1,
+    out int o_i2 : I2, out float o_f2 : F2)
+{
+  intfloat var[2] = { cb_i1, cb_f1, cb_i2, cb_f2 };
+  
+  o_i1 = var[0].i; o_f1 = var[0].f;
+  o_i2 = var[1].i; o_f2 = var[1].f;
+}

+ 2 - 1
tools/clang/test/CodeGenSPIRV/spirv.debug.cl-option.hlsl

@@ -3,7 +3,8 @@
 // This test ensures that command line options used to generate this module
 // are added to the SPIR-V using OpModuleProcessed.
 
-// CHECK: OpModuleProcessed "dxc-cl-option: -E main -T ps_6_1 
+// CHECK: OpModuleProcessed "dxc-cl-option:
+// CHECK-SAME: -E main -T ps_6_1 
 // CHECK-SAME: -fspv-target-env=vulkan1.1 -Zi
 
 void main() {}

+ 18 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/dyn_vec.hlsl

@@ -0,0 +1,18 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test for a dynamically indexed vector
+
+[RootSignature("")]
+float main(float4 vec : COLOR, int index : INDEX) : SV_Target {
+  // CHECK: alloca [4 x float]
+
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+
+  // CHECK: load
+  return vec[index];
+}
+
+

+ 11 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/fcgl.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc %s -E main -T ps_6_0 -Zi -Od -fcgl | FileCheck %s
+
+// CHECK: @main
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+[RootSignature("")]
+float4 main() : SV_Target {
+  return float4(1,1,1,1);
+};

+ 23 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/global_dyn_vec.hlsl

@@ -0,0 +1,23 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test for a dynamically indexed static global vector
+
+static float4 MyGlobal;
+
+// CHECK-NOT: internal global
+
+[RootSignature("")]
+float main(float4 vec : COLOR, int index : INDEX) : SV_Target {
+  MyGlobal = vec.zyxw;
+  // CHECK: alloca [4 x float]
+
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+
+  // CHECK: load
+  return MyGlobal[index];
+}
+
+

+ 22 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/global_vec.hlsl

@@ -0,0 +1,22 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od -Zi | FileCheck %s
+
+// Test that a static global vector (not dynamically indexed) needs no alloca and keeps debug info
+
+static float4 MyGlobal;
+
+// CHECK-NOT: internal global
+
+[RootSignature("")]
+float4 main(float4 vec : COLOR, int index : INDEX) : SV_Target {
+  MyGlobal = vec.zyxw;
+  // CHECK-NOT: alloca
+  return MyGlobal;
+}
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK: !DILocalVariable(tag: DW_TAG_variable, name: "global.MyGlobal
+// CHECK: !DILocalVariable(tag: DW_TAG_variable, name: "global.MyGlobal
+// CHECK: !DILocalVariable(tag: DW_TAG_variable, name: "global.MyGlobal
+

+ 26 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/gv_od.hlsl

@@ -0,0 +1,26 @@
+// RUN: %dxc -E main -T ps_6_0 -Od -Zi %s | FileCheck %s
+
+// Regression test for making sure that static variables
+// still work with -Od.
+
+// CHECK: @main
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK: !DILocalVariable(tag: DW_TAG_variable, name: "global.gG"
+
+static bool gG;
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+Texture2D f(bool foo) {
+  return foo ? tex0 : tex1;
+}
+
+[RootSignature("DescriptorTable(SRV(t0, numDescriptors=2))")]
+float4 main() : sv_target {
+  gG = true;
+  return f(gG).Load(0);
+};

+ 9 - 4
tools/clang/test/HLSLFileCheck/dxil/debug/locals/matrix_no_opt.hlsl

@@ -2,14 +2,19 @@
 
 // Test that local matrices preserve debug info without optimizations
 
-// CHECK: %[[mat:.*]] = alloca [4 x i32]
-// CHECK: call void @llvm.dbg.declare(metadata [4 x i32]* %[[mat]], metadata ![[divar:.*]], metadata ![[diexpr:.*]])
+// CHECK: @llvm.dbg.value(metadata i32 %{{.*}}, metadata ![[divar:.*]], metadata ![[diexpr0:[0-9]+]]
+// CHECK: @llvm.dbg.value(metadata i32 %{{.*}}, metadata ![[divar]], metadata ![[diexpr1:[0-9]+]]
+// CHECK: @llvm.dbg.value(metadata i32 %{{.*}}, metadata ![[divar]], metadata ![[diexpr2:[0-9]+]]
+// CHECK: @llvm.dbg.value(metadata i32 %{{.*}}, metadata ![[divar]], metadata ![[diexpr3:[0-9]+]]
 
 // Exclude quoted source file (see readme)
 // CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
 
 // CHECK-DAG: ![[divar]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "mat"
-// CHECK-DAG: ![[diexpr]] = !DIExpression()
+// CHECK-DAG: ![[diexpr0]] = !DIExpression(DW_OP_bit_piece, {{[0-9]+}}, {{[0-9]+}})
+// CHECK-DAG: ![[diexpr1]] = !DIExpression(DW_OP_bit_piece, {{[0-9]+}}, {{[0-9]+}})
+// CHECK-DAG: ![[diexpr2]] = !DIExpression(DW_OP_bit_piece, {{[0-9]+}}, {{[0-9]+}})
+// CHECK-DAG: ![[diexpr3]] = !DIExpression(DW_OP_bit_piece, {{[0-9]+}}, {{[0-9]+}})
 
 int2x2 cb_mat;
 int main() : OUT
@@ -18,4 +23,4 @@ int main() : OUT
   int2x2 mat = cb_mat;
   // Consume all values but return a scalar to avoid another alloca [4 x i32]
   return determinant(mat);
-}
+}

+ 2 - 2
tools/clang/test/HLSLFileCheck/dxil/debug/locals/temporary_dbg_declare.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T vs_6_0 -Od -Zi %s | FileCheck %s
+// RUN: %dxc -E main -T vs_6_0 -Od -Zi -fcgl %s | FileCheck %s
 
 // Test that dbg.declares are emitted for temporaries.
 
@@ -11,4 +11,4 @@ int main(int x : IN) : OUT {
 }
 
 // Exclude quoted source file (see readme)
-// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}

+ 28 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/mat3x2_dbg.hlsl

@@ -0,0 +1,28 @@
+// RUN: %dxc -E main -T vs_6_0 -Zi -Od %s | FileCheck %s
+
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %{{.*}}, i64 0, metadata ![[var_md:[0-9]+]], metadata ![[expr_md:[0-9]+]]
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK-DAG: ![[var_md]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "my_mat"
+// CHECK-DAG: ![[expr_md]] = !DIExpression(DW_OP_bit_piece,
+
+[RootSignature("")]
+uint3x2 main(uint2 uv : TEXCOORD) : MY_MAT {
+  uint3x2 my_mat = uint3x2(
+    uv.y * 0.5, uv.x * 0.5,
+    1.0 - uv.x, 1.0 - uv.x,
+    1.0 - uv.x, 1.0 - uv.x
+  );
+  return my_mat;
+}
+

+ 25 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/mat_dbg.hlsl

@@ -0,0 +1,25 @@
+// RUN: %dxc -E main -T vs_6_0 -Zi -Od %s | FileCheck %s
+
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %{{.*}}, i64 0, metadata ![[var_md:[0-9]+]], metadata ![[expr_md:[0-9]+]]
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK-DAG: ![[var_md]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "my_mat"
+// CHECK-DAG: ![[expr_md]] = !DIExpression(DW_OP_bit_piece,
+
+[RootSignature("")]
+uint2x2 main(uint2 uv : TEXCOORD) : MY_MAT {
+  uint2x2 my_mat = uint2x2(
+    uv.y * 0.5, uv.x * 0.5,
+    1.0 - uv.x, 1.0 - uv.x
+  );
+  return my_mat;
+}
+

+ 5 - 8
tools/clang/test/HLSLFileCheck/dxil/debug/misc/intrinsic4_dbg.hlsl

@@ -11,17 +11,14 @@
 // CHECK: calculateLOD
 // CHECK: i1 false
 // CHECK: texture2DMSGetSamplePosition
-// CHECK: llvm.dbg.declare(metadata i32* %width
-// CHECK: llvm.dbg.declare(metadata i32* %height
-// CHECK: llvm.dbg.declare(metadata i32* %numOfLevels
 // CHECK: getDimensions
-// CHECK: llvm.dbg.declare(metadata i32* %arraySize
-// CHECK: llvm.dbg.declare(metadata i32* %numSamples
+// CHECK: llvm.dbg.value(metadata i32 %
+// CHECK: llvm.dbg.value(metadata i32 %
 // CHECK: getDimensions
-// CHECK: llvm.dbg.declare(metadata i32* %numStructs
-// CHECK: llvm.dbg.declare(metadata i32* %stride
+// CHECK: llvm.dbg.value(metadata i32 %
+// CHECK: llvm.dbg.value(metadata i32 %
 // CHECK: getDimensions
-// CHECK: llvm.dbg.declare(metadata i32* %dim
+// CHECK: llvm.dbg.value(metadata i32 %
 // CHECK: getDimensions
 
 // Exclude quoted source file (see readme)

+ 1 - 1
tools/clang/test/HLSLFileCheck/dxil/debug/misc/share_mem_dbg.hlsl

@@ -22,7 +22,7 @@
 // Make sure source info contents exist.
 // CHECK: !{!"DefineA=1", !"DefineB=0"}
 // CHECK: share_mem_dbg.hlsl"}
-// CHECK: !{!"-E", !"main", !"-T", !"cs_6_0", !"-Zi", !"-Od", !"-D", !"DefineA", !"-D", !"DefineB=0", !"-Qstrip_reflect"}
+// CHECK: !{!"-E", !"main", !"-T", !"cs_6_0", !"-Zi", !"-Od", !"-D", !"DefineA", !"-D", !"DefineB=0", !"-Qstrip_reflect", !"-Qembed_debug"}
 
 
 struct S {

+ 33 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/no_fold.hlsl

@@ -0,0 +1,33 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic is not optimized away
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
+float4 main() : SV_Target {
+
+  float x = 10;
+
+  float y = x + 5;
+  // CHECK: fadd
+  float z = y * 2;
+  // CHECK: fmul
+  float w = z / 0.5;
+  // CHECK: fdiv
+
+  Texture2D tex = tex0; 
+  // CHECK: br i1
+  if (w >= 0) {
+    tex = tex1;
+    // CHECK: br
+  }
+
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  return tex.Load(0) + float4(x,y,z,w);
+}
+

+ 38 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/no_fold_vec.hlsl

@@ -0,0 +1,38 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic is not optimized away
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
+float4 main() : SV_Target {
+
+  float2 xy = float2(10, 20);
+
+  float2 zw = xy + float2(5, 30);
+  // CHECK: fadd
+  // CHECK: fadd
+
+  float2 foo = zw * 2;
+  // CHECK: fmul
+  // CHECK: fmul
+
+  float2 bar = foo / 0.5;
+  // CHECK: fdiv
+  // CHECK: fdiv
+
+  Texture2D tex = tex0; 
+  // CHECK: br i1
+  if (foo.x+bar.y >= 0) {
+    tex = tex1;
+    // CHECK: br
+  }
+
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  return tex.Load(0) + float4(foo,bar);
+}
+

+ 34 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/no_fold_vec_array.hlsl

@@ -0,0 +1,34 @@
+// RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
+
+// Check that arrays of vectors still work with -Od
+// without all the inst-simplify
+
+[RootSignature("")]
+float2 main(int index : INDEX) : SV_Target {
+
+  float2 values[4] = {
+    float2(1,2),
+    float2(3,4),
+    float2(5,6),
+    float2(7,8),
+  };
+
+  // CHECK: alloca [4 x float]
+  // CHECK: alloca [4 x float]
+
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+  // CHECK: store
+
+  // CHECK: load
+  // CHECK: load
+
+  return values[3];
+}
+

+ 149 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noops_call.hlsl

@@ -0,0 +1,149 @@
+// RUN: %dxilver 1.6 | %dxc -E main -T ps_6_6 %s -Od | FileCheck %s
+
+typedef float4 MyCoolFloat4; 
+static float4 myStaticGlobalVar = float4(1.0, 1.0, 1.0, 1.0);
+
+// Local var with same name as outer scope
+float4 localScopeVar_func(float4 val)
+{
+    float4 color = val * val;
+    return color;
+}
+
+// Local var with same name as register
+float4 localRegVar_func(float4 val)
+{
+    float4 r1 = val;
+    return r1;
+}
+
+// Array
+float4 array_func(float4 val)
+{
+    float result[4];
+    result[0] = val.x;
+    result[1] = val.y;
+    result[2] = val.z;
+    result[3] = val.w;
+    return float4(result[0], result[1], result[2], result[3]);
+}
+
+// Typedef
+float4 typedef_func(float4 val)
+{
+    MyCoolFloat4 result = val;
+    return result;
+}
+
+// Global
+float4 global_func(float4 val)
+{
+    myStaticGlobalVar *= val;
+    return myStaticGlobalVar;
+}
+
+float4 depth4(float4 val)
+{
+    val = val * val;
+    return val;
+}
+
+float4 depth3(float4 val)
+{
+    val = depth4(val) * val;
+    return val;
+}
+
+float4 depth2(float4 val)
+{
+    val = depth3(val) * val;
+    return val;
+}
+
+[RootSignature("")]
+float4 main( float4 unused : SV_POSITION, float4 color : COLOR ) : SV_Target
+{
+    float4 ret1 = localScopeVar_func(color);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // CHECK: fmul
+    // CHECK: fmul
+    // CHECK: fmul
+    // CHECK: fmul
+    // ** return **
+    // CHECK: call void @llvm.donothing()
+
+    float4 ret2 = localRegVar_func(ret1);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // ** copy **
+    // CHECK: call void @llvm.donothing()
+    // ** return **
+    // CHECK: call void @llvm.donothing()
+
+    float4 ret3 = array_func(ret2);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // CHECK: store
+    // CHECK: store
+    // CHECK: store
+    // CHECK: store
+    // CHECK: load
+    // CHECK: load
+    // CHECK: load
+    // CHECK: load
+    // ** return **
+    // CHECK: call void @llvm.donothing()
+
+    float4 ret4 = typedef_func(ret3);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // ** copy **
+    // CHECK: call void @llvm.donothing()
+    // ** return **
+    // CHECK: call void @llvm.donothing()
+
+    float4 ret5 = global_func(ret4);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // CHECK: fmul
+    // CHECK: fmul
+    // CHECK: fmul
+    // CHECK: fmul
+    // ** return **
+    // CHECK: call void @llvm.donothing()
+
+    float4 ret6 = depth2(ret5);
+    // ** call **
+    // CHECK: call void @llvm.donothing()
+    // depth2() {
+      // ** call **
+      // CHECK: call void @llvm.donothing()
+      // depth3() {
+        // ** call **
+        // CHECK: call void @llvm.donothing()
+        // depth4() {
+          // CHECK: fmul
+          // CHECK: fmul
+          // CHECK: fmul
+          // CHECK: fmul
+          // CHECK: call void @llvm.donothing()
+        // }
+        // CHECK: fmul
+        // CHECK: fmul
+        // CHECK: fmul
+        // CHECK: fmul
+        // CHECK: call void @llvm.donothing()
+      // }
+      // CHECK: fmul
+      // CHECK: fmul
+      // CHECK: fmul
+      // CHECK: fmul
+      // CHECK: call void @llvm.donothing()
+    // }
+
+    return max(ret6, color);
+    // CHECK: call void @llvm.donothing()
+}
+
+

+ 38 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold.hlsl

@@ -0,0 +1,38 @@
+// RUN: %dxilver 1.6 | %dxc -E main -T ps_6_6 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic is not optimized away
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
+float4 main() : SV_Target {
+
+  float x = 10;
+  // CHECK: call void @llvm.donothing()
+
+  float y = x + 5;
+  // CHECK: fadd
+  float z = y * 2;
+  // CHECK: fmul
+  float w = z / 0.5;
+  // CHECK: fdiv
+
+  Texture2D tex = tex0; 
+  // CHECK: call void @llvm.donothing()
+
+  // CHECK: br i1
+  if (w >= 0) {
+    tex = tex1;
+    // CHECK: call void @llvm.donothing()
+    // CHECK: br
+  }
+
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  return tex.Load(0) + float4(x,y,z,w);
+  // CHECK: call void @llvm.donothing()
+}
+

+ 43 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/noops_no_fold_vec.hlsl

@@ -0,0 +1,43 @@
+// RUN: %dxilver 1.6 | %dxc -E main -T ps_6_6 %s -Od | FileCheck %s
+
+// Test that non-const arithmetic is not optimized away
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+[RootSignature("DescriptorTable(SRV(t0), SRV(t1))")]
+float4 main() : SV_Target {
+
+  float2 xy = float2(10, 20);
+  // CHECK: call void @llvm.donothing()
+
+  float2 zw = xy + float2(5, 30);
+  // CHECK: fadd
+  // CHECK: fadd
+
+  float2 foo = zw * 2;
+  // CHECK: fmul
+  // CHECK: fmul
+
+  float2 bar = foo / 0.5;
+  // CHECK: fdiv
+  // CHECK: fdiv
+
+  Texture2D tex = tex0; 
+  // CHECK: call void @llvm.donothing()
+
+  // CHECK: br i1
+  if (foo.x+bar.y >= 0) {
+    tex = tex1;
+    // CHECK: call void @llvm.donothing()
+    // CHECK: br
+  }
+
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  // CHECK: fadd
+  return tex.Load(0) + float4(foo,bar);
+  // CHECK: call void @llvm.donothing()
+}
+

+ 44 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/calculations.hlsl

@@ -0,0 +1,44 @@
+// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
+
+// CHECK: @main
+
+// CHECK: !dx.controlflow.hints
+// CHECK: !dx.controlflow.hints
+// CHECK: !dx.controlflow.hints
+
+// Make sure that even when we don't simplify cfg, DxilValueCache
+// is still able to figure out values.
+
+static int g_foo;
+static int g_bar;
+static int g_baz;
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+
+Texture2D f(int foo, int bar, int baz) {
+  foo += 10;
+  if (foo+bar < baz*2)
+    return tex0;
+  else
+    return tex1;
+}
+
+[RootSignature("DescriptorTable(SRV(t0, numDescriptors=2))")]
+float4 main() : sv_target {
+  g_foo = 10;
+  [branch]
+  if (g_foo > 10)
+    g_foo = 30;
+  [branch]
+  if (g_foo < 50)
+    g_foo = 90;
+  [branch]
+  if (g_foo > 80)
+    g_bar = 20;
+
+  g_baz = 30;
+  return f(g_foo, g_bar, g_baz).Load(0);
+};
+
+

+ 32 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/cfg.hlsl

@@ -0,0 +1,32 @@
+// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
+
+// CHECK: @main
+
+static bool gG;
+static bool gG2;
+static bool gG3;
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+Texture2D tex2 : register(t2);
+
+Texture2D f(bool foo) {
+  if (foo)
+    return tex0;
+  else
+    return tex1;
+}
+
+Texture2D h(bool foo3) {
+  return foo3 ? f(gG2) : tex2;
+}
+
+[RootSignature("DescriptorTable(SRV(t0, numDescriptors=3))")]
+float4 main() : sv_target {
+  gG = true;
+  gG2 = false;
+  gG3 = false;
+  return h(gG).Load(0);
+};
+
+

+ 40 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/cfg2.hlsl

@@ -0,0 +1,40 @@
+// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
+
+// CHECK: @main
+
+static bool gG;
+static bool gG2;
+static bool gG3;
+
+Texture2D tex0 : register(t0);
+Texture2D tex1 : register(t1);
+Texture2D tex2 : register(t2);
+
+Texture2D f(bool foo) {
+  [branch]
+  if (foo)
+    return tex0;
+  else
+    return tex1;
+}
+Texture2D g(bool foo) {
+  [branch]
+  if (foo)
+    return tex1;
+  else
+    return tex2;
+}
+
+Texture2D h(bool foo3) {
+  return foo3 ? f(gG2) : g(gG3);
+}
+
+[RootSignature("DescriptorTable(SRV(t0, numDescriptors=3))")]
+float4 main() : sv_target {
+  gG = true;
+  gG2 = false;
+  gG3 = false;
+  return h(gG).Load(0);
+};
+
+

+ 34 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/lexicalBlock.hlsl

@@ -0,0 +1,34 @@
+// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
+
+// Make sure we are generating branches instead of selects.
+
+// CHECK: @main
+[RootSignature("")]
+float4 main(float4 color : COLOR) : SV_Target
+{
+    int value = 0;
+    
+    // CHECK: br i1
+    if (color.x < 0.5)
+        value = 1;
+        // CHECK: br
+ 
+    // CHECK: br i1
+    if (color.y < 0.5)
+        value = 2;
+        // CHECK: br 
+        
+    // CHECK: br i1
+    if (color.z < 0.5)
+        value = 3;
+        // CHECK: br
+        
+    // CHECK: br i1
+    if (color.w < 0.5)
+        value = 4;
+        // CHECK: br
+                        
+    float4 result = float4(value,1,1,1);
+  
+    return result;
+}

+ 22 - 0
tools/clang/test/HLSLFileCheck/dxil/debug/vec_dbg.hlsl

@@ -0,0 +1,22 @@
+// RUN: %dxc -E main -T ps_6_0 -Zi -Od %s | FileCheck %s
+
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %
+// CHECK: void @llvm.dbg.value(metadata i32 %{{.*}}, i64 0, metadata ![[var_md:[0-9]+]], metadata ![[expr_md:[0-9]+]]
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+// CHECK-DAG: ![[var_md]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "my_uv"
+// CHECK-DAG: ![[expr_md]] = !DIExpression(DW_OP_bit_piece,
+
+[RootSignature("")]
+float2 main(uint2 uv : TEXCOORD) : SV_Target {
+  uint2 my_uv = {
+    uv.y * 0.5,
+    1.0 - uv.x,
+  };
+  return my_uv;
+}
+

+ 1 - 1
tools/clang/test/HLSLFileCheck/hlsl/control_flow/if_else/if2.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
+// RUN: %dxc -E main -T ps_6_0 -Od %s -fcgl | FileCheck %s
 
 // CHECK: !"dx.controlflow.hints", i32 2
 

+ 6 - 4
tools/clang/test/HLSLFileCheck/hlsl/objects/Texture/sample_kwd.hlsl

@@ -1,9 +1,11 @@
 // RUN: %dxc -T ps_6_0 -Od -E main %s | FileCheck %s
 
-// CHECK: %precise = alloca float, align 4
-// CHECK: %globallycoherent = alloca i32, align 4
-// CHECK: %sample = alloca float, align 4
-// CHECK: %center = alloca float, align 4
+// This test used to check for the following allocas, but they are now eliminated, so the checks were removed.
+//
+//    %precise = alloca float, align 4
+//    %globallycoherent = alloca i32, align 4
+//    %sample = alloca float, align 4
+//    %center = alloca float, align 4
 
 // CHECK: call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %MyBuffer_UAV_structbuf, i32 0, i32 0)
 // CHECK: call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %MyBuffer_UAV_structbuf, i32 0, i32 16)

+ 3 - 3
tools/clang/test/HLSLFileCheck/hlsl/types/boolean/bool_scalar_swizzle.hlsl

@@ -1,11 +1,11 @@
-// RUN: %dxc -E main -T ps_6_0 -O0 %s | FileCheck %s
+// RUN: %dxc -E main -T ps_6_0 -O0 %s -fcgl | FileCheck %s
 
 // This is mostly a regression test for a bug where a bitcast
 // from i32* to i1* was emitted.
 
 // CHECK: alloca i32
-// CHECK: alloca [2 x i32]
-// CHECK-NOT: bitcast
+// CHECK: alloca <2 x i32>
+// CHECK-NOT: bitcast i32* %b to <1 x i1>*
 
 float main() : SV_Target
 {

+ 5 - 18
tools/clang/test/HLSLFileCheck/hlsl/types/boolean/local_load_store.hlsl

@@ -3,52 +3,39 @@
 // Ensure that bools are converted from/to their memory representation when loaded/stored
 // in local variables.
 
-// Local variables should never be i1s
-// CHECK-NOT: alloca {{.*}}i1
-
 int main(int i : I) : OUT
 {
     // CHECK: icmp eq i32 {{.*}}, 42
-    // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool s = i == 42;
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool1 v = i == 42;
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool1x1 m = i == 42;
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool sa[1] = { i == 42 };
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool1 va[1] = { i == 42 };
     // CHECK: icmp eq i32 {{.*}}, 42
     // CHECK: zext i1 {{.*}} to i32
-    // CHECK: store i32
     bool1x1 ma[1] = { i == 42 };
 
-    // CHECK: load i32
-    // CHECK: icmp ne i32 {{.*}}, 0
+    // Used to check icmp ne i32 {{.*}}, 0
+    // but since variable "s" was never stored
+    // to memory, it stayed as an i1 value,
+    // so no need to icmp that to 0.
     return (s
-        // CHECK: load i32
         // CHECK: icmp ne i32 {{.*}}, 0
         && v.x
-        // CHECK: load i32
         // CHECK: icmp ne i32 {{.*}}, 0
         && m._11
-        // CHECK: load i32
         // CHECK: icmp ne i32 {{.*}}, 0
         && sa[0]
-        // CHECK: load i32
         // CHECK: icmp ne i32 {{.*}}, 0
         && va[0].x
-        // CHECK: load i32
         // CHECK: icmp ne i32 {{.*}}, 0
         && ma[0]._11) ? 1 : 2;
-}
+}

+ 27 - 0
tools/clang/test/HLSLFileCheck/hlsl/types/boolean/local_load_store_scalar.hlsl

@@ -0,0 +1,27 @@
+// RUN: %dxc -E main -T vs_6_0 -O0 %s -fcgl | FileCheck %s
+
+// Ensure that bools are converted from/to their memory representation when loaded/stored
+// in local variables.
+
+// Local variables should never be i1s
+// CHECK-NOT: alloca {{.*}}i1
+
+int main(int i : I) : OUT
+{
+    // CHECK: alloca i32
+    // CHECK: icmp eq i32 {{.*}}, 42
+    // CHECK: zext i1 {{.*}} to i32
+    bool s = i == 42;
+    bool1 v = i == 42;
+    bool1x1 m = i == 42;
+    bool sa[1] = { i == 42 };
+    bool1 va[1] = { i == 42 };
+    bool1x1 ma[1] = { i == 42 };
+
+    return (s
+        && v.x
+        && m._11
+        && sa[0]
+        && va[0].x
+        && ma[0]._11) ? 1 : 2;
+}

+ 2 - 2
tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/center/center_kwd.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -T ps_6_0 -Od -E main %s | FileCheck %s 
 
-// CHECK: %center = alloca float, align 4
+// CHECK: @main
 
 // make sure 'center' is allowed as an interpolation modifier
 float main(center float t : T) : SV_TARGET
@@ -8,4 +8,4 @@ float main(center float t : T) : SV_TARGET
     // and also as an identifier
     float center = 10.0f;
     return center * 2;
-}
+}

+ 3 - 2
tools/clang/test/HLSLFileCheck/samples/d3d11/SubD11_SubDToBezierHS.hlsl

@@ -3,8 +3,9 @@
 // CHECK: primitiveID
 // CHECK: storePatchConstant
 // CHECK: main
-// CHECK: primitiveID
-// CHECK: bufferLoad
+// These values are not used other than being stored into array allocas, which we now remove.
+// xCHECK: primitiveID
+// xCHECK: bufferLoad
 // CHECK: storeOutput
 
 //--------------------------------------------------------------------------------------

+ 2 - 2
tools/clang/tools/dxa/dxa.cpp

@@ -95,7 +95,7 @@ void DxaContext::Assemble() {
         }
       }
 
-      WriteBlobToFile(pContainer, StringRefUtf16(OutputFilename));
+      WriteBlobToFile(pContainer, StringRefUtf16(OutputFilename), DXC_CP_UTF8); // TODO: Support DefaultTextCodePage
     }
   }
 }
@@ -298,7 +298,7 @@ bool DxaContext::ExtractPart(const char *pName) {
         std::swap(pModuleBlob, pContent);
       }
 
-      WriteBlobToFile(pContent, StringRefUtf16(OutputFilename));
+      WriteBlobToFile(pContent, StringRefUtf16(OutputFilename), DXC_CP_UTF8); // TODO: Support DefaultTextCodePage
       printf("%Iu bytes written to %s\n", pContent->GetBufferSize(), OutputFilename.c_str());
       return true;
     }

+ 58 - 27
tools/clang/tools/dxclib/dxc.cpp

@@ -159,8 +159,8 @@ public:
   void GetCompilerVersionInfo(llvm::raw_string_ostream &OS);
 };
 
-static void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, llvm::StringRef FName) {
-  ::dxc::WriteBlobToFile(pBlob, StringRefUtf16(FName));
+static void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, llvm::StringRef FName, UINT32 defaultTextCodePage) {
+  ::dxc::WriteBlobToFile(pBlob, StringRefUtf16(FName), defaultTextCodePage);
 }
 
 static void WritePartToFile(IDxcBlob *pBlob, hlsl::DxilFourCC CC,
@@ -191,6 +191,16 @@ static void WritePartToFile(IDxcBlob *pBlob, hlsl::DxilFourCC CC,
   }
 }
 
+static void WriteDxcOutputToFile(DXC_OUT_KIND kind, IDxcResult *pResult, UINT32 textCodePage) {
+  if (pResult->HasOutput(kind)) {
+    CComPtr<IDxcBlob> pData;
+    CComPtr<IDxcBlobUtf16> pName;
+    IFT(pResult->GetOutput(kind, IID_PPV_ARGS(&pData), &pName));
+    if (pName && pName->GetStringLength() > 0)
+      WriteBlobToFile(pData, pName->GetStringPointer(), textCodePage);
+  }
+}
+
 // This function is called either after the compilation is done or /dumpbin option is provided
 // Performing options that are used to process dxil container.
 int DxcContext::ActOnBlob(IDxcBlob *pBlob) {
@@ -211,7 +221,7 @@ int DxcContext::ActOnBlob(IDxcBlob *pBlob, IDxcBlob *pDebugBlob, LPCWSTR pDebugB
     if (!m_Opts.ExtractRootSignature) {
       CComPtr<IDxcBlob> pResult;
       UpdatePart(pBlob, &pResult);
-      WriteBlobToFile(pResult, m_Opts.OutputObject);
+      WriteBlobToFile(pResult, m_Opts.OutputObject, m_Opts.DefaultTextCodePage);
     }
   }
 
@@ -229,7 +239,7 @@ int DxcContext::ActOnBlob(IDxcBlob *pBlob, IDxcBlob *pDebugBlob, LPCWSTR pDebugB
     if (pDebugBlob != nullptr) {
       IFTBOOLMSG(pDebugBlobName && *pDebugBlobName, E_INVALIDARG,
         "/Fd was specified but no debug name was produced");
-      WriteBlobToFile(pDebugBlob, pDebugBlobName);
+      WriteBlobToFile(pDebugBlob, pDebugBlobName, m_Opts.DefaultTextCodePage);
     } else {
       // Note: This is for load from binary case
       WritePartToFile(pBlob, hlsl::DFCC_ShaderDebugInfoDXIL, m_Opts.DebugFile);
@@ -240,7 +250,7 @@ int DxcContext::ActOnBlob(IDxcBlob *pBlob, IDxcBlob *pDebugBlob, LPCWSTR pDebugB
   if (m_Opts.ExtractRootSignature) {
     CComPtr<IDxcBlob> pRootSignatureContainer;
     ExtractRootSignature(pBlob, &pRootSignatureContainer);
-    WriteBlobToFile(pRootSignatureContainer, m_Opts.OutputObject);
+    WriteBlobToFile(pRootSignatureContainer, m_Opts.OutputObject, m_Opts.DefaultTextCodePage);
   }
 
   // Extract and write private data.
@@ -299,7 +309,7 @@ int DxcContext::ActOnBlob(IDxcBlob *pBlob, IDxcBlob *pDebugBlob, LPCWSTR pDebugB
     WriteHeader(pDisassembleResult, pBlob, varName,
                 StringRefUtf16(m_Opts.OutputHeader));
   } else if (!m_Opts.AssemblyCode.empty()) {
-    WriteBlobToFile(pDisassembleResult, m_Opts.AssemblyCode);
+    WriteBlobToFile(pDisassembleResult, m_Opts.AssemblyCode, m_Opts.DefaultTextCodePage);
   } else {
     WriteBlobToConsole(pDisassembleResult);
   }
@@ -364,7 +374,7 @@ void DxcContext::UpdatePart(IDxcBlob *pSource, IDxcBlob **ppResult) {
     CComPtr<IDxcBlobEncoding> pErrors;
     IFT(pBuilderResult->GetErrorBuffer(&pErrors));
     if (pErrors != nullptr) {
-      WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile);
+      WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile, m_Opts.DefaultTextCodePage);
     }
   }
   else {
@@ -490,7 +500,7 @@ int DxcContext::VerifyRootSignature() {
     if (!m_Opts.OutputWarningsFile.empty()) {
       CComPtr<IDxcBlobEncoding> pErrors;
       IFT(pOperationResult->GetErrorBuffer(&pErrors));
-      WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile);
+      WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile, m_Opts.DefaultTextCodePage);
     }
     else {
       WriteOperationErrorsToConsole(pOperationResult, m_Opts.OutputWarnings);
@@ -801,7 +811,7 @@ int DxcContext::Compile() {
   if (!m_Opts.OutputWarningsFile.empty()) {
     CComPtr<IDxcBlobEncoding> pErrors;
     IFT(pCompileResult->GetErrorBuffer(&pErrors));
-    WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile);
+    WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile, m_Opts.DefaultTextCodePage);
   }
   else {
     WriteOperationErrorsToConsole(pCompileResult, m_Opts.OutputWarnings);
@@ -812,10 +822,16 @@ int DxcContext::Compile() {
   if (SUCCEEDED(status) || m_Opts.AstDump || m_Opts.OptDump) {
     CComPtr<IDxcBlob> pProgram;
     IFT(pCompileResult->GetResult(&pProgram));
-    pCompiler.Release();
-    pCompileResult.Release();
     if (pProgram.p != nullptr) {
       ActOnBlob(pProgram.p, pDebugBlob, outputPDBPath.c_str());
+
+      // Now write out extra parts
+      CComPtr<IDxcResult> pResult;
+      if (SUCCEEDED(pCompileResult->QueryInterface(&pResult))) {
+        WriteDxcOutputToFile(DXC_OUT_ROOT_SIGNATURE, pResult, m_Opts.DefaultTextCodePage);
+        WriteDxcOutputToFile(DXC_OUT_SHADER_HASH, pResult, m_Opts.DefaultTextCodePage);
+        WriteDxcOutputToFile(DXC_OUT_REFLECTION, pResult, m_Opts.DefaultTextCodePage);
+      }
     }
   }
   return status;
@@ -861,7 +877,7 @@ void DxcContext::Preprocess() {
   if (SUCCEEDED(status)) {
     CComPtr<IDxcBlob> pProgram;
     IFT(pPreprocessResult->GetResult(&pProgram));
-    WriteBlobToFile(pProgram, m_Opts.Preprocess);
+    WriteBlobToFile(pProgram, m_Opts.Preprocess, m_Opts.DefaultTextCodePage);
   }
 }
 
@@ -873,20 +889,34 @@ static void WriteString(HANDLE hFile, _In_z_ LPCSTR value, LPCWSTR pFileName) {
 
 void DxcContext::WriteHeader(IDxcBlobEncoding *pDisassembly, IDxcBlob *pCode,
                              llvm::Twine &pVariableName, LPCWSTR pFileName) {
-  CHandle file(CreateFileW(pFileName, GENERIC_WRITE, FILE_SHARE_READ, nullptr,
-                           CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr));
-  if (file == INVALID_HANDLE_VALUE) {
-    IFT_Data(HRESULT_FROM_WIN32(GetLastError()), pFileName);
-  }
+  // Use older interface for compatibility with older DLL.
+  CComPtr<IDxcLibrary> pLibrary;
+  IFT(CreateInstance(CLSID_DxcLibrary, &pLibrary));
+
+  std::string s;
+  llvm::raw_string_ostream OS(s);
 
   {
-    std::string s;
-    llvm::raw_string_ostream OS(s);
+    // Not safe to assume pDisassembly is utf8, must GetBlobAsUtf8 first.
+    CComPtr<IDxcBlobEncoding> pDisasmEncoding;
+    IFT(pLibrary->GetBlobAsUtf8(pDisassembly, &pDisasmEncoding));
+
+    // Don't fail if this QI doesn't succeed (older dll, perhaps)
+    CComPtr<IDxcBlobUtf8> pDisasmUtf8;
+    pDisasmEncoding->QueryInterface(&pDisasmUtf8);
+
+    LPCSTR pBytes = pDisasmUtf8 ? pDisasmUtf8->GetStringPointer()
+                                : (LPCSTR)pDisasmEncoding->GetBufferPointer();
+    // IDxcBlobUtf8's GetStringLength will return length without null character
+    size_t len = pDisasmUtf8 ? pDisasmUtf8->GetStringLength()
+                             : pDisasmEncoding->GetBufferSize();
+    // Just in case there are still any null characters at the end, get rid of them.
+    while (len && pBytes[len-1] == '\0')
+      len -= 1;
+
     // Note: with \r\n line endings, writing the disassembly could be a simple
     // WriteBlobToHandle with a prior and following WriteString for #ifs
     OS << "#if 0\r\n";
-    const uint8_t *pBytes = (const uint8_t *)pDisassembly->GetBufferPointer();
-    size_t len = pDisassembly->GetBufferSize();
     s.reserve(len + len * 0.1f); // rough estimate
     for (size_t i = 0; i < len; ++i) {
       if (pBytes[i] == '\n')
@@ -894,13 +924,9 @@ void DxcContext::WriteHeader(IDxcBlobEncoding *pDisassembly, IDxcBlob *pCode,
       OS << pBytes[i];
     }
     OS << "\r\n#endif\r\n";
-    OS.flush();
-    WriteString(file, s.c_str(), pFileName);
   }
 
   {
-    std::string s;
-    llvm::raw_string_ostream OS(s);
     OS << "\r\nconst unsigned char " << pVariableName << "[] = {";
     const uint8_t *pBytes = (const uint8_t *)pCode->GetBufferPointer();
     size_t len = pCode->GetBufferSize();
@@ -916,9 +942,14 @@ void DxcContext::WriteHeader(IDxcBlobEncoding *pDisassembly, IDxcBlob *pCode,
       OS.write_hex(pBytes[i]);
     }
     OS << "\r\n};\r\n";
-    OS.flush();
-    WriteString(file, s.c_str(), pFileName);
   }
+
+  OS.flush();
+
+  // Respect user's -encoding option
+  CComPtr<IDxcBlobEncoding> pOutBlob;
+  pLibrary->CreateBlobWithEncodingFromPinned(s.data(), s.length(), DXC_CP_UTF8, &pOutBlob);
+  WriteBlobToFile(pOutBlob, pFileName, m_Opts.DefaultTextCodePage);
 }
 
 // Finds DXIL module from the blob assuming blob is either DxilContainer, DxilPartHeader, or DXIL module

Một số tệp không được hiển thị vì có quá nhiều tệp thay đổi trong diff này