Browse Source

Added support for pre-SM5.1 resource allocation algorithm (#1687)

Added front-end `-flegacy-resource-reservation` to control behavior (added automatically for SM <= 5.0)
Added an error for unbounded resources used with `-flegacy-resource-reservation`
Added a mechanism for `DxilModule` to preserve intermediate options during its passes but not in the final DXIL
Changed `HLModule` to not remove unreferenced `DxilResources` but rather make their symbol UndefValue
Fixed assumptions of `DxilResources` having a valid `GlobalVariable`
Added reserved resource range gather pass before elimination of unused resources (if SM <= 5.0)
Changed resource allocation to account for reserved ranges
Tristan Labelle 6 years ago
parent
commit
685b2afa84
28 changed files with 515 additions and 270 deletions
  1. 8 0
      include/dxc/DXIL/DxilMetadataHelper.h
  2. 15 2
      include/dxc/DXIL/DxilModule.h
  3. 26 0
      include/dxc/HLSL/DxilSpanAllocator.h
  4. 5 4
      include/dxc/HLSL/HLModule.h
  5. 1 0
      include/dxc/Support/HLSLOptions.h
  6. 4 3
      include/dxc/Support/HLSLOptions.td
  7. 38 0
      lib/DXIL/DxilMetadataHelper.cpp
  8. 32 21
      lib/DXIL/DxilModule.cpp
  9. 1 2
      lib/DXIL/DxilResource.cpp
  10. 1 0
      lib/DxcSupport/HLSLOptions.cpp
  11. 180 102
      lib/HLSL/DxilCondenseResources.cpp
  12. 1 2
      lib/HLSL/DxilContainerReflection.cpp
  13. 15 7
      lib/HLSL/DxilGenerationPass.cpp
  14. 1 1
      lib/HLSL/DxilPatchShaderRecordBindings.cpp
  15. 3 0
      lib/HLSL/DxilPreparePasses.cpp
  16. 39 44
      lib/HLSL/HLModule.cpp
  17. 3 8
      lib/Transforms/IPO/GlobalDCE.cpp
  18. 2 0
      tools/clang/include/clang/Frontend/CodeGenOptions.h
  19. 38 56
      tools/clang/lib/CodeGen/CGHLSLMS.cpp
  20. 14 0
      tools/clang/test/CodeGenHLSL/quick-test/resource_binding_fxccompat_arrays_sm50.hlsl
  21. 14 0
      tools/clang/test/CodeGenHLSL/quick-test/resource_binding_fxccompat_arrays_sm51.hlsl
  22. 14 0
      tools/clang/test/CodeGenHLSL/quick-test/resource_binding_fxccompat_sm50.hlsl
  23. 9 0
      tools/clang/test/CodeGenHLSL/quick-test/resource_binding_fxccompat_sm50_no_unbounded.hlsl
  24. 13 0
      tools/clang/test/CodeGenHLSL/quick-test/resource_binding_fxccompat_sm51.hlsl
  25. 4 0
      tools/clang/tools/dxc/dxc.cpp
  26. 3 6
      tools/clang/tools/dxcompiler/dxcdisassembler.cpp
  27. 1 0
      tools/clang/tools/dxcompiler/dxcompilerobj.cpp
  28. 30 12
      tools/clang/unittests/HLSL/OptimizerTest.cpp

+ 8 - 0
include/dxc/DXIL/DxilMetadataHelper.h

@@ -74,6 +74,10 @@ public:
   static const unsigned kDxilShaderModelMajorIdx  = 1;  // Shader model major.
   static const unsigned kDxilShaderModelMinorIdx  = 2;  // Shader model minor.
 
+  // Intermediate codegen/optimizer options, not valid in final DXIL module.
+  static const char kDxilIntermediateOptionsMDName[];
+  static const unsigned kDxilIntermediateOptionsFlags = 0;  // Unique element ID.
+
   // Entry points.
   static const char kDxilEntryPointsMDName[];
 
@@ -286,6 +290,10 @@ public:
   void EmitDxilShaderModel(const ShaderModel *pSM);
   void LoadDxilShaderModel(const ShaderModel *&pSM);
 
+  // Intermediate flags
+  void EmitDxilIntermediateOptions(uint32_t flags);
+  void LoadDxilIntermediateOptions(uint32_t &flags);
+
   // Entry points.
   void EmitDxilEntryPoints(std::vector<llvm::MDNode *> &MDEntries);
   void UpdateDxilEntryPoints(std::vector<llvm::MDNode *> &MDEntries);

+ 15 - 2
include/dxc/DXIL/DxilModule.h

@@ -110,7 +110,7 @@ public:
   void LoadDxilSamplerFromMDNode(llvm::MDNode *MD, DxilSampler &S);
 
   void RemoveUnusedResources();
-  void RemoveUnusedResourceSymbols();
+  void RemoveResourcesWithUnusedSymbols();
   void RemoveFunction(llvm::Function *F);
 
   // Signatures.
@@ -249,6 +249,11 @@ public:
   void SetAllResourcesBound(bool resourcesBound);
   bool GetAllResourcesBound() const;
 
+  // Intermediate options that do not make it to DXIL
+  void SetLegacyResourceReservation(bool legacyResourceReservation);
+  bool GetLegacyResourceReservation() const;
+  void ClearIntermediateOptions();
+
   // Hull and Domain shaders.
   unsigned GetInputControlPointCount() const;
   void SetInputControlPointCount(unsigned NumICPs);
@@ -291,6 +296,11 @@ private:
   DXIL::PrimitiveTopology m_StreamPrimitiveTopology;
   unsigned m_ActiveStreamMask;
 
+private:
+  enum IntermediateFlags : uint32_t {
+    LegacyResourceReservation = 1 << 0,
+  };
+
 private:
   llvm::LLVMContext &m_Ctx;
   llvm::Module *m_pModule;
@@ -330,10 +340,13 @@ private:
   template<typename T> unsigned AddResource(std::vector<std::unique_ptr<T> > &Vec, std::unique_ptr<T> pRes);
   void LoadDxilSignature(const llvm::MDTuple *pSigTuple, DxilSignature &Sig, bool bInput);
 
-  // properties from HLModule
+  // properties from HLModule preserved as ShaderFlags
   bool m_bDisableOptimizations;
   bool m_bUseMinPrecision;
   bool m_bAllResourcesBound;
+
+  // properties from HLModule that should not make it to the final DXIL
+  uint32_t m_IntermediateFlags;
   uint32_t m_AutoBindingSpace;
 
   std::unique_ptr<DxilSubobjects> m_pSubobjects;

+ 26 - 0
include/dxc/HLSL/DxilSpanAllocator.h

@@ -61,6 +61,17 @@ public:
     return Find(size, next, pos, align);
   }
 
+  // Finds the farthest position at which an element could be allocated.
+  bool FindForUnbounded(T_index &pos, T_index align = 1) {
+    if (m_Spans.empty()) {
+      pos = m_Min;
+      return UpdatePos(pos, /*size*/1, align);
+    }
+
+    pos = m_Spans.crbegin()->end;
+    return IncPos(pos, /*inc*/ 1, /*size*/1, align);
+  }
+
   // allocate element size in first available space, returns false on failure
   bool Allocate(const T_element *element, T_index size, T_index &pos, T_index align = 1) {
     DXASSERT_NOMSG(size);
@@ -115,6 +126,21 @@ public:
     return nullptr;
   }
 
+  // Insert at specific location, overwriting anything previously there,
+  // losing their element pointer, but conserving the spans they represented.
+  void ForceInsertAndClobber(const T_element *element, T_index start, T_index end) {
+    DXASSERT_NOMSG(m_Min <= start && start <= end && end <= m_Max);
+    for (;;) {
+      auto result = m_Spans.emplace(element, start, end);
+      if (result.second)
+        break;
+      // Delete the spans we overlap with, but make sure our new span covers what they covered.
+      start = std::min(result.first->start, start);
+      end = std::max(result.first->end, end);
+      m_Spans.erase(result.first);
+    }
+  }
+
 private:
   // Find size gap starting at iterator, updating pos, and returning true if successful
   bool Find(T_index size, typename SpanSet::const_iterator it, T_index &pos, T_index align = 1) {

+ 5 - 4
include/dxc/HLSL/HLModule.h

@@ -48,8 +48,9 @@ class OP;
 
 struct HLOptions {
   HLOptions()
-      : bDefaultRowMajor(false), bIEEEStrict(false), bDisableOptimizations(false),
-        bLegacyCBufferLoad(false), PackingStrategy(0), bDX9CompatMode(0), bFXCCompatMode(0), unused(0) {
+      : bDefaultRowMajor(false), bIEEEStrict(false), bAllResourcesBound(false), bDisableOptimizations(false),
+        bLegacyCBufferLoad(false), PackingStrategy(0), bUseMinPrecision(false), bDX9CompatMode(false),
+        bFXCCompatMode(false), bLegacyResourceReservation(false), unused(0) {
   }
   uint32_t GetHLOptionsRaw() const;
   void SetHLOptionsRaw(uint32_t data);
@@ -63,7 +64,8 @@ struct HLOptions {
   unsigned bUseMinPrecision        : 1;
   unsigned bDX9CompatMode          : 1;
   unsigned bFXCCompatMode          : 1;
-  unsigned unused                  : 22;
+  unsigned bLegacyResourceReservation : 1;
+  unsigned unused                  : 21;
 };
 
 typedef std::unordered_map<const llvm::Function *, std::unique_ptr<DxilFunctionProps>> DxilFunctionPropsMap;
@@ -122,7 +124,6 @@ public:
 
   void RemoveGlobal(llvm::GlobalVariable *GV);
   void RemoveFunction(llvm::Function *F);
-  void RemoveResources(llvm::GlobalVariable **ppVariables, unsigned count);
 
   // ThreadGroupSharedMemory.
   typedef std::vector<llvm::GlobalVariable*>::iterator tgsm_iterator;

+ 1 - 0
include/dxc/Support/HLSLOptions.h

@@ -159,6 +159,7 @@ public:
   bool DisassembleByteOffset = false; //OPT_No
   bool DisaseembleHex = false; //OPT_Lx
   bool LegacyMacroExpansion = false; // OPT_flegacy_macro_expansion
+  bool LegacyResourceReservation = false; // OPT_flegacy_resource_reservation
   unsigned long AutoBindingSpace = UINT_MAX; // OPT_auto_binding_space
   bool ExportShadersOnly = false; // OPT_export_shaders_only
 

+ 4 - 3
include/dxc/Support/HLSLOptions.td

@@ -215,6 +215,10 @@ def external_fn : Separate<["-", "/"], "external-fn">, Group<hlslcore_Group>, Fl
   HelpText<"External function name to load for compiler support">;
 def fcgl : Flag<["-", "/"], "fcgl">, Group<hlslcore_Group>, Flags<[CoreOption, HelpHidden]>,
   HelpText<"Generate high-level code only">;
+def flegacy_macro_expansion : Flag<["-", "/"], "flegacy-macro-expansion">, Group<hlslcomp_Group>, Flags<[CoreOption, DriverOption]>,
+    HelpText<"Expand the operands before performing token-pasting operation (fxc behavior)">;
+def flegacy_resource_reservation : Flag<["-", "/"], "flegacy-resource-reservation">, Group<hlslcomp_Group>, Flags<[CoreOption, DriverOption]>,
+    HelpText<"Reserve unused explicit register assignments for compatibility with shader model 5.0 and below">;
 def not_use_legacy_cbuf_load : Flag<["-", "/"], "not_use_legacy_cbuf_load">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
   HelpText<"Do not use legacy cbuffer load">;
 def pack_prefix_stable : Flag<["-", "/"], "pack_prefix_stable">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
@@ -379,6 +383,3 @@ def nologo : Flag<["-", "/"], "nologo">, Group<hlslcore_Group>, Flags<[DriverOpt
 
 // Also removed: compress, decompress, /Gch (child effect), /Gpp (partial precision)
 // /Op - no support for preshaders.
-
-def flegacy_macro_expansion : Flag<["-"], "flegacy-macro-expansion">, Group<hlslcomp_Group>, Flags<[CoreOption, DriverOption]>,
-    HelpText<"Expand the operands before performing token-pasting operation (fxc behavior)">;

+ 38 - 0
lib/DXIL/DxilMetadataHelper.cpp

@@ -54,6 +54,7 @@ const char DxilMDHelper::kDxilValidatorVersionMDName[]                = "dx.valv
 
 // This named metadata is not valid in final module (should be moved to DxilContainer)
 const char DxilMDHelper::kDxilRootSignatureMDName[]                   = "dx.rootSignature";
+const char DxilMDHelper::kDxilIntermediateOptionsMDName[]             = "dx.intermediateOptions";
 const char DxilMDHelper::kDxilViewIdStateMDName[]                     = "dx.viewIdState";
 const char DxilMDHelper::kDxilSubobjectsMDName[]                      = "dx.subobjects";
 
@@ -196,6 +197,43 @@ void DxilMDHelper::LoadDxilShaderModel(const ShaderModel *&pSM) {
   }
 }
 
+//
+// intermediate options.
+//
+void DxilMDHelper::EmitDxilIntermediateOptions(uint32_t flags) {
+  if (flags == 0) return;
+
+  NamedMDNode *pIntermediateOptionsNamedMD = m_pModule->getNamedMetadata(kDxilIntermediateOptionsMDName);
+  IFTBOOL(pIntermediateOptionsNamedMD == nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  pIntermediateOptionsNamedMD = m_pModule->getOrInsertNamedMetadata(kDxilIntermediateOptionsMDName);
+
+  pIntermediateOptionsNamedMD->addOperand(
+    MDNode::get(m_Ctx, { Uint32ToConstMD(kDxilIntermediateOptionsFlags), Uint32ToConstMD(flags) }));
+}
+
+void DxilMDHelper::LoadDxilIntermediateOptions(uint32_t &flags) {
+  flags = 0;
+
+  NamedMDNode *pIntermediateOptionsNamedMD = m_pModule->getNamedMetadata(kDxilIntermediateOptionsMDName);
+  if (pIntermediateOptionsNamedMD == nullptr) return;
+
+  for (unsigned i = 0; i < pIntermediateOptionsNamedMD->getNumOperands(); i++) {
+    MDTuple *pEntry = dyn_cast<MDTuple>(pIntermediateOptionsNamedMD->getOperand(i));
+    IFTBOOL(pEntry != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+    IFTBOOL(pEntry->getNumOperands() >= 1, DXC_E_INCORRECT_DXIL_METADATA);
+
+    uint32_t id = ConstMDToUint32(pEntry->getOperand(0));
+    switch (id) {
+    case kDxilIntermediateOptionsFlags:
+      IFTBOOL(pEntry->getNumOperands() == 2, DXC_E_INCORRECT_DXIL_METADATA);
+      flags = ConstMDToUint32(pEntry->getOperand(1));
+      break;
+
+    default: throw hlsl::Exception(DXC_E_INCORRECT_DXIL_METADATA, "Unrecognized intermediate options metadata");
+    }
+  }
+}
+
 //
 // Entry points.
 //

+ 32 - 21
lib/DXIL/DxilModule.cpp

@@ -111,6 +111,7 @@ DxilModule::DxilModule(Module *pModule)
 , m_bDisableOptimizations(false)
 , m_bUseMinPrecision(true) // use min precision by default
 , m_bAllResourcesBound(false)
+, m_IntermediateFlags(0)
 , m_AutoBindingSpace(UINT_MAX)
 , m_pSubobjects(nullptr)
 {
@@ -498,6 +499,19 @@ bool DxilModule::GetAllResourcesBound() const {
   return m_bAllResourcesBound;
 }
 
+void DxilModule::SetLegacyResourceReservation(bool legacyResourceReservation) {
+  m_IntermediateFlags &= ~LegacyResourceReservation;
+  if (legacyResourceReservation) m_IntermediateFlags |= LegacyResourceReservation;
+}
+
+bool DxilModule::GetLegacyResourceReservation() const {
+  return (m_IntermediateFlags & LegacyResourceReservation) != 0;
+}
+
+void DxilModule::ClearIntermediateOptions() {
+  m_IntermediateFlags = 0;
+}
+
 unsigned DxilModule::GetInputControlPointCount() const {
   if (!(m_pSM->IsHS() || m_pSM->IsDS()))
     return 0;
@@ -798,7 +812,7 @@ void DxilModule::RemoveFunction(llvm::Function *F) {
 }
 
 void DxilModule::RemoveUnusedResources() {
-  DXASSERT(!m_pSM->IsLib(), "this function not work on library");
+  DXASSERT(!m_pSM->IsLib(), "this function does not work on libraries");
   hlsl::OP *hlslOP = GetOP();
   Function *createHandleFunc = hlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(GetCtx()));
   if (createHandleFunc->user_empty()) {
@@ -852,11 +866,11 @@ void DxilModule::RemoveUnusedResources() {
   std::unordered_set<unsigned> immSamplerID;
   std::unordered_set<unsigned> immCBufID;
   ConvertUsedResource(immUAVID, usedUAVID);
-  RemoveResources(m_UAVs, immUAVID);
   ConvertUsedResource(immSRVID, usedSRVID);
   ConvertUsedResource(immSamplerID, usedSamplerID);
   ConvertUsedResource(immCBufID, usedCBufID);
 
+  RemoveResources(m_UAVs, immUAVID);
   RemoveResources(m_SRVs, immSRVID);
   RemoveResources(m_Samplers, immSamplerID);
   RemoveResources(m_CBuffers, immCBufID);
@@ -864,15 +878,16 @@ void DxilModule::RemoveUnusedResources() {
 
 namespace {
 template <typename TResource>
-static void RemoveResourceSymbols(std::vector<std::unique_ptr<TResource>> &vec) {
+static void RemoveResourcesWithUnusedSymbolsHelper(std::vector<std::unique_ptr<TResource>> &vec) {
   unsigned resID = 0;
   for (auto p = vec.begin(); p != vec.end();) {
     auto c = p++;
-    GlobalVariable *GV = cast<GlobalVariable>((*c)->GetGlobalSymbol());
-    GV->removeDeadConstantUsers();
-    if (GV->user_empty()) {
+    Constant *symbol = (*c)->GetGlobalSymbol();
+    symbol->removeDeadConstantUsers();
+    if (symbol->user_empty()) {
       p = vec.erase(c);
-      GV->eraseFromParent();
+      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(symbol))
+        GV->eraseFromParent();
       continue;
     }
     if ((*c)->GetID() != resID) {
@@ -883,11 +898,11 @@ static void RemoveResourceSymbols(std::vector<std::unique_ptr<TResource>> &vec)
 }
 }
 
-void DxilModule::RemoveUnusedResourceSymbols() {
-  RemoveResourceSymbols(m_SRVs);
-  RemoveResourceSymbols(m_UAVs);
-  RemoveResourceSymbols(m_CBuffers);
-  RemoveResourceSymbols(m_Samplers);
+void DxilModule::RemoveResourcesWithUnusedSymbols() {
+  RemoveResourcesWithUnusedSymbolsHelper(m_SRVs);
+  RemoveResourcesWithUnusedSymbolsHelper(m_UAVs);
+  RemoveResourcesWithUnusedSymbolsHelper(m_CBuffers);
+  RemoveResourcesWithUnusedSymbolsHelper(m_Samplers);
 }
 
 DxilSignature &DxilModule::GetInputSignature() {
@@ -1144,6 +1159,7 @@ void DxilModule::ClearDxilMetadata(Module &M) {
       name == DxilMDHelper::kDxilShaderModelMDName ||
       name == DxilMDHelper::kDxilEntryPointsMDName ||
       name == DxilMDHelper::kDxilRootSignatureMDName ||
+      name == DxilMDHelper::kDxilIntermediateOptionsMDName ||
       name == DxilMDHelper::kDxilResourcesMDName ||
       name == DxilMDHelper::kDxilTypeSystemMDName ||
       name == DxilMDHelper::kDxilViewIdStateMDName ||
@@ -1161,6 +1177,7 @@ void DxilModule::EmitDxilMetadata() {
   m_pMDHelper->EmitDxilVersion(m_DxilMajor, m_DxilMinor);
   m_pMDHelper->EmitValidatorVersion(m_ValMajor, m_ValMinor);
   m_pMDHelper->EmitDxilShaderModel(m_pSM);
+  m_pMDHelper->EmitDxilIntermediateOptions(m_IntermediateFlags);
 
   MDTuple *pMDProperties = nullptr;
   uint64_t flag = m_ShaderFlags.GetShaderFlagsRaw();
@@ -1240,6 +1257,7 @@ void DxilModule::LoadDxilMetadata() {
   m_pMDHelper->LoadValidatorVersion(m_ValMajor, m_ValMinor);
   const ShaderModel *loadedSM;
   m_pMDHelper->LoadDxilShaderModel(loadedSM);
+  m_pMDHelper->LoadDxilIntermediateOptions(m_IntermediateFlags);
 
   // This must be set before LoadDxilEntryProperties
   m_pMDHelper->SetShaderModel(loadedSM);
@@ -1264,15 +1282,8 @@ void DxilModule::LoadDxilMetadata() {
   uint64_t rawShaderFlags = 0;
   DxilFunctionProps entryFuncProps;
   entryFuncProps.shaderKind = loadedSM->GetKind();
-  if (loadedSM->IsLib()) {
-    // Get rawShaderFlags and m_AutoBindingSpace; entryFuncProps unused.
-    m_pMDHelper->LoadDxilEntryProperties(*pEntryProperties, rawShaderFlags,
-                                         entryFuncProps, m_AutoBindingSpace);
-  }
-  else {
-    m_pMDHelper->LoadDxilEntryProperties(*pEntryProperties, rawShaderFlags,
-                                         entryFuncProps, m_AutoBindingSpace);
-  }
+  m_pMDHelper->LoadDxilEntryProperties(*pEntryProperties, rawShaderFlags,
+                                       entryFuncProps, m_AutoBindingSpace);
 
   m_bUseMinPrecision = true;
   if (rawShaderFlags) {

+ 1 - 2
lib/DXIL/DxilResource.cpp

@@ -39,8 +39,7 @@ void DxilResource::SetCompType(const CompType CT) {
 }
 
 Type *DxilResource::GetRetType() const {
-  Constant *GV = GetGlobalSymbol();
-  Type *Ty = GV->getType()->getPointerElementType();
+  Type *Ty = GetGlobalSymbol()->getType()->getPointerElementType();
   // For resource array, use element type.
   while (Ty->isArrayTy())
     Ty = Ty->getArrayElementType();

+ 1 - 0
lib/DxcSupport/HLSLOptions.cpp

@@ -542,6 +542,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.DisassembleByteOffset = Args.hasFlag(OPT_No, OPT_INVALID, false);
   opts.DisaseembleHex = Args.hasFlag(OPT_Lx, OPT_INVALID, false);
   opts.LegacyMacroExpansion = Args.hasFlag(OPT_flegacy_macro_expansion, OPT_INVALID, false);
+  opts.LegacyResourceReservation = Args.hasFlag(OPT_flegacy_resource_reservation, OPT_INVALID, false);
   opts.ExportShadersOnly = Args.hasFlag(OPT_export_shaders_only, OPT_INVALID, false);
 
   if (opts.DefaultColMajor && opts.DefaultRowMajor) {

+ 180 - 102
lib/HLSL/DxilCondenseResources.cpp

@@ -94,105 +94,167 @@ void ApplyRewriteMapOnResTable(RemapEntryCollection &rewrites, DxilModule &DM) {
 
 } // namespace
 
-// Resource lowerBound allocation.
-namespace {
-
-template <typename T>
-static bool
-AllocateDxilResource(const std::vector<std::unique_ptr<T>> &resourceList,
-                     LLVMContext &Ctx, unsigned AutoBindingSpace=0) {
-  bool bChanged = false;
-  SpacesAllocator<unsigned, T> SAlloc;
-
-  for (auto &res : resourceList) {
-    const unsigned space = res->GetSpaceID();
-    typename SpacesAllocator<unsigned, T>::Allocator &alloc = SAlloc.Get(space);
+class DxilResourceRegisterAllocator {
+private:
+  SpacesAllocator<unsigned, hlsl::DxilCBuffer> m_reservedCBufferRegisters;
+  SpacesAllocator<unsigned, hlsl::DxilSampler> m_reservedSamplerRegisters;
+  SpacesAllocator<unsigned, hlsl::DxilResource> m_reservedUAVRegisters;
+  SpacesAllocator<unsigned, hlsl::DxilResource> m_reservedSRVRegisters;
+
+  template<typename T>
+  static void GatherReservedRegisters(
+    const std::vector<std::unique_ptr<T>> &ResourceList,
+    SpacesAllocator<unsigned, T> &SAlloc) {
+    for (auto &res : ResourceList) {
+      if (res->IsAllocated()) {
+        typename SpacesAllocator<unsigned, T>::Allocator &Alloc = SAlloc.Get(res->GetSpaceID());
+        Alloc.ForceInsertAndClobber(res.get(), res->GetLowerBound(), res->GetUpperBound());
+        if (res->IsUnbounded())
+          Alloc.SetUnbounded(res.get());
+      }
+    }
+  }
 
-    if (res->IsAllocated()) {
-      const unsigned reg = res->GetLowerBound();
-      const T *conflict = nullptr;
-      if (res->IsUnbounded()) {
-        const T *unbounded = alloc.GetUnbounded();
-        if (unbounded) {
-          Ctx.emitError(Twine("more than one unbounded resource (") +
-                        unbounded->GetGlobalName() + (" and ") +
-                        res->GetGlobalName() + (") in space ") + Twine(space));
-        } else {
+  template <typename T>
+  static bool
+  AllocateRegisters(const std::vector<std::unique_ptr<T>> &resourceList,
+    LLVMContext &Ctx, SpacesAllocator<unsigned, T> &ReservedRegisters,
+    unsigned AutoBindingSpace) {
+    bool bChanged = false;
+    SpacesAllocator<unsigned, T> SAlloc;
+
+    // Reserve explicitly allocated resources
+    for (auto &res : resourceList) {
+      const unsigned space = res->GetSpaceID();
+      typename SpacesAllocator<unsigned, T>::Allocator &alloc = SAlloc.Get(space);
+      typename SpacesAllocator<unsigned, T>::Allocator &reservedAlloc = ReservedRegisters.Get(space);
+
+      if (res->IsAllocated()) {
+        const unsigned reg = res->GetLowerBound();
+        const T *conflict = nullptr;
+        if (res->IsUnbounded()) {
+          const T *unbounded = alloc.GetUnbounded();
+          if (unbounded) {
+            Ctx.emitError(Twine("more than one unbounded resource (") +
+              unbounded->GetGlobalName() + (" and ") +
+              res->GetGlobalName() + (") in space ") + Twine(space));
+          }
+          else {
+            conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
+            if (!conflict) {
+              alloc.SetUnbounded(res.get());
+              reservedAlloc.SetUnbounded(res.get());
+            }
+          }
+        }
+        else {
           conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
-          if (!conflict)
-            alloc.SetUnbounded(res.get());
         }
-      } else {
-        conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
-      }
-      if (conflict) {
-        Ctx.emitError(((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
-                      Twine("resource ") + res->GetGlobalName() +
-                      Twine(" at register ") + Twine(reg) +
-                      Twine(" overlaps with resource ") +
-                      conflict->GetGlobalName() + Twine(" at register ") +
-                      Twine(conflict->GetLowerBound()) + Twine(", space ") +
-                      Twine(space));
+        if (conflict) {
+          Ctx.emitError(((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
+            Twine("resource ") + res->GetGlobalName() +
+            Twine(" at register ") + Twine(reg) +
+            Twine(" overlaps with resource ") +
+            conflict->GetGlobalName() + Twine(" at register ") +
+            Twine(conflict->GetLowerBound()) + Twine(", space ") +
+            Twine(space));
+        }
+        else {
+          // Also add this to the reserved (unallocatable) range, if it wasn't already there.
+          reservedAlloc.ForceInsertAndClobber(res.get(), res->GetLowerBound(), res->GetUpperBound());
+        }
       }
     }
-  }
 
-  // Allocate.
-  const unsigned space = AutoBindingSpace;
-  typename SpacesAllocator<unsigned, T>::Allocator &alloc0 = SAlloc.Get(space);
-  for (auto &res : resourceList) {
-    if (!res->IsAllocated()) {
+    // Allocate unallocated resources
+    const unsigned space = AutoBindingSpace;
+    typename SpacesAllocator<unsigned, T>::Allocator &alloc0 = SAlloc.Get(space);
+    typename SpacesAllocator<unsigned, T>::Allocator &reservedAlloc0 = ReservedRegisters.Get(space);
+    for (auto &res : resourceList) {
+      if (res->IsAllocated())
+        continue;
+
       DXASSERT(res->GetSpaceID() == 0,
-               "otherwise non-zero space has no user register assignment");
+        "otherwise non-zero space has no user register assignment");
+
       unsigned reg = 0;
-      bool success = false;
+      unsigned end = 0;
+      bool allocateSpaceFound = false;
       if (res->IsUnbounded()) {
-        const T *unbounded = alloc0.GetUnbounded();
-        if (unbounded) {
+        if (alloc0.GetUnbounded() != nullptr) {
+          const T *unbounded = alloc0.GetUnbounded();
           Ctx.emitError(Twine("more than one unbounded resource (") +
-                        unbounded->GetGlobalName() + Twine(" and ") +
-                        res->GetGlobalName() + Twine(") in space ") +
-                        Twine(space));
-        } else {
-          success = alloc0.AllocateUnbounded(res.get(), reg);
-          if (success)
-            alloc0.SetUnbounded(res.get());
+            unbounded->GetGlobalName() + Twine(" and ") +
+            res->GetGlobalName() + Twine(") in space ") +
+            Twine(space));
+          continue;
         }
-      } else {
-        success = alloc0.Allocate(res.get(), res->GetRangeSize(), reg);
+
+        if (reservedAlloc0.FindForUnbounded(reg)) {
+          end = UINT_MAX;
+          allocateSpaceFound = true;
+        }
+      }
+      else if (reservedAlloc0.Find(res->GetRangeSize(), reg)) {
+        end = reg + res->GetRangeSize() - 1;
+        allocateSpaceFound = true;
       }
-      if (success) {
+
+      if (allocateSpaceFound) {
+        bool success = reservedAlloc0.Insert(res.get(), reg, end) == nullptr;
+        DXASSERT_NOMSG(success);
+
+        success = alloc0.Insert(res.get(), reg, end) == nullptr;
+        DXASSERT_NOMSG(success);
+
+        if (res->IsUnbounded()) {
+          alloc0.SetUnbounded(res.get());
+          reservedAlloc0.SetUnbounded(res.get());
+        }
+
         res->SetLowerBound(reg);
         res->SetSpaceID(space);
         bChanged = true;
       } else {
         Ctx.emitError(((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
-                      Twine("resource ") + res->GetGlobalName() +
-                      Twine(" could not be allocated"));
+          Twine("resource ") + res->GetGlobalName() +
+          Twine(" could not be allocated"));
       }
     }
+
+    return bChanged;
   }
 
-  return bChanged;
-}
+public:
+  void GatherReservedRegisters(DxilModule &DM) {
+    // For backcompat with FXC, shader models 5.0 and below will not auto-allocate
+    // resources at a register explicitely assigned to even an unused resource.
+    if (DM.GetLegacyResourceReservation()) {
+      GatherReservedRegisters(DM.GetCBuffers(), m_reservedCBufferRegisters);
+      GatherReservedRegisters(DM.GetSamplers(), m_reservedSamplerRegisters);
+      GatherReservedRegisters(DM.GetUAVs(), m_reservedUAVRegisters);
+      GatherReservedRegisters(DM.GetSRVs(), m_reservedSRVRegisters);
+    }
+  }
 
-bool AllocateDxilResources(DxilModule &DM) {
-  uint32_t AutoBindingSpace = DM.GetAutoBindingSpace();
-  if (AutoBindingSpace == UINT_MAX) {
-    // For libraries, we don't allocate unless AutoBindingSpace is set.
-    if (DM.GetShaderModel()->IsLib())
-      return false;
-    // For shaders, we allocate in space 0 by default.
-    AutoBindingSpace = 0;
+  bool AllocateRegisters(DxilModule &DM) {
+    uint32_t AutoBindingSpace = DM.GetAutoBindingSpace();
+    if (AutoBindingSpace == UINT_MAX) {
+      // For libraries, we don't allocate unless AutoBindingSpace is set.
+      if (DM.GetShaderModel()->IsLib())
+        return false;
+      // For shaders, we allocate in space 0 by default.
+      AutoBindingSpace = 0;
+    }
+
+    bool bChanged = false;
+    bChanged |= AllocateRegisters(DM.GetCBuffers(), DM.GetCtx(), m_reservedCBufferRegisters, AutoBindingSpace);
+    bChanged |= AllocateRegisters(DM.GetSamplers(), DM.GetCtx(), m_reservedSamplerRegisters, AutoBindingSpace);
+    bChanged |= AllocateRegisters(DM.GetUAVs(), DM.GetCtx(), m_reservedUAVRegisters, AutoBindingSpace);
+    bChanged |= AllocateRegisters(DM.GetSRVs(), DM.GetCtx(), m_reservedSRVRegisters, AutoBindingSpace);
+    return bChanged;
   }
-  bool bChanged = false;
-  bChanged |= AllocateDxilResource(DM.GetCBuffers(), DM.GetCtx(), AutoBindingSpace);
-  bChanged |= AllocateDxilResource(DM.GetSamplers(), DM.GetCtx(), AutoBindingSpace);
-  bChanged |= AllocateDxilResource(DM.GetUAVs(), DM.GetCtx(), AutoBindingSpace);
-  bChanged |= AllocateDxilResource(DM.GetSRVs(), DM.GetCtx(), AutoBindingSpace);
-  return bChanged;
-}
-} // namespace
+};
 
 class DxilCondenseResources : public ModulePass {
 private:
@@ -210,7 +272,12 @@ public:
     if (DM.GetShaderModel()->IsLib())
       return false;
 
-    // Remove unused resource.
+    // Gather reserved resource registers while we still have
+    // unused resources that might have explicit register assignments.
+    DxilResourceRegisterAllocator ResourceRegisterAllocator;
+    ResourceRegisterAllocator.GatherReservedRegisters(DM);
+
+    // Remove unused resources.
     DM.RemoveUnusedResources();
 
     // Make sure all resource types are dense; build a map of rewrites.
@@ -224,7 +291,7 @@ public:
 
     if (hasResource) {
       if (!DM.GetShaderModel()->IsLib()) {
-        AllocateDxilResources(DM);
+        ResourceRegisterAllocator.AllocateRegisters(DM);
         PatchCreateHandle(DM);
       }
     }
@@ -391,8 +458,13 @@ public:
     if (DM.GetCBuffers().size())
       bChanged = PatchTBuffers(DM) || bChanged;
 
-    // Remove unused resource.
-    DM.RemoveUnusedResourceSymbols();
+    // Gather reserved resource registers while we still have
+    // unused resources that might have explicit register assignments.
+    DxilResourceRegisterAllocator ResourceRegisterAllocator;
+    ResourceRegisterAllocator.GatherReservedRegisters(DM);
+
+    // Remove unused resources.
+    DM.RemoveResourcesWithUnusedSymbols();
 
     unsigned newResources = DM.GetCBuffers().size() + DM.GetUAVs().size() +
                             DM.GetSRVs().size() + DM.GetSamplers().size();
@@ -401,7 +473,7 @@ public:
     if (0 == newResources)
       return bChanged;
 
-    bChanged |= AllocateDxilResources(DM);
+    bChanged |= ResourceRegisterAllocator.AllocateRegisters(DM);
 
     if (m_bIsLib && DM.GetShaderModel()->GetMinor() == ShaderModel::kOfflineMinor)
       return bChanged;
@@ -1546,15 +1618,15 @@ StructType *UpdateStructTypeForLegacyLayout(StructType *ST, bool IsCBuf,
 
 void UpdateStructTypeForLegacyLayout(DxilResourceBase &Res,
                                      DxilTypeSystem &TypeSys, Module &M) {
-  GlobalVariable *GV = cast<GlobalVariable>(Res.GetGlobalSymbol());
-  Type *Ty = GV->getType()->getPointerElementType();
+  Constant *Symbol = Res.GetGlobalSymbol();
+  Type *ElemTy = Symbol->getType()->getPointerElementType();
   bool IsResourceArray = Res.GetRangeSize() != 1;
   if (IsResourceArray) {
     // Support Array of struct buffer.
-    if (Ty->isArrayTy())
-      Ty = Ty->getArrayElementType();
+    if (ElemTy->isArrayTy())
+      ElemTy = ElemTy->getArrayElementType();
   }
-  StructType *ST = cast<StructType>(Ty);
+  StructType *ST = cast<StructType>(ElemTy);
   if (ST->isOpaque()) {
     DXASSERT(Res.GetClass() == DxilResourceBase::Class::CBuffer,
              "Only cbuffer can have opaque struct.");
@@ -1564,7 +1636,7 @@ void UpdateStructTypeForLegacyLayout(DxilResourceBase &Res,
   Type *UpdatedST =
       UpdateStructTypeForLegacyLayout(ST, IsResourceArray, TypeSys, M);
   if (ST != UpdatedST) {
-    Type *Ty = GV->getType()->getPointerElementType();
+    Type *Ty = Symbol->getType()->getPointerElementType();
     if (IsResourceArray) {
       // Support Array of struct buffer.
       if (Ty->isArrayTy()) {
@@ -1572,10 +1644,10 @@ void UpdateStructTypeForLegacyLayout(DxilResourceBase &Res,
       }
     }
     GlobalVariable *NewGV = cast<GlobalVariable>(
-        M.getOrInsertGlobal(GV->getName().str() + "_legacy", UpdatedST));
+        M.getOrInsertGlobal(Symbol->getName().str() + "_legacy", UpdatedST));
     Res.SetGlobalSymbol(NewGV);
     // Delete old GV.
-    for (auto UserIt = GV->user_begin(); UserIt != GV->user_end();) {
+    for (auto UserIt = Symbol->user_begin(); UserIt != Symbol->user_end();) {
       Value *User = *(UserIt++);
       if (Instruction *I = dyn_cast<Instruction>(User)) {
         if (!User->user_empty())
@@ -1588,8 +1660,10 @@ void UpdateStructTypeForLegacyLayout(DxilResourceBase &Res,
           CE->replaceAllUsesWith(UndefValue::get(CE->getType()));
       }
     }
-    GV->removeDeadConstantUsers();
-    GV->eraseFromParent();
+    Symbol->removeDeadConstantUsers();
+
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Symbol))
+      GV->eraseFromParent();
   }
 }
 
@@ -1622,15 +1696,13 @@ void DxilLowerCreateHandleForLib::UpdateResourceSymbols() {
   std::vector<GlobalVariable *> &LLVMUsed = m_DM->GetLLVMUsed();
 
   auto UpdateResourceSymbol = [&LLVMUsed, this](DxilResourceBase *res) {
-    GlobalVariable *GV = cast<GlobalVariable>(res->GetGlobalSymbol());
-    GV->removeDeadConstantUsers();
-    DXASSERT(GV->user_empty(), "else resource not lowered");
-    Type *Ty = GV->getType();
-    res->SetGlobalSymbol(UndefValue::get(Ty));
-    if (m_HasDbgInfo)
-      LLVMUsed.emplace_back(GV);
-
-    res->SetGlobalSymbol(UndefValue::get(Ty));
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(res->GetGlobalSymbol())) {
+      GV->removeDeadConstantUsers();
+      DXASSERT(GV->user_empty(), "else resource not lowered");
+      res->SetGlobalSymbol(UndefValue::get(GV->getType()));
+      if (m_HasDbgInfo)
+        LLVMUsed.emplace_back(GV);
+    }
   };
 
   for (auto &&C : m_DM->GetCBuffers()) {
@@ -1707,6 +1779,8 @@ void DxilLowerCreateHandleForLib::TranslateDxilResourceUses(
   Value *isUniformRes = hlslOP->GetI1Const(0);
 
   Value *GV = res.GetGlobalSymbol();
+  DXASSERT(isa<GlobalValue>(GV), "DxilLowerCreateHandleForLib cannot deal with unused resources.");
+
   Module *pM = m_DM->GetModule();
   // TODO: add debug info to create handle.
   DIVariable *DIV = nullptr;
@@ -1978,7 +2052,9 @@ bool DxilLowerCreateHandleForLib::PatchTBuffers(DxilModule &DM) {
       InitTBuffer(CB, srv.get());
       srv->SetID(offset++);
       DM.AddSRV(std::move(srv));
-      GlobalVariable *GV = cast<GlobalVariable>(CB->GetGlobalSymbol());
+      GlobalVariable *GV = dyn_cast<GlobalVariable>(CB->GetGlobalSymbol());
+      if (GV == nullptr)
+        continue;
       PatchTBufferUse(GV, DM);
       // Set global symbol for cbuffer to an unused value so it can be removed
       // in RemoveUnusedResourceSymbols.
@@ -2031,7 +2107,9 @@ public:
 
     if (hasResource) {
       DM.SetAutoBindingSpace(m_AutoBindingSpace);
-      AllocateDxilResources(DM);
+
+      DxilResourceRegisterAllocator ResourceRegisterAllocator;
+      ResourceRegisterAllocator.AllocateRegisters(DM);
     }
     return true;
   }

+ 1 - 2
lib/HLSL/DxilContainerReflection.cpp

@@ -1138,8 +1138,7 @@ void CShaderReflectionConstantBuffer::InitializeStructuredBuffer(
   // Create reflection type, if we have the necessary annotation info
 
   // Extract the `struct` that wraps element type of the buffer resource
-  Constant *GV = R.GetGlobalSymbol();
-  Type *Ty = GV->getType()->getPointerElementType();
+  Type *Ty = R.GetGlobalSymbol()->getType()->getPointerElementType();
   if(Ty->isArrayTy())
       Ty = Ty->getArrayElementType();
   StructType *ST = cast<StructType>(Ty);

+ 15 - 7
lib/HLSL/DxilGenerationPass.cpp

@@ -160,26 +160,30 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
     auto b = llvm::make_unique<DxilCBuffer>();
     InitResourceBase(C.get(), b.get());
     b->SetSize(C->GetSize());
-    LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
+      LLVMUsed.emplace_back(GV);
     M.AddCBuffer(std::move(b));
   }
   for (auto && C : H.GetUAVs()) {
     auto b = llvm::make_unique<DxilResource>();
     InitResource(C.get(), b.get());
-    LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
+      LLVMUsed.emplace_back(GV);
     M.AddUAV(std::move(b));
   }
   for (auto && C : H.GetSRVs()) {
     auto b = llvm::make_unique<DxilResource>();
     InitResource(C.get(), b.get());
-    LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
+      LLVMUsed.emplace_back(GV);
     M.AddSRV(std::move(b));
   }
   for (auto && C : H.GetSamplers()) {
     auto b = llvm::make_unique<DxilSampler>();
     InitResourceBase(C.get(), b.get());
     b->SetSamplerKind(C->GetSamplerKind());
-    LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
+      LLVMUsed.emplace_back(GV);
     M.AddSampler(std::move(b));
   }
 
@@ -192,6 +196,7 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
   // Shader properties.
   //bool m_bDisableOptimizations;
   M.SetDisableOptimization(H.GetHLOptions().bDisableOptimizations);
+  M.SetLegacyResourceReservation(H.GetHLOptions().bLegacyResourceReservation);
   //bool m_bDisableMathRefactoring;
   //bool m_bEnableDoublePrecision;
   //bool m_bEnableDoubleExtensions;
@@ -416,8 +421,8 @@ MarkUavUpdateCounter(Value* LoadOrGEP,
 static void
 MarkUavUpdateCounter(DxilResource &res,
                      std::unordered_set<LoadInst *> &UpdateCounterSet) {
-  Value *GV = res.GetGlobalSymbol();
-  for (auto U = GV->user_begin(), E = GV->user_end(); U != E;) {
+  Value *V = res.GetGlobalSymbol();
+  for (auto U = V->user_begin(), E = V->user_end(); U != E;) {
     User *user = *(U++);
     // Skip unused user.
     if (user->user_empty())
@@ -443,7 +448,10 @@ void DxilGenerationPass::GenerateDxilCBufferHandles() {
 
   for (size_t i = 0; i < m_pHLModule->GetCBuffers().size(); i++) {
     DxilCBuffer &CB = m_pHLModule->GetCBuffer(i);
-    GlobalVariable *GV = cast<GlobalVariable>(CB.GetGlobalSymbol());
+    GlobalVariable *GV = dyn_cast<GlobalVariable>(CB.GetGlobalSymbol());
+    if (GV == nullptr)
+      continue;
+
     // Remove GEP created in HLObjectOperationLowerHelper::UniformCbPtr.
     GV->removeDeadConstantUsers();
     std::string handleName = std::string(GV->getName());

+ 1 - 1
lib/HLSL/DxilPatchShaderRecordBindings.cpp

@@ -783,7 +783,7 @@ bool DxilPatchShaderRecordBindings::GetHandleInfo(
     shaderRegister = Resource->GetLowerBound();
     kind = Resource->GetKind();
     resClass = Resource->GetClass();
-    resType = cast<GlobalVariable>(Resource->GetGlobalSymbol())->getType()->getPointerElementType();
+    resType = Resource->GetGlobalSymbol()->getType()->getPointerElementType();
   }
   return Resource != nullptr;
 }

+ 3 - 0
lib/HLSL/DxilPreparePasses.cpp

@@ -307,6 +307,9 @@ public:
       // Update Validator Version
       DM.UpgradeToMinValidatorVersion();
 
+      // Clear intermediate options that shouldn't be in the final DXIL
+      DM.ClearIntermediateOptions();
+
       return true;
     }
 

+ 39 - 44
lib/HLSL/HLModule.cpp

@@ -255,57 +255,52 @@ void HLModule::RemoveFunction(llvm::Function *F) {
   m_pOP->RemoveFunction(F);
 }
 
-template <typename TResource>
-bool RemoveResource(std::vector<std::unique_ptr<TResource>> &vec,
-                    GlobalVariable *pVariable) {
-  for (auto p = vec.begin(), e = vec.end(); p != e; ++p) {
-    if ((*p)->GetGlobalSymbol() == pVariable) {
-      p = vec.erase(p);
-      // Update ID.
-      for (e = vec.end();p != e; ++p) {
-        unsigned ID = (*p)->GetID()-1;
-        (*p)->SetID(ID);
+namespace {
+  template <typename TResource>
+  bool RemoveResource(std::vector<std::unique_ptr<TResource>> &vec,
+    GlobalVariable *pVariable, bool keepAllocated) {
+    for (auto p = vec.begin(), e = vec.end(); p != e; ++p) {
+      if ((*p)->GetGlobalSymbol() != pVariable)
+        continue;
+
+      if (keepAllocated && (*p)->IsAllocated()) {
+        // Keep the resource, but it has no more symbol.
+        (*p)->SetGlobalSymbol(UndefValue::get(pVariable->getType()));
+      } else {
+        // Erase the resource alltogether and update IDs of subsequent ones
+        p = vec.erase(p);
+        for (e = vec.end(); p != e; ++p) {
+          unsigned ID = (*p)->GetID() - 1;
+          (*p)->SetID(ID);
+        }
       }
+
       return true;
     }
+    return false;
   }
-  return false;
-}
-bool RemoveResource(std::vector<GlobalVariable *> &vec,
-                    llvm::GlobalVariable *pVariable) {
-  for (auto p = vec.begin(), e = vec.end(); p != e; ++p) {
-    if (*p == pVariable) {
-      vec.erase(p);
-      return true;
-    }
-  }
-  return false;
 }
 
 void HLModule::RemoveGlobal(llvm::GlobalVariable *GV) {
-  RemoveResources(&GV, 1);
-}
-
-void HLModule::RemoveResources(llvm::GlobalVariable **ppVariables,
-                               unsigned count) {
-  DXASSERT_NOMSG(count == 0 || ppVariables != nullptr);
-  unsigned resourcesRemoved = count;
-  for (unsigned i = 0; i < count; ++i) {
-    GlobalVariable *pVariable = ppVariables[i];
-    // This could be considerably faster - check variable type to see which
-    // resource type this is rather than scanning all lists, and look for
-    // usage and removal patterns.
-    if (RemoveResource(m_CBuffers, pVariable))
-      continue;
-    if (RemoveResource(m_SRVs, pVariable))
-      continue;
-    if (RemoveResource(m_UAVs, pVariable))
-      continue;
-    if (RemoveResource(m_Samplers, pVariable))
-      continue;
-    // TODO: do m_TGSMVariables and m_StreamOutputs need maintenance?
-    --resourcesRemoved; // Global variable is not a resource?
-  }
+  DXASSERT_NOMSG(GV != nullptr);
+
+  // With legacy resource reservation, we must keep unused resources around
+  // when they have a register allocation because they prevent that
+  // register range from being allocated to other resources.
+  bool keepAllocated = GetHLOptions().bLegacyResourceReservation;
+
+  // This could be considerably faster - check variable type to see which
+  // resource type this is rather than scanning all lists, and look for
+  // usage and removal patterns.
+  if (RemoveResource(m_CBuffers, GV, keepAllocated))
+    return;
+  if (RemoveResource(m_SRVs, GV, keepAllocated))
+    return;
+  if (RemoveResource(m_UAVs, GV, keepAllocated))
+    return;
+  if (RemoveResource(m_Samplers, GV, keepAllocated))
+    return;
+  // TODO: do m_TGSMVariables and m_StreamOutputs need maintenance?
 }
 
 HLModule::tgsm_iterator HLModule::tgsm_begin() {

+ 3 - 8
lib/Transforms/IPO/GlobalDCE.cpp

@@ -148,13 +148,6 @@ bool GlobalDCE::runOnModule(Module &M) {
       }
     }
 
-  // HLSL Change Starts - remove resource metadata
-  hlsl::HLModule *HLM = M.HasHLModule() ? &M.GetHLModule() : nullptr;
-  if (!DeadGlobalVars.empty() && HLM != nullptr) {
-    HLM->RemoveResources(DeadGlobalVars.data(), DeadGlobalVars.size());
-  }
-  // HLSL Change Ends
-
   // The second pass drops the bodies of functions which are dead...
   std::vector<Function*> DeadFunctions;
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
@@ -171,7 +164,9 @@ bool GlobalDCE::runOnModule(Module &M) {
     if (!AliveGlobals.count(I)) {
       DeadAliases.push_back(I);
       I->setAliasee(nullptr);
-    }
+    } 
+
+  hlsl::HLModule *HLM = M.HasHLModule() ? &M.GetHLModule() : nullptr; // HLSL Change
 
   if (!DeadFunctions.empty()) {
     // Now that all interferences have been dropped, delete the actual objects

+ 2 - 0
tools/clang/include/clang/Frontend/CodeGenOptions.h

@@ -181,6 +181,8 @@ public:
   bool HLSLDefaultRowMajor = false;
   /// Whether use legacy cbuffer load.
   bool HLSLNotUseLegacyCBufLoad = false;
+  /// Whether use legacy resource reservation.
+  bool HLSLLegacyResourceReservation = false;
   /// Set [branch] on every if.
   bool HLSLPreferControlFlow = false;
   /// Set [flatten] on every if.

+ 38 - 56
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -391,6 +391,7 @@ CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM)
   opts.bLegacyCBufferLoad = !CGM.getCodeGenOpts().HLSLNotUseLegacyCBufLoad;
   opts.bAllResourcesBound = CGM.getCodeGenOpts().HLSLAllResourcesBound;
   opts.PackingStrategy = CGM.getCodeGenOpts().HLSLSignaturePackingStrategy;
+  opts.bLegacyResourceReservation = CGM.getCodeGenOpts().HLSLLegacyResourceReservation;
 
   opts.bUseMinPrecision = CGM.getLangOpts().UseMinPrecision;
   opts.bDX9CompatMode = CGM.getLangOpts().EnableDX9CompatMode;
@@ -2346,6 +2347,31 @@ hlsl::DxilResourceBase::Class CGMSHLSLRuntime::TypeToClass(clang::QualType Ty) {
   return hlsl::GetResourceClassForType(CGM.getContext(), Ty);
 }
 
+namespace {
+  void GetResourceDeclElemTypeAndRangeSize(CodeGenModule &CGM, HLModule &HL, VarDecl &VD,
+    QualType &ElemType, unsigned& rangeSize) {
+    ElemType = VD.getType().getCanonicalType();
+    rangeSize = 1;
+    while (const clang::ArrayType *arrayType = CGM.getContext().getAsArrayType(ElemType)) {
+      if (rangeSize != UINT_MAX) {
+        if (arrayType->isConstantArrayType()) {
+          rangeSize *= cast<ConstantArrayType>(arrayType)->getSize().getLimitedValue();
+        }
+        else {
+          if (HL.GetHLOptions().bLegacyResourceReservation) {
+            DiagnosticsEngine &Diags = CGM.getDiags();
+            unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
+              "unbounded resources are not supported with -flegacy-resource-reservation");
+            Diags.Report(VD.getLocation(), DiagID);
+          }
+          rangeSize = UINT_MAX;
+        }
+      }
+      ElemType = arrayType->getElementType();
+    }
+  }
+}
+
 uint32_t CGMSHLSLRuntime::AddSampler(VarDecl *samplerDecl) {
   llvm::GlobalVariable *val =
     cast<llvm::GlobalVariable>(CGM.GetAddrOfGlobalVar(samplerDecl));
@@ -2354,34 +2380,12 @@ uint32_t CGMSHLSLRuntime::AddSampler(VarDecl *samplerDecl) {
   hlslRes->SetLowerBound(UINT_MAX);
   hlslRes->SetGlobalSymbol(val);
   hlslRes->SetGlobalName(samplerDecl->getName());
-  QualType VarTy = samplerDecl->getType();
-  if (const clang::ArrayType *arrayType =
-          CGM.getContext().getAsArrayType(VarTy)) {
-    if (arrayType->isConstantArrayType()) {
-      uint32_t arraySize =
-          cast<ConstantArrayType>(arrayType)->getSize().getLimitedValue();
-      hlslRes->SetRangeSize(arraySize);
-    } else {
-      hlslRes->SetRangeSize(UINT_MAX);
-    }
-    // use elementTy
-    VarTy = arrayType->getElementType();
-    // Support more dim.
-    while (const clang::ArrayType *arrayType =
-               CGM.getContext().getAsArrayType(VarTy)) {
-      unsigned rangeSize = hlslRes->GetRangeSize();
-      if (arrayType->isConstantArrayType()) {
-        uint32_t arraySize =
-            cast<ConstantArrayType>(arrayType)->getSize().getLimitedValue();
-        if (rangeSize != UINT_MAX)
-          hlslRes->SetRangeSize(rangeSize * arraySize);
-      } else
-        hlslRes->SetRangeSize(UINT_MAX);
-      // use elementTy
-      VarTy = arrayType->getElementType();
-    }
-  } else
-    hlslRes->SetRangeSize(1);
+
+  QualType VarTy;
+  unsigned rangeSize;
+  GetResourceDeclElemTypeAndRangeSize(CGM, *m_pHLModule, *samplerDecl,
+    VarTy, rangeSize);
+  hlslRes->SetRangeSize(rangeSize);
 
   const RecordType *RT = VarTy->getAs<RecordType>();
   DxilSampler::SamplerKind kind = KeywordToSamplerKind(RT->getDecl()->getName());
@@ -2763,38 +2767,16 @@ uint32_t CGMSHLSLRuntime::AddUAVSRV(VarDecl *decl,
   llvm::GlobalVariable *val =
       cast<llvm::GlobalVariable>(CGM.GetAddrOfGlobalVar(decl));
 
-  QualType VarTy = decl->getType().getCanonicalType();
-
   unique_ptr<HLResource> hlslRes(new HLResource);
   hlslRes->SetLowerBound(UINT_MAX);
   hlslRes->SetGlobalSymbol(val);
   hlslRes->SetGlobalName(decl->getName());
-  if (const clang::ArrayType *arrayType =
-          CGM.getContext().getAsArrayType(VarTy)) {
-    if (arrayType->isConstantArrayType()) {
-      uint32_t arraySize =
-          cast<ConstantArrayType>(arrayType)->getSize().getLimitedValue();
-      hlslRes->SetRangeSize(arraySize);
-    } else
-      hlslRes->SetRangeSize(UINT_MAX);
-    // use elementTy
-    VarTy = arrayType->getElementType();
-    // Support more dim.
-    while (const clang::ArrayType *arrayType =
-               CGM.getContext().getAsArrayType(VarTy)) {
-      unsigned rangeSize = hlslRes->GetRangeSize();
-      if (arrayType->isConstantArrayType()) {
-        uint32_t arraySize =
-            cast<ConstantArrayType>(arrayType)->getSize().getLimitedValue();
-        if (rangeSize != UINT_MAX)
-          hlslRes->SetRangeSize(rangeSize * arraySize);
-      } else
-        hlslRes->SetRangeSize(UINT_MAX);
-      // use elementTy
-      VarTy = arrayType->getElementType();
-    }
-  } else
-    hlslRes->SetRangeSize(1);
+
+  QualType VarTy;
+  unsigned rangeSize;
+  GetResourceDeclElemTypeAndRangeSize(CGM, *m_pHLModule, *decl,
+    VarTy, rangeSize);
+  hlslRes->SetRangeSize(rangeSize);
 
   for (hlsl::UnusualAnnotation *it : decl->getUnusualAnnotations()) {
     switch (it->getKind()) {

+ 14 - 0
tools/clang/test/CodeGenHLSL/quick-test/resource_binding_fxccompat_arrays_sm50.hlsl

@@ -0,0 +1,14 @@
+// RUN: %dxc /T ps_6_0 /E main -flegacy-resource-reservation %s | FileCheck %s
+
+// Until SM 5.0, fxc takes into account explicit register allocation of unused resources
+// when allocating registers for used resources.
+
+// FXC - Tex1[1] texture float4 2d t4 1
+// CHECK: Tex1 texture f32 2d T0 t3 2
+
+Texture2D Tex0[2] : register(t1);
+Texture2D Tex1[2];
+float4 main() : SV_Target
+{
+	return Tex1[1].Load((uint3)0);
+}

+ 14 - 0
tools/clang/test/CodeGenHLSL/quick-test/resource_binding_fxccompat_arrays_sm51.hlsl

@@ -0,0 +1,14 @@
+// RUN: %dxc /T ps_6_0 /E main %s | FileCheck %s
+
+// Until SM 5.0, fxc takes into account explicit register allocation of unused resources
+// when allocating registers for used resources.
+
+// FXC - Tex1 texture float4 2d T0 t0 2
+// CHECK: Tex1 texture f32 2d T0 t0 2
+
+Texture2D Tex0[2] : register(t1);
+Texture2D Tex1[2];
+float4 main() : SV_Target
+{
+	return Tex1[1].Load((uint3)0);
+}

+ 14 - 0
tools/clang/test/CodeGenHLSL/quick-test/resource_binding_fxccompat_sm50.hlsl

@@ -0,0 +1,14 @@
+// RUN: %dxc /T ps_6_0 /E main -flegacy-resource-reservation %s | FileCheck %s
+
+// Until SM 5.0, FXC takes into account explicit register allocation of unused resources
+// when allocating registers for used resources.
+
+// FXC - Tex1 texture float4 2d t1 1
+// CHECK: Tex1 texture f32 2d T0 t1 1
+
+Texture2D Tex0 : register(t0);
+Texture2D Tex1;
+float4 main() : SV_Target
+{
+	return Tex1.Load((uint3)0);
+}

+ 9 - 0
tools/clang/test/CodeGenHLSL/quick-test/resource_binding_fxccompat_sm50_no_unbounded.hlsl

@@ -0,0 +1,9 @@
+// RUN: %dxc /T ps_6_0 /E main -flegacy-resource-reservation %s | FileCheck %s
+
+// CHECK: error: unbounded resources are not supported with -flegacy-resource-reservation
+
+Texture2D Tex[];
+float4 main() : SV_Target
+{
+	return 0;
+}

+ 13 - 0
tools/clang/test/CodeGenHLSL/quick-test/resource_binding_fxccompat_sm51.hlsl

@@ -0,0 +1,13 @@
+// RUN: %dxc /T ps_6_0 /E main %s | FileCheck %s
+
+// Starting with SM 5.1, fxc ignores unused resources when allocating registers for used resources
+
+// FXC - Tex1 texture float4 2d T0 t0 1
+// CHECK: Tex1 texture f32 2d T0 t0 1
+
+Texture2D Tex0 : register(t0);
+Texture2D Tex1;
+float4 main() : SV_Target
+{
+	return Tex1.Load((uint3)0);
+}

+ 4 - 0
tools/clang/tools/dxc/dxc.cpp

@@ -801,6 +801,10 @@ int DxcContext::Compile() {
       const hlsl::ShaderModel *SM = hlsl::ShaderModel::GetByName(m_Opts.TargetProfile.str().c_str());
       if (SM->IsValid() && SM->GetMajor() < 6) {
         TargetProfile = hlsl::ShaderModel::Get(SM->GetKind(), 6, 0)->GetName();
+        if (!SM->IsSM51Plus()) {
+          // Add flag for backcompat with SM 5.0 resource reservation
+          args.push_back(L"-flegacy-resource-reservation");
+        }
       }
 
       if (!m_Opts.DebugFile.empty() && m_Opts.DebugFile.endswith(llvm::StringRef("\\"))) {

+ 3 - 6
tools/clang/tools/dxcompiler/dxcdisassembler.cpp

@@ -902,8 +902,7 @@ void PrintStructBufferDefinition(DxilResource *buf,
   llvm::Type *RetTy = buf->GetRetType();
   // Skip none struct type.
   if (!RetTy->isStructTy() || HLMatrixLower::IsMatrixType(RetTy)) {
-    Value *GV = buf->GetGlobalSymbol();
-    llvm::Type *Ty = GV->getType()->getPointerElementType();
+    llvm::Type *Ty = buf->GetGlobalSymbol()->getType()->getPointerElementType();
     // For resource array, use element type.
     if (Ty->isArrayTy())
       Ty = Ty->getArrayElementType();
@@ -942,8 +941,7 @@ void PrintStructBufferDefinition(DxilResource *buf,
 void PrintTBufferDefinition(DxilResource *buf, DxilTypeSystem &typeSys,
                                    raw_string_ostream &OS, StringRef comment) {
   const unsigned offsetIndent = 50;
-  Value *GV = buf->GetGlobalSymbol();
-  llvm::Type *Ty = GV->getType()->getPointerElementType();
+  llvm::Type *Ty = buf->GetGlobalSymbol()->getType()->getPointerElementType();
   // For TextureBuffer<> buf[2], the array size is in Resource binding count
   // part.
   if (Ty->isArrayTy())
@@ -969,8 +967,7 @@ void PrintTBufferDefinition(DxilResource *buf, DxilTypeSystem &typeSys,
 void PrintCBufferDefinition(DxilCBuffer *buf, DxilTypeSystem &typeSys,
                                    raw_string_ostream &OS, StringRef comment) {
   const unsigned offsetIndent = 50;
-  Value *GV = buf->GetGlobalSymbol();
-  llvm::Type *Ty = GV->getType()->getPointerElementType();
+  llvm::Type *Ty = buf->GetGlobalSymbol()->getType()->getPointerElementType();
   // For ConstantBuffer<> buf[2], the array size is in Resource binding count
   // part.
   if (Ty->isArrayTy())

+ 1 - 0
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -928,6 +928,7 @@ public:
     compiler.getCodeGenOpts().HLSLPreferControlFlow = Opts.PreferFlowControl;
     compiler.getCodeGenOpts().HLSLAvoidControlFlow = Opts.AvoidFlowControl;
     compiler.getCodeGenOpts().HLSLNotUseLegacyCBufLoad = Opts.NotUseLegacyCBufLoad;
+    compiler.getCodeGenOpts().HLSLLegacyResourceReservation = Opts.LegacyResourceReservation;
     compiler.getCodeGenOpts().HLSLDefines = defines;
     compiler.getCodeGenOpts().MainFileName = pMainFile;
 

+ 30 - 12
tools/clang/unittests/HLSL/OptimizerTest.cpp

@@ -65,9 +65,10 @@ public:
   TEST_METHOD(OptimizerWhenSlice1ThenOK)
   TEST_METHOD(OptimizerWhenSlice2ThenOK)
   TEST_METHOD(OptimizerWhenSlice3ThenOK)
+  TEST_METHOD(OptimizerWhenSliceWithIntermediateOptionsThenOK)
 
   void OptimizerWhenSliceNThenOK(int optLevel);
-  void OptimizerWhenSliceNThenOK(int optLevel, LPCWSTR pText, LPCWSTR pTarget);
+  void OptimizerWhenSliceNThenOK(int optLevel, LPCWSTR pText, LPCWSTR pTarget, llvm::ArrayRef<LPCWSTR> args = {});
 
   dxc::DxcDllSupport m_dllSupport;
   VersionSupportInfo m_ver;
@@ -106,6 +107,19 @@ TEST_F(OptimizerTest, OptimizerWhenSlice1ThenOK) { OptimizerWhenSliceNThenOK(1);
 TEST_F(OptimizerTest, OptimizerWhenSlice2ThenOK) { OptimizerWhenSliceNThenOK(2); }
 TEST_F(OptimizerTest, OptimizerWhenSlice3ThenOK) { OptimizerWhenSliceNThenOK(3); }
 
+TEST_F(OptimizerTest, OptimizerWhenSliceWithIntermediateOptionsThenOK) {
+  // The program below working depends on the LegacyResourceReservation option being
+  // carried through to the resource register allocator, even though it is not
+  // preserved in the final shader.
+  LPCWSTR SampleProgram =
+    L"Texture2D tex0 : register(t0);\r\n"
+    L"Texture2D tex1;\r\n" // tex1 should get register t1
+    L"float4 main() : SV_Target {\r\n"
+    L"  return tex1.Load((int3)0);\r\n"
+    L"}";
+  OptimizerWhenSliceNThenOK(1, SampleProgram, L"ps_6_0", { L"-flegacy-resource-reservation" });
+}
+
 void OptimizerTest::OptimizerWhenSliceNThenOK(int optLevel) {
   LPCWSTR SampleProgram =
     L"Texture2D g_Tex;\r\n"
@@ -138,7 +152,7 @@ static void ExtractFunctionPasses(std::vector<LPCWSTR> &passes, std::vector<LPCW
   passes.erase(firstPass, lastPass);
 }
 
-void OptimizerTest::OptimizerWhenSliceNThenOK(int optLevel, LPCWSTR pText, LPCWSTR pTarget) {
+void OptimizerTest::OptimizerWhenSliceNThenOK(int optLevel, LPCWSTR pText, LPCWSTR pTarget, llvm::ArrayRef<LPCWSTR> args) {
   CComPtr<IDxcCompiler> pCompiler;
   CComPtr<IDxcOptimizer> pOptimizer;
   CComPtr<IDxcOperationResult> pResult;
@@ -157,22 +171,25 @@ void OptimizerTest::OptimizerWhenSliceNThenOK(int optLevel, LPCWSTR pText, LPCWS
   VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcCompiler, &pCompiler));
   VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer));
 
-  // Create the target program with a single invocation.
+  // Set up compilation args vector
   wchar_t OptArg[4] = L"/O0";
   OptArg[2] = L'0' + optLevel;
   Utf16ToBlob(m_dllSupport, pText, &pSource);
-  LPCWSTR args[] = { L"/Vd", OptArg };
-  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
-    pTarget, args, _countof(args), nullptr, 0, nullptr, &pResult));
+  std::vector<LPCWSTR> highLevelArgs = { L"/Vd", OptArg };
+  highLevelArgs.insert(highLevelArgs.end(), args.begin(), args.end());
+
+  // Create the target program with a single invocation.
+  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main", pTarget,
+    highLevelArgs.data(), static_cast<UINT32>(highLevelArgs.size()), nullptr, 0, nullptr, &pResult));
   VerifyOperationSucceeded(pResult);
   VERIFY_SUCCEEDED(pResult->GetResult(&pProgram));
   pResult.Release();
   std::string originalAssembly = DisassembleProgram(m_dllSupport, pProgram);
 
   // Get a list of passes for this configuration.
-  LPCWSTR optDumpArgs[] = { L"/Vd", OptArg, L"/Odump" };
-  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
-    pTarget, optDumpArgs, _countof(optDumpArgs), nullptr, 0, nullptr, &pResult));
+  highLevelArgs.emplace_back(L"/Odump");
+  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main", pTarget,
+    highLevelArgs.data(), static_cast<UINT32>(highLevelArgs.size()), nullptr, 0, nullptr, &pResult));
   VerifyOperationSucceeded(pResult);
   VERIFY_SUCCEEDED(pResult->GetResult(&pOptDump));
   pResult.Release();
@@ -180,9 +197,10 @@ void OptimizerTest::OptimizerWhenSliceNThenOK(int optLevel, LPCWSTR pText, LPCWS
   CA2W passesW(passes.c_str(), CP_UTF8);
 
   // Get the high-level compile of the program.
-  LPCWSTR highLevelArgs[] = { L"/Vd", OptArg, L"/fcgl" };
-  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
-    pTarget, highLevelArgs, _countof(highLevelArgs), nullptr, 0, nullptr, &pResult));
+  highLevelArgs.pop_back(); // Remove /Odump
+  highLevelArgs.emplace_back(L"/fcgl");
+  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main", pTarget,
+    highLevelArgs.data(), static_cast<UINT32>(highLevelArgs.size()), nullptr, 0, nullptr, &pResult));
   VerifyOperationSucceeded(pResult);
   VERIFY_SUCCEEDED(pResult->GetResult(&pHighLevelBlob));
   pResult.Release();